diff --git a/parquet-column/src/main/java/org/apache/parquet/column/statistics/SizeStatistics.java b/parquet-column/src/main/java/org/apache/parquet/column/statistics/SizeStatistics.java
index 97a49be652..0dbb20e660 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/statistics/SizeStatistics.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/statistics/SizeStatistics.java
@@ -136,8 +136,10 @@ public SizeStatistics(
       List<Long> definitionLevelHistogram) {
     this.type = type;
     this.unencodedByteArrayDataBytes = unencodedByteArrayDataBytes;
-    this.repetitionLevelHistogram = repetitionLevelHistogram;
-    this.definitionLevelHistogram = definitionLevelHistogram;
+    this.repetitionLevelHistogram =
+        repetitionLevelHistogram == null ? Collections.emptyList() : repetitionLevelHistogram;
+    this.definitionLevelHistogram =
+        definitionLevelHistogram == null ? Collections.emptyList() : definitionLevelHistogram;
   }

   /**
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestSizeStatistics.java b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestSizeStatistics.java
index 6c166b0e7f..6e2b68167d 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestSizeStatistics.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/statistics/TestSizeStatistics.java
@@ -19,6 +19,7 @@
 package org.apache.parquet.column.statistics;

 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Optional;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.schema.LogicalTypeAnnotation;
@@ -124,4 +125,20 @@ public void testCopyStatistics() {
     Assert.assertEquals(Arrays.asList(1L, 1L, 1L), copy.getRepetitionLevelHistogram());
     Assert.assertEquals(Arrays.asList(1L, 1L, 1L), copy.getDefinitionLevelHistogram());
   }
+
+  @Test
+  public void testOmittedHistogram() {
+    PrimitiveType type = Types.optional(PrimitiveType.PrimitiveTypeName.BINARY)
+        .as(LogicalTypeAnnotation.stringType())
+        .named("a");
+    SizeStatistics statistics = new SizeStatistics(type, 1024L, null, null);
+    Assert.assertEquals(Optional.of(1024L), statistics.getUnencodedByteArrayDataBytes());
+    Assert.assertEquals(Collections.emptyList(), statistics.getRepetitionLevelHistogram());
+    Assert.assertEquals(Collections.emptyList(), statistics.getDefinitionLevelHistogram());
+
+    SizeStatistics copy = statistics.copy();
+    Assert.assertEquals(Optional.of(1024L), copy.getUnencodedByteArrayDataBytes());
+    Assert.assertEquals(Collections.emptyList(), copy.getRepetitionLevelHistogram());
+    Assert.assertEquals(Collections.emptyList(), copy.getDefinitionLevelHistogram());
+  }
 }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index d1c6b01c93..e72f2c33a2 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -2382,8 +2382,14 @@ public static SizeStatistics toParquetSizeStatistics(org.apache.parquet.column.s
       formatStats.setUnencoded_byte_array_data_bytes(
           stats.getUnencodedByteArrayDataBytes().get());
     }
-    formatStats.setRepetition_level_histogram(stats.getRepetitionLevelHistogram());
-    formatStats.setDefinition_level_histogram(stats.getDefinitionLevelHistogram());
+    List<Long> repLevelHistogram = stats.getRepetitionLevelHistogram();
+    if (repLevelHistogram != null && !repLevelHistogram.isEmpty()) {
+      formatStats.setRepetition_level_histogram(repLevelHistogram);
+    }
+    List<Long> defLevelHistogram = stats.getDefinitionLevelHistogram();
+    if (defLevelHistogram != null && !defLevelHistogram.isEmpty()) {
+      formatStats.setDefinition_level_histogram(defLevelHistogram);
+    }
     return formatStats;
   }
 }
diff --git a/parquet-plugins/parquet-encoding-vector/pom.xml b/parquet-plugins/parquet-encoding-vector/pom.xml
index 4b79efdeb6..390ac88007 100644
--- a/parquet-plugins/parquet-encoding-vector/pom.xml
+++ b/parquet-plugins/parquet-encoding-vector/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
-    <version>1.15.0-SNAPSHOT</version>
+    <version>1.15.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>

diff --git a/parquet-plugins/parquet-plugins-benchmarks/pom.xml b/parquet-plugins/parquet-plugins-benchmarks/pom.xml
index 19e500bdc3..99f779c8fa 100644
--- a/parquet-plugins/parquet-plugins-benchmarks/pom.xml
+++ b/parquet-plugins/parquet-plugins-benchmarks/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.parquet</groupId>
     <artifactId>parquet</artifactId>
-    <version>1.15.0-SNAPSHOT</version>
+    <version>1.15.1-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
