@@ -68,6 +68,7 @@ public class ParquetProperties {
6868 public static final boolean DEFAULT_SIZE_STATISTICS_ENABLED = true ;
6969
7070 public static final boolean DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED = true ;
71+ public static final double DEFAULT_V2_PAGE_COMPRESS_THRESHOLD = 0.98 ;
7172
7273 /**
7374 * @deprecated This shared instance can cause thread safety issues when used by multiple builders concurrently.
@@ -120,6 +121,7 @@ public static WriterVersion fromString(String name) {
120121 private final int statisticsTruncateLength ;
121122 private final boolean statisticsEnabled ;
122123 private final boolean sizeStatisticsEnabled ;
124+ private final double v2PageCompressThreshold ;
123125
124126 // The expected NDV (number of distinct values) for each column
125127 private final ColumnProperty <Long > bloomFilterNDVs ;
@@ -154,6 +156,8 @@ private ParquetProperties(Builder builder) {
154156 this .statisticsTruncateLength = builder .statisticsTruncateLength ;
155157 this .statisticsEnabled = builder .statisticsEnabled ;
156158 this .sizeStatisticsEnabled = builder .sizeStatisticsEnabled ;
159+ this .v2PageCompressThreshold = builder .v2PageCompressThreshold ;
160+
157161 this .bloomFilterNDVs = builder .bloomFilterNDVs .build ();
158162 this .bloomFilterFPPs = builder .bloomFilterFPPs .build ();
159163 this .bloomFilterEnabled = builder .bloomFilterEnabled .build ();
@@ -322,6 +326,10 @@ public boolean getPageWriteChecksumEnabled() {
322326 return pageWriteChecksumEnabled ;
323327 }
324328
329+ public double v2PageCompressThreshold () {
330+ return v2PageCompressThreshold ;
331+ }
332+
325333 public OptionalLong getBloomFilterNDV (ColumnDescriptor column ) {
326334 Long ndv = bloomFilterNDVs .getValue (column );
327335 return ndv == null ? OptionalLong .empty () : OptionalLong .of (ndv );
@@ -388,7 +396,8 @@ public String toString() {
388396 + "Page row count limit to " + getPageRowCountLimit () + '\n'
389397 + "Writing page checksums is: " + (getPageWriteChecksumEnabled () ? "on" : "off" ) + '\n'
390398 + "Statistics enabled: " + statisticsEnabled + '\n'
391- + "Size statistics enabled: " + sizeStatisticsEnabled ;
399+ + "Size statistics enabled: " + sizeStatisticsEnabled + '\n'
400+ + "V2 page compress threshold: " + v2PageCompressThreshold ;
392401 }
393402
394403 public static class Builder {
@@ -406,6 +415,7 @@ public static class Builder {
406415 private int statisticsTruncateLength = DEFAULT_STATISTICS_TRUNCATE_LENGTH ;
407416 private boolean statisticsEnabled = DEFAULT_STATISTICS_ENABLED ;
408417 private boolean sizeStatisticsEnabled = DEFAULT_SIZE_STATISTICS_ENABLED ;
418+ private double v2PageCompressThreshold = DEFAULT_V2_PAGE_COMPRESS_THRESHOLD ;
409419 private final ColumnProperty .Builder <Long > bloomFilterNDVs ;
410420 private final ColumnProperty .Builder <Double > bloomFilterFPPs ;
411421 private int maxBloomFilterBytes = DEFAULT_MAX_BLOOM_FILTER_BYTES ;
@@ -460,6 +470,7 @@ private Builder(ParquetProperties toCopy) {
460470 this .extraMetaData = toCopy .extraMetaData ;
461471 this .statistics = ColumnProperty .builder (toCopy .statistics );
462472 this .sizeStatistics = ColumnProperty .builder (toCopy .sizeStatistics );
473+ this .v2PageCompressThreshold = toCopy .v2PageCompressThreshold ();
463474 }
464475
465476 /**
@@ -756,6 +767,21 @@ public Builder withSizeStatisticsEnabled(String columnPath, boolean enabled) {
756767 return this ;
757768 }
758769
770+ /**
771+ * Sets the compression threshold for V2 data pages.
772+ *
773+ * <p>When the compression ratio (compressed size / uncompressed size) exceeds this threshold,
774+ * the uncompressed data will be used instead. For example, with a threshold of 0.98, if
775+ * compression only saves 2% of space, the data will not be compressed.
776+ *
777+ * @param threshold the compression ratio threshold, default is {@value #DEFAULT_V2_PAGE_COMPRESS_THRESHOLD}
778+ * @return this builder for method chaining
779+ */
780+ public Builder withV2PageCompressThreshold (double threshold ) {
781+ this .v2PageCompressThreshold = threshold ;
782+ return this ;
783+ }
784+
759785 public ParquetProperties build () {
760786 ParquetProperties properties = new ParquetProperties (this );
761787 // we pass a constructed but uninitialized factory to ParquetProperties above as currently
0 commit comments