|
80 | 80 | * # To enable/disable dictionary encoding |
81 | 81 | * parquet.enable.dictionary=true # false to disable dictionary encoding |
82 | 82 | * |
| 83 | + * # To enable/disable BYTE_STREAM_SPLIT encoding |
| 84 | + * parquet.enable.bytestreamsplit=false # true to enable BYTE_STREAM_SPLIT encoding |
| 85 | + * |
83 | 86 | * # To enable/disable summary metadata aggregation at the end of a MR job |
84 | 87 | * # The default is true (enabled) |
85 | 88 | * parquet.enable.summary-metadata=true # false to disable summary aggregation |
@@ -137,6 +140,7 @@ public static enum JobSummaryLevel { |
137 | 140 | public static final String WRITE_SUPPORT_CLASS = "parquet.write.support.class"; |
138 | 141 | public static final String DICTIONARY_PAGE_SIZE = "parquet.dictionary.page.size"; |
139 | 142 | public static final String ENABLE_DICTIONARY = "parquet.enable.dictionary"; |
| 143 | + public static final String ENABLE_BYTE_STREAM_SPLIT = "parquet.enable.bytestreamsplit"; |
140 | 144 | public static final String VALIDATION = "parquet.validation"; |
141 | 145 | public static final String WRITER_VERSION = "parquet.writer.version"; |
142 | 146 | public static final String MEMORY_POOL_RATIO = "parquet.memory.pool.ratio"; |
@@ -270,6 +274,11 @@ public static boolean getEnableDictionary(Configuration configuration) { |
270 | 274 | return configuration.getBoolean(ENABLE_DICTIONARY, ParquetProperties.DEFAULT_IS_DICTIONARY_ENABLED); |
271 | 275 | } |
272 | 276 |
|
| 277 | + public static boolean getByteStreamSplitEnabled(Configuration configuration) { |
| 278 | + return configuration.getBoolean( |
| 279 | + ENABLE_BYTE_STREAM_SPLIT, ParquetProperties.DEFAULT_IS_BYTE_STREAM_SPLIT_ENABLED); |
| 280 | + } |
| 281 | + |
273 | 282 | public static int getMinRowCountForPageSizeCheck(Configuration configuration) { |
274 | 283 | return configuration.getInt( |
275 | 284 | MIN_ROW_COUNT_FOR_PAGE_SIZE_CHECK, ParquetProperties.DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK); |
@@ -503,6 +512,7 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp |
503 | 512 | .withPageSize(getPageSize(conf)) |
504 | 513 | .withDictionaryPageSize(getDictionaryPageSize(conf)) |
505 | 514 | .withDictionaryEncoding(getEnableDictionary(conf)) |
| 515 | + .withByteStreamSplitEncoding(getByteStreamSplitEnabled(conf)) |
506 | 516 | .withWriterVersion(getWriterVersion(conf)) |
507 | 517 | .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf)) |
508 | 518 | .withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf)) |
|
0 commit comments