Hive 小文件合并
合并非分区表的小文件(方法1):
-- Compact the small files of a non-partitioned table by rewriting the table
-- onto itself with Hive's output-merge step enabled.
-- Replace db_name.table_name with the real table before running.

-- Merge small output files at the end of map-only and map-reduce jobs
-- (these two flags apply to the MapReduce execution engine).
SET hive.merge.mapfiles = true;
SET hive.merge.mapredfiles = true;
-- Tez has its own merge flag (default false); without it the rewrite below
-- does not merge files when hive.execution.engine=tez. Requires Hive 0.14+.
SET hive.merge.tezfiles = true;
-- Target size, in bytes, of each merged file.
SET hive.merge.size.per.task = 256000000;
-- The merge step runs when the average output file size is below this
-- threshold (134217728 bytes = 128 MiB).
SET hive.merge.smallfiles.avgsize = 134217728;
-- Compress the query output; the Parquet codec setting presumably targets a
-- Parquet-backed table -- confirm the table's storage format.
SET hive.exec.compress.output = true;
SET parquet.compression = snappy;

-- Rewrite the whole table in place; SELECT * is intentional because every
-- column is copied back unchanged.
INSERT OVERWRITE TABLE db_name.table_name
SELECT *
FROM db_name.table_name;
合并分区表中某一范围内各分区的小文件:
-- Compact the small files of a range of partitions by rewriting those
-- partitions onto themselves with Hive's output-merge step enabled.
-- Replace db_name.table_name, part_col, the column list and the
-- <part_value1>/<part_value2> placeholders before running.

-- Merge small output files at the end of map-only and map-reduce jobs
-- (these two flags apply to the MapReduce execution engine).
SET hive.merge.mapfiles = true;
SET hive.merge.mapredfiles = true;
-- Tez has its own merge flag (default false); without it the rewrite below
-- does not merge files when hive.execution.engine=tez. Requires Hive 0.14+.
SET hive.merge.tezfiles = true;
-- Target size, in bytes, of each merged file.
SET hive.merge.size.per.task = 256000000;
-- The merge step runs when the average output file size is below this
-- threshold (134217728 bytes = 128 MiB).
SET hive.merge.smallfiles.avgsize = 134217728;
-- Compress the query output; the Parquet codec setting presumably targets a
-- Parquet-backed table -- confirm the table's storage format.
SET hive.exec.compress.output = true;
SET parquet.compression = snappy;
-- Dynamic partitioning lets PARTITION (part_col) take its value from the
-- query; nonstrict mode is needed because no static partition value is given.
SET hive.exec.dynamic.partition.mode = nonstrict;
SET hive.exec.dynamic.partition = true;

-- Only the partitions produced by the SELECT are overwritten. The partition
-- column must be the last column in the select list.
INSERT OVERWRITE TABLE db_name.table_name PARTITION (part_col)
SELECT
    col1,
    col2,
    ...,
    coln,
    part_col
FROM db_name.table_name
WHERE part_col BETWEEN '<part_value1>' AND '<part_value2>';