spark.sql("select count(1) from kv").show(20)
spark.sql("select max(SALE_DATE),min(SALE_DATE) from sale_float").show(10)
spark-submit --master yarn --deploy-mode client --num-executors 20 --executor-cores 2 --executor-memory 4g --class com.lifecycle.tools.UngzTool s3://lifecyclebigdata/dataWareHouse/BALABALA/02pdw/jar/tools-1.0-SNAPSHOT.jar
+--------------+--------------+
|max(SALE_DATE)|min(SALE_DATE)|
+--------------+--------------+
| 78190820| 00181030|
+--------------+--------------+
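The UngzTool source is not shown here; the sketch below is one plausible shape for it, assuming the job exists to rewrite the single non-splittable .csv.gz into splittable files so later jobs can parallelize. The output path, header option, and partition count are assumptions, not the actual implementation.

import org.apache.spark.sql.SparkSession

object UngzTool {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("UngzTool").getOrCreate()

    // Spark decompresses .gz transparently, but a single gzip file is not
    // splittable, so the read runs in one task; repartition afterwards.
    val df = spark.read
      .option("header", "true")   // assumed: the CSV has a header row
      .csv("s3://lifecyclebigdata/dataWareHouse/BALABALA/01history/wj/df_sale_float.csv.gz")

    df.repartition(40)            // 40 = 20 executors x 2 cores, matching the submit flags
      .write
      .mode("overwrite")
      .csv("hdfs:///data/df_sale_float")   // assumed output location
    spark.stop()
  }
}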
hdfs dfs -mkdir data    # relative path: this creates /user/root/data
hdfs dfs -ls /user/root
Copy from S3 to HDFS (note: --dest is the absolute path /data, not the /user/root/data created above):
s3-dist-cp --src=s3://lifecyclebigdata/dataWareHouse/BALABALA/01history/wj/df_sale_float.csv.gz --dest=hdfs:///data
Copy back from HDFS to S3:
s3-dist-cp --src=hdfs:///user/root/df_sale_float.csv.gz --dest=s3://lifecyclebigdata/dataWareHouse/BALABALA/01history/wj
spark-submit --master yarn --deploy-mode client --num-executors 20 --executor-cores 2 --executor-memory 4g --class com.lifecycle.tools.CountAll s3://lifecyclebigdata/dataWareHouse/BALABALA/02pdw/jar/tools-1.0-SNAPSHOT.jar
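CountAll's implementation also is not shown; below is a minimal sketch consistent with the output that follows (the distinct year-month list, the total row count, and the elapsed time). The input path and header option are assumptions.

import org.apache.spark.sql.SparkSession

object CountAll {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("CountAll").getOrCreate()
    import spark.implicits._

    val start = System.currentTimeMillis()
    val df = spark.read
      .option("header", "true")
      .csv("hdfs:///data/df_sale_float")   // assumed input path

    // Distinct year-months of SALE_DATE, printed in the [yyyyMM] style below.
    df.select($"SALE_DATE".substr(1, 6).as("ym"))
      .distinct()
      .orderBy("ym")
      .collect()
      .foreach(r => print(s"[${r.getString(0)}],"))
    println()

    println("=============")
    println(df.count())
    // Must be end minus start; swapping the operands yields a negative
    // number like the -46261 printed in the log below.
    println(s"Elapsed: ${System.currentTimeMillis() - start}")
    println("=============")
    spark.stop()
  }
}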
[001810],[200201],[201109],[201111],
[201510],[201511],[201512],
[201601],[201602],[201603],[201604],[201605],[201606],[201607],[201608],[201609],[201610],[201611],[201612],
[201701],[201702],[201703],[201704],[201705],[201706],[201707],[201708],[201709],[201710],[201711],[201712],
[201801],[201802],[201803],[201804],[201805],[201806],[201807],[201808],[201809],[201810],[201811],[201812],
[201901],[201902],[201903],[201904],[201905],[201906],[201907],[201908],[201909],[201910],[201911],[201912],
[202001],[202003],[202004],[202005],[202006],[202007],[202008],[202309],[202711],[205009],[210612],[210712],[210801],[210802],[210803],[210811],[781908]
=============
181558350
Elapsed: -46261
=============
(The negative elapsed value suggests the timing was computed as start minus end; see the comment in the sketch above.)
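The year-month list makes the dirty dates obvious: everything outside roughly 200201 to 202008 ([001810], [202309], [202711], [205009], the [2106xx] to [2108xx] run, [781908]) lines up with the implausible min/max SALE_DATE values seen earlier. Below is a hedged sketch of a cleanup filter, assuming SALE_DATE is a yyyyMMdd string and that 200201 to 202008 is the legitimate business range; the class name and all paths are hypothetical, not part of the original tools jar.

import org.apache.spark.sql.SparkSession

object CleanSaleDate {   // hypothetical helper, not in the original jar
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("CleanSaleDate").getOrCreate()
    import spark.implicits._

    val df = spark.read
      .option("header", "true")
      .csv("hdfs:///data/df_sale_float")   // assumed input path

    // Keep rows whose year-month falls in the assumed valid range;
    // route the rest to a quarantine directory for inspection.
    val ym = $"SALE_DATE".substr(1, 6)
    val valid = df.filter(ym.between("200201", "202008"))
    val dirty = df.filter(!ym.between("200201", "202008"))

    valid.write.mode("overwrite").csv("hdfs:///data/df_sale_float_clean")   // assumed
    dirty.write.mode("overwrite").csv("hdfs:///data/df_sale_float_dirty")   // assumed
    spark.stop()
  }
}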