Complete Spark SQL code to read a file from AWS S3 and write the result to a MySQL database

Continuing from the previous post, the requirement is the same, except that the local input file path is replaced with an AWS S3 path. The implementation is as follows:

1. The pom.xml file:

    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    4.0.0

    org.example
    JavaDemo
    0.0.1-SNAPSHOT
    jar

    JavaDemo
    http://maven.apache.org

    
    
    
        UTF-8
        2.11
        2.4.3
        2.8.5
        1.11.636
        compile
    

    
        
            io.netty
            netty-all
            4.1.17.Final
            
                
                  netty
                  io.netty
                

            

        

        
        
        
        
        
        
        
        
        
        
            com.fasterxml.jackson.core
            jackson-core
            2.6.3
       

       
            com.fasterxml.jackson.core
            jackson-databind
            2.6.3
       

       
            com.fasterxml.jackson.core
            jackson-annotations
            2.6.3
       

        
       
            org.apache.hadoop
            hadoop-aws
            ${hadoop.version}
       

        
       
            org.apache.hadoop
            hadoop-client
            ${hadoop.version}
       

       
            net.java.dev.jets3t
            jets3t
            0.9.4
       

        
       
            org.apache.httpcomponents
            httpcore
            4.4
       

       
            org.apache.httpcomponents
            httpclient
            4.4
       

        
            org.apache.spark
            spark-core_${scala.version}
            ${spark.version}
       

       
            org.apache.spark
            spark-sql_${scala.version}
            ${spark.version}
       

       
       
            mysql
            mysql-connector-java
            5.1.47
       

       
       
       
       
       
       
       
        
            junit
            junit
            3.8.1
            test
        

         
    


-------------------------

2. The code:

package org.example.JavaDemo;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.hadoop.fs.s3a.S3AFileSystem;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Properties;

public class SparkSqlCsvToCsv {


    public static void main(String[] args) {
        /**
         * AWS China endpoint: s3.cn-north-1.amazonaws.com.cn
         * Ningxia region: cn-northwest-1, Beijing region: cn-north-1
         */
        System.out.println("=========0000=========");
        String hdfsInAddress = "s3a://emr-demo-input/mydata/"; // alternatives: "hdfs://192.168.209.129:9000/" (HDFS server), "D:\\DevTemp\\AWS\\" (local directory)
        String inputAddress = "";//"in/";
        String csvFileName="emr-demo-data.csv";
        System.out.println("======111============");
        SparkConf conf = new SparkConf().setMaster("local").setAppName("TestSpark");
        System.out.println("=========222=========");

        /*
         * Properties properties = new Properties(); InputStream inputStream =
         * Object.class.getResourceAsStream("/s3.properties");
         * properties.load(inputStream);
         */
        System.out.println("=========333=========");
        JavaSparkContext sc = new JavaSparkContext(conf); // JavaSparkContext is an older entry point
//        SparkContext sc = new SparkContext(conf);
        System.out.println("=========444-1=========");
        /*
         * sc.hadoopConfiguration().set("fs.s3a.access.key",properties.getProperty(
         * "fs.s3a.access.key"));
         * sc.hadoopConfiguration().set("fs.s3a.secret.key",properties.getProperty(
         * "fs.s3a.secret.key"));
         * sc.hadoopConfiguration().set("fs.s3a.endpoint",properties.getProperty(
         * "fs.s3a.endpoint"));//spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
         */
        /* spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem  
            spark.hadoop.fs.s3a.access.key=ACCESSKEY  
            spark.hadoop.fs.s3a.secret.key=SECRETKEY
         */
        //sc.hadoopConfiguration().set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
        sc.hadoopConfiguration().set("fs.s3a.access.key","AKIA2CIDQ6XXXXXXXXX");
        sc.hadoopConfiguration().set("fs.s3a.secret.key","VR1spXe+Jb5pK4m1gKcBFXXXXXXXXXXXX");
        sc.hadoopConfiguration().set("fs.s3a.endpoint","s3.cn-northwest-1.amazonaws.com.cn");//这里使用的是宁夏服务器
        System.out.println("=========444=========");
        SQLContext sqlContext = new SQLContext(sc);
        System.out.println("=========555=========");
        HashMap<String, String> options = new HashMap<>();
        options.put("header", "true");// treat the first row as the header
        options.put("inferSchema", "true");// infer column types automatically
        //options.put("path", hdfsInAddress + inputAddress + filePath);
        options.put("path", hdfsInAddress + inputAddress + csvFileName);
        options.put("dateFormat","yyyy-MM-dd");// SimpleDateFormat pattern; "YYYY-MM-DD" would mean week-year/day-of-year
        System.out.println("Path of the input file: " + hdfsInAddress + inputAddress + csvFileName);
        System.out.println("=========666=========");
        /**** declare the field types of the schema ****/
        StructField[] structFields = new StructField[9];
        structFields[0] = DataTypes.createStructField("Tier", DataTypes.StringType,true);
        structFields[1] = DataTypes.createStructField("SellerCode",DataTypes.StringType,true);
        structFields[2] = DataTypes.createStructField("SellerName",DataTypes.StringType,true);
        structFields[3] = DataTypes.createStructField("DataSource",DataTypes.StringType,true);
        structFields[4] = DataTypes.createStructField("SellerProvince",DataTypes.StringType,true);
        structFields[5] = DataTypes.createStructField("_201901",DataTypes.DoubleType,true);
        structFields[6] = DataTypes.createStructField("_201902",DataTypes.DoubleType,true);
        structFields[7] = DataTypes.createStructField("_201903",DataTypes.DoubleType,true);
        structFields[8] = DataTypes.createStructField("flag",DataTypes.StringType,true);
        StructType structType = new StructType(structFields);
        System.out.println("=========777=========");
        Dataset<Row> dataFrame = sqlContext.load("com.databricks.spark.csv", structType, options);
        System.out.println("=========8888=========");
        // DataFrame cars = (new CsvParser()).withUseHeader(true).csvFile(sqlContext, "cars.csv"); // alternative: read the CSV file through CsvParser
        dataFrame.registerTempTable("result");
        System.out.println("=========9999=========");
        StringBuffer sparkSql = new StringBuffer("select ");
        sparkSql.append("Tier");
        sparkSql.append(", SellerCode");
        sparkSql.append(", SellerName");
        sparkSql.append(", DataSource");
        sparkSql.append(", SellerProvince");
        sparkSql.append(", _201901");
        sparkSql.append(", _201902");
        sparkSql.append(", _201903");
        sparkSql.append(", if(_201903>_201902,'up','down') as flag");
        sparkSql.append(" from result");
        Dataset<Row> resultFrame = sqlContext.sql(sparkSql.toString());
        //resultFrame.createOrReplaceTempView("resultView");// create a temporary view

        //System.out.println("***************用Dataset打印*peopleScore********"+resultFrame.limit(10).showString(20,0,false));
        System.out.println("******print schema *******");
        resultFrame.printSchema();
        System.out.println("*************");
        //resultFrame.select("SellerName").show();
        System.out.println("*************");
        //Tier    SellerCode    SellerName    DataSource    SellerProvince    _201901    _201902    _201903
        Dataset<Row> df = resultFrame.select(
                resultFrame.col("Tier"),
                resultFrame.col("SellerCode"),
                resultFrame.col("SellerName"),
                resultFrame.col("DataSource"),
                resultFrame.col("SellerProvince"),
                resultFrame.col("_201901"),
                resultFrame.col("_201902"),
                resultFrame.col("_201903"),
                resultFrame.col("flag")
        );
        df = df.filter(df.col("Tier").contains("T"));//where condition:equalTo/
        //df = df.filter((df.col("_201902").cast(DataTypes.FloatType)).gt((df.col("201901").cast(DataTypes.FloatType))));//gt 大于
        //df = df.orderBy(df.col("_201902").cast(DataTypes.FloatType).asc_nulls_first());//转换类型并升序
        //df.groupBy("age").count();//分组

        System.out.println("******df.show() print schema *******");
        df.show();
        System.out.println("******df.show() print schema *******");
        
        /************* write the result to the MySQL database ******************/
        // database connection settings
        String url = "jdbc:mysql://127.0.0.1:3306/hive?useUnicode=true&characterEncoding=utf-8";
        Properties connectionProperties = new Properties();
        connectionProperties.put("user","root");
        connectionProperties.put("password","123456");
        connectionProperties.put("driver","com.mysql.jdbc.Driver");

        /** write into the database table **/
        df.write().mode(SaveMode.Overwrite).jdbc(url,"t_result",connectionProperties);// Overwrite drops and recreates the table (data and schema)
        sc.stop();
    }
}
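
For reference, the commented-out block in main() hints at loading the S3 credentials from a properties file instead of hardcoding them. Below is a minimal, self-contained sketch of that idea; the file name /s3.properties, its location under src/main/resources, and the class name are assumptions for illustration only:

package org.example.JavaDemo;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class S3ConfigFromProperties {

    public static void main(String[] args) throws IOException {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("TestSpark");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Load the fs.s3a.* settings from a classpath resource
        // (assumed to be src/main/resources/s3.properties with the keys
        // fs.s3a.access.key, fs.s3a.secret.key and fs.s3a.endpoint).
        Properties properties = new Properties();
        try (InputStream inputStream =
                     S3ConfigFromProperties.class.getResourceAsStream("/s3.properties")) {
            properties.load(inputStream);
        }

        sc.hadoopConfiguration().set("fs.s3a.access.key", properties.getProperty("fs.s3a.access.key"));
        sc.hadoopConfiguration().set("fs.s3a.secret.key", properties.getProperty("fs.s3a.secret.key"));
        sc.hadoopConfiguration().set("fs.s3a.endpoint",   properties.getProperty("fs.s3a.endpoint"));

        // ... the same CSV-reading and MySQL-writing logic as above ...

        sc.stop();
    }
}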

------------------

Run output:

=========0000=========
======111============
=========222=========
=========333=========
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
20/03/18 08:58:18 INFO SparkContext: Running Spark version 2.4.3
20/03/18 08:58:18 WARN Shell: Did not find winutils.exe: {}
java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset. -see https://wiki.apache.org/hadoop/WindowsProblems
    at org.apache.hadoop.util.Shell.fileNotFoundException(Shell.java:528)
    at org.apache.hadoop.util.Shell.getHadoopHomeDir(Shell.java:549)
    at org.apache.hadoop.util.Shell.getQualifiedBin(Shell.java:572)
    at org.apache.hadoop.util.Shell.<clinit>(Shell.java:669)
    at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79)
    at org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1555)
    at org.apache.hadoop.security.SecurityUtil.getLogSlowLookupsEnabled(SecurityUtil.java:497)
    at org.apache.hadoop.security.SecurityUtil.<clinit>(SecurityUtil.java:90)
    at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:293)
    at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:281)
    at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:837)
    at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:807)
    at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:680)
    at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2422)
    at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2422)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2422)
    at org.apache.spark.SparkContext.<init>(SparkContext.scala:293)
    at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
    at org.example.JavaDemo.SparkSqlCsvToCsv.main(SparkSqlCsvToCsv.java:40)
Caused by: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset.
    at org.apache.hadoop.util.Shell.checkHadoopHomeInner(Shell.java:448)
    at org.apache.hadoop.util.Shell.checkHadoopHome(Shell.java:419)
    at org.apache.hadoop.util.Shell.<clinit>(Shell.java:496)
    ... 16 more
20/03/18 08:58:19 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
20/03/18 08:58:19 INFO SparkContext: Submitted application: TestSpark
20/03/18 08:58:19 INFO SecurityManager: Changing view acls to: Ace
20/03/18 08:58:19 INFO SecurityManager: Changing modify acls to: Ace
20/03/18 08:58:19 INFO SecurityManager: Changing view acls groups to: 
20/03/18 08:58:19 INFO SecurityManager: Changing modify acls groups to: 
20/03/18 08:58:19 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(Ace); groups with view permissions: Set(); users  with modify permissions: Set(Ace); groups with modify permissions: Set()
20/03/18 08:58:20 INFO Utils: Successfully started service 'sparkDriver' on port 53335.
20/03/18 08:58:20 INFO SparkEnv: Registering MapOutputTracker
20/03/18 08:58:20 INFO SparkEnv: Registering BlockManagerMaster
20/03/18 08:58:20 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
20/03/18 08:58:20 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
20/03/18 08:58:20 INFO DiskBlockManager: Created local directory at C:\Users\Lenovo\AppData\Local\Temp\blockmgr-49830c1c-118a-4540-a84b-6ba29b2c3bd8
20/03/18 08:58:21 INFO MemoryStore: MemoryStore started with capacity 1984.5 MB
20/03/18 08:58:21 INFO SparkEnv: Registering OutputCommitCoordinator
20/03/18 08:58:21 INFO Utils: Successfully started service 'SparkUI' on port 4040.
20/03/18 08:58:21 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://Ace-Sun:4040
20/03/18 08:58:21 INFO Executor: Starting executor ID driver on host localhost
20/03/18 08:58:21 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 53348.
20/03/18 08:58:21 INFO NettyBlockTransferService: Server created on Ace-Sun:53348
20/03/18 08:58:21 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
20/03/18 08:58:21 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, Ace-Sun, 53348, None)
20/03/18 08:58:21 INFO BlockManagerMasterEndpoint: Registering block manager Ace-Sun:53348 with 1984.5 MB RAM, BlockManagerId(driver, Ace-Sun, 53348, None)
20/03/18 08:58:21 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, Ace-Sun, 53348, None)
20/03/18 08:58:21 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, Ace-Sun, 53348, None)
=========444-1=========
=========444=========
=========555=========
Path of the input file: s3a://emr-demo-input/mydata/emr-demo-data.csv
=========666=========
=========777=========
20/03/18 08:58:21 INFO SharedState: Setting hive.metastore.warehouse.dir ('null') to the value of spark.sql.warehouse.dir ('file:/D:/DevWorkspase/eclipse-workspace/JavaDemo/spark-warehouse/').
20/03/18 08:58:21 INFO SharedState: Warehouse path is 'file:/D:/DevWorkspase/eclipse-workspace/JavaDemo/spark-warehouse/'.
20/03/18 08:58:22 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
=========8888=========
=========9999=========
******print schema *******
root
 |-- Tier: string (nullable = true)
 |-- SellerCode: string (nullable = true)
 |-- SellerName: string (nullable = true)
 |-- DataSource: string (nullable = true)
 |-- SellerProvince: string (nullable = true)
 |-- _201901: double (nullable = true)
 |-- _201902: double (nullable = true)
 |-- _201903: double (nullable = true)
 |-- flag: string (nullable = false)

*************
*************
******df.show() print schema *******
20/03/18 08:58:28 INFO FileSourceStrategy: Pruning directories with: 
20/03/18 08:58:28 INFO FileSourceStrategy: Post-Scan Filters: isnotnull(Tier#0),Contains(Tier#0, T)
20/03/18 08:58:28 INFO FileSourceStrategy: Output Data Schema: struct
20/03/18 08:58:28 INFO FileSourceScanExec: Pushed Filters: IsNotNull(Tier),StringContains(Tier,T)
20/03/18 08:58:28 INFO CodeGenerator: Code generated in 271.4181 ms
20/03/18 08:58:28 INFO CodeGenerator: Code generated in 35.9997 ms
20/03/18 08:58:28 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 244.1 KB, free 1984.3 MB)
20/03/18 08:58:29 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 20.0 KB, free 1984.2 MB)
20/03/18 08:58:29 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on Ace-Sun:53348 (size: 20.0 KB, free: 1984.5 MB)
20/03/18 08:58:29 INFO SparkContext: Created broadcast 0 from show at SparkSqlCsvToCsv.java:126
20/03/18 08:58:29 INFO FileSourceScanExec: Planning scan with bin packing, max size: 4199352 bytes, open cost is considered as scanning 4194304 bytes.
20/03/18 08:58:29 INFO SparkContext: Starting job: show at SparkSqlCsvToCsv.java:126
20/03/18 08:58:29 INFO DAGScheduler: Got job 0 (show at SparkSqlCsvToCsv.java:126) with 1 output partitions
20/03/18 08:58:29 INFO DAGScheduler: Final stage: ResultStage 0 (show at SparkSqlCsvToCsv.java:126)
20/03/18 08:58:29 INFO DAGScheduler: Parents of final stage: List()
20/03/18 08:58:29 INFO DAGScheduler: Missing parents: List()
20/03/18 08:58:29 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[3] at show at SparkSqlCsvToCsv.java:126), which has no missing parents
20/03/18 08:58:29 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 14.9 KB, free 1984.2 MB)
20/03/18 08:58:29 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 7.2 KB, free 1984.2 MB)
20/03/18 08:58:29 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on Ace-Sun:53348 (size: 7.2 KB, free: 1984.5 MB)
20/03/18 08:58:29 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1161
20/03/18 08:58:29 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[3] at show at SparkSqlCsvToCsv.java:126) (first 15 tasks are for partitions Vector(0))
20/03/18 08:58:29 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks
20/03/18 08:58:29 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, executor driver, partition 0, PROCESS_LOCAL, 8323 bytes)
20/03/18 08:58:29 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
20/03/18 08:58:29 INFO FileScanRDD: Reading File path: s3a://emr-demo-input/mydata/emr-demo-data.csv, range: 0-5048, partition values: [empty row]
20/03/18 08:58:29 INFO CodeGenerator: Code generated in 22.3421 ms
20/03/18 08:58:30 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 3241 bytes result sent to driver
20/03/18 08:58:30 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 596 ms on localhost (executor driver) (1/1)
20/03/18 08:58:30 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 
20/03/18 08:58:30 INFO DAGScheduler: ResultStage 0 (show at SparkSqlCsvToCsv.java:126) finished in 0.700 s
20/03/18 08:58:30 INFO DAGScheduler: Job 0 finished: show at SparkSqlCsvToCsv.java:126, took 0.737439 s
+----+----------+----------------------------+----------+--------------+--------------+-------------+-------------+----+
|Tier|SellerCode|                  SellerName|DataSource|SellerProvince|       _201901|      _201902|      _201903|flag|
+----+----------+----------------------------+----------+--------------+--------------+-------------+-------------+----+
|  T1|    HE003|          医药有限公司|       DDI|          河北|1.0559443903E8|3.312345429E7|5.428380069E7|  up|
|  T1|  HE0009S|河公司田医药站|       DDI|          河北|     249239.76|    168139.14|    260403.56|  up|
|  T1|  HE006S|          河药集团有限公司|       DDI|          河北|    3856199.08|    1384355.4|   4070853.03|  up|
|  T1|   HEA1S|          邢医药药材有限公司|       DDI|          河北|     405327.83|     63712.79|     89365.28|  up|
|  T1|   H865S|            衡水医药有限公司|       DDI|          河北|      648096.6|     188102.8|     239028.8|  up|
|  T1|   HEA3S|            保定医药有限公司|       DDI|          河北|      794278.6|    143358.86|    280220.74|  up|
|  T1|    HB001|唐公司(新分公司)|       DDI|          河北|    2844517.25|    1066305.9|   1154788.35|  up|
|  T1|   T18S|              华医药有限公司|       DDI|          河北| 1.986586353E7|   3419255.58|   9636006.07|  up|
|  T1|   T34S|    国药控有限公司|       DDI|          河北|    2073843.21|     698878.7|    799672.08|  up|
|  T2|  H135S|    国药医药有限公司|       DDI|          河北|     161440.74|    111466.11|     111115.2|down|
|  T2|  HE3S|      国药堂医药有限公司|       DDI|          河北|    6660979.13|   1417602.22|   2650979.14|  up|
|  T2|  HE9S|      国药堂公司|       DDI|          河北|    4707805.76|   1884585.75|   2670068.27|  up|
|  T2|  H17S|    国药岛医药有限公司|       DDI|          河北|    2889987.07|    997135.23|   1670409.38|  up|
|  T2|  H0368S|      国药堂坊医药有限公司|       DDI|          河北|    2563005.46|    810446.44|   1546372.15|  up|
|  T2|  H0593S|      国药堂药有限公司|       DDI|          河北|    5412119.26|   1241300.64|   1654506.05|  up|
|  T2|   1006S|        河贸易有限公司|       DDI|          河北|      31847.58|      6605.72|      9101.52|  up|
|  T2|   1206S|        承盛限责任公司|       DDI|          河北|     372629.21|     81697.68|    157634.23|  up|
|  T2|   H227S|    国药庄医药有限公司|       DDI|          河北|    1382932.07|    243595.74|    892387.49|  up|
|  T2|   H1S|        有限公司|       DDI|          河北|    1581317.58|    1268579.3|   1270598.91|  up|
|  T2|   330S|            华有限公司|       DDI|          河北|    2133488.87|    685468.02|   1198794.77|  up|
+----+----------+----------------------------+----------+--------------+--------------+-------------+-------------+----+
only showing top 20 rows

******df.show() print schema *******
20/03/18 08:58:30 INFO SparkUI: Stopped Spark web UI at http://Ace-Sun:4040
20/03/18 08:58:30 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
20/03/18 08:58:30 INFO MemoryStore: MemoryStore cleared
20/03/18 08:58:30 INFO BlockManager: BlockManager stopped
20/03/18 08:58:30 INFO BlockManagerMaster: BlockManagerMaster stopped
20/03/18 08:58:30 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
20/03/18 08:58:30 INFO SparkContext: Successfully stopped SparkContext
20/03/18 08:58:30 INFO ShutdownHookManager: Shutdown hook called
20/03/18 08:58:30 INFO ShutdownHookManager: Deleting directory C:\Users\Lenovo\AppData\Local\Temp\spark-eb60c4d7-fe60-4af6-9eb6-3a2e934d1405
---------------------------

The exception in the log is thrown because there is no local Hadoop environment; it does not affect the run.

It can be fixed by putting the Hadoop libraries (including winutils) on the local development machine and setting HADOOP_HOME, or by installing a full Hadoop environment locally; a small sketch follows.
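A minimal sketch of that workaround for local development, assuming winutils.exe has been downloaded to D:\hadoop\bin (the path and class name are examples, not taken from the original setup):

package org.example.JavaDemo;

public class WinutilsSetup {

    public static void main(String[] args) {
        // Point Hadoop at a local directory that contains bin\winutils.exe
        // before any Spark/Hadoop class is initialized. The path below is an
        // example and must match where winutils.exe was actually placed.
        System.setProperty("hadoop.home.dir", "D:\\hadoop");

        // ... then build the SparkConf / JavaSparkContext as in the code above ...
    }
}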

The above is my personal study/research code; there is some redundancy, so feel free to optimize it. One possible cleanup using the SparkSession API is sketched below.
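As an example of such an optimization, here is one possible slimmed-down variant based on the SparkSession API. This is only a sketch: the class name is new, the credentials are placeholders, and it assumes the same bucket, columns, and MySQL settings as the code above.

package org.example.JavaDemo;

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.expr;

import java.util.Properties;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class SparkSqlS3ToMysql {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .master("local")
                .appName("TestSpark")
                .getOrCreate();

        // Same S3A settings as above, with placeholder credentials.
        spark.sparkContext().hadoopConfiguration().set("fs.s3a.access.key", "YOUR_ACCESS_KEY");
        spark.sparkContext().hadoopConfiguration().set("fs.s3a.secret.key", "YOUR_SECRET_KEY");
        spark.sparkContext().hadoopConfiguration().set("fs.s3a.endpoint", "s3.cn-northwest-1.amazonaws.com.cn");

        // Read the CSV with the built-in reader and infer the column types.
        Dataset<Row> source = spark.read()
                .option("header", "true")
                .option("inferSchema", "true")
                .csv("s3a://emr-demo-input/mydata/emr-demo-data.csv");

        // Derive the flag column and apply the same Tier filter as above.
        Dataset<Row> result = source
                .withColumn("flag", expr("if(_201903 > _201902, 'up', 'down')"))
                .filter(col("Tier").contains("T"));

        // JDBC connection settings for the local MySQL instance.
        Properties connectionProperties = new Properties();
        connectionProperties.put("user", "root");
        connectionProperties.put("password", "123456");
        connectionProperties.put("driver", "com.mysql.jdbc.Driver");

        result.write().mode(SaveMode.Overwrite)
                .jdbc("jdbc:mysql://127.0.0.1:3306/hive?useUnicode=true&characterEncoding=utf-8",
                      "t_result", connectionProperties);

        spark.stop();
    }
}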

For discussion, learning, or feedback, you can add me on WeChat: spsace
