First Impressions of CarbonData 2.0.1

1. A simpler deployment and configuration model

Local environment:

Spark: 2.4.3 (spark-2.4.3-bin-hadoop2.7)
Scala: 2.11.8
CarbonData: 2.0.1 (apache-carbondata-2.0.1-bin-spark2.4.5-hadoop2.7.2, built against Spark 2.4.5)
Hadoop: 2.7
Hive: 2.4.3

Only a single --conf entry needs to be added (plus the CarbonData jar passed via --jars); configuring CarbonData's store path separately is no longer required:

spark-shell --conf spark.sql.extensions=org.apache.spark.sql.CarbonExtensions --jars /Users/hulb/opt/third/spark-2.4.3-bin-hadoop2.7/jars/apache-carbondata-2.0.1-bin-spark2.4.5-hadoop2.7.2.jar
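
If you would rather not pass these options on every invocation, the same settings can go into conf/spark-defaults.conf (a sketch, reusing the local jar path from the command above):

spark.sql.extensions  org.apache.spark.sql.CarbonExtensions
spark.jars            /Users/hulb/opt/third/spark-2.4.3-bin-hadoop2.7/jars/apache-carbondata-2.0.1-bin-spark2.4.5-hadoop2.7.2.jar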

2. After startup, Spark is used directly; no separate CarbonSession needs to be created
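
The same holds outside spark-shell: a plain SparkSession with the extension configured replaces the old CarbonSession entry point from 1.x. A minimal Scala sketch (not from the transcript; the app name is arbitrary):

import org.apache.spark.sql.SparkSession

// A plain SparkSession -- CarbonExtensions injects Carbon's parser,
// optimizer rules, and DDL/DML commands into standard Spark SQL,
// so no CarbonSession.getOrCreateCarbonSession(...) is needed anymore.
val spark = SparkSession.builder()
  .appName("carbon201-test")
  .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
  .enableHiveSupport()
  .getOrCreate()

spark.sql("SHOW DATABASES").show()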

3. Usage

scala> spark.sql("create database carbon201test")
res5: org.apache.spark.sql.DataFrame = []

scala> spark.sql("use carbon201test")
res6: org.apache.spark.sql.DataFrame = []

scala> spark.sql(
     |            s"""
     |               | CREATE TABLE IF NOT EXISTS carbon201_test_table(
     |               |   id string,
     |               |   name string,
     |               |   city string,
     |               |   age Int)
     |               | STORED AS carbondata
     |            """.stripMargin)
2020-06-04 14:04:25 AUDIT audit:74 - {"time":"2020年6月4日 下午02时04分25秒","username":"hulb","opName":"CREATE TABLE","opId":"82805921821073","opStatus":"START"}
2020-06-04 14:04:25 AUDIT audit:97 - {"time":"2020年6月4日 下午02时04分25秒","username":"hulb","opName":"CREATE TABLE","opId":"82805921821073","opStatus":"SUCCESS","opTime":"108 ms","table":"NA","extraInfo":{}}
res7: org.apache.spark.sql.DataFrame = []

scala> spark.sql("LOAD DATA INPATH '/user/hulb/test/sample.csv' INTO TABLE carbon201_test_table")
2020-06-04 14:04:37 AUDIT audit:74 - {"time":"2020年6月4日 下午02时04分37秒","username":"hulb","opName":"LOAD DATA","opId":"82817994090824","opStatus":"START"}
2020-06-04 14:04:38 ERROR AbstractDFSCarbonFile:453 - Exception occured: File hdfs://localhost:9000/user/hive/warehouse/carbon201test.db/_system does not exist.
java.io.FileNotFoundException: File hdfs://localhost:9000/user/hive/warehouse/carbon201test.db/_system does not exist.
	at org.apache.hadoop.hdfs.DistributedFileSystem.listStatusInternal(DistributedFileSystem.java:795)
	at org.apache.hadoop.hdfs.DistributedFileSystem.access$700(DistributedFileSystem.java:106)
	at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:853)
	at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:849)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.listStatus(DistributedFileSystem.java:860)
	at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.listFiles(AbstractDFSCarbonFile.java:451)
	at org.apache.carbondata.core.datastore.filesystem.AbstractDFSCarbonFile.listFiles(AbstractDFSCarbonFile.java:554)
	at org.apache.carbondata.core.view.MVProvider$SchemaProvider.retrieveAllSchemasInternal(MVProvider.java:481)
	at org.apache.carbondata.core.view.MVProvider$SchemaProvider.checkAndReloadSchemas(MVProvider.java:536)
	at org.apache.carbondata.core.view.MVProvider$SchemaProvider.retrieveAllSchemas(MVProvider.java:472)
	at org.apache.carbondata.core.view.MVProvider.getSchemas(MVProvider.java:127)
	at org.apache.carbondata.core.view.MVManager.getSchemas(MVManager.java:126)
	at org.apache.carbondata.core.view.MVManager.getSchemas(MVManager.java:117)
	at org.apache.carbondata.core.view.MVManager.getSchemasOnTable(MVManager.java:85)
	at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.updateTableStatus(CarbonDataRDDFactory.scala:1038)
	at org.apache.carbondata.spark.rdd.CarbonDataRDDFactory$.loadCarbonData(CarbonDataRDDFactory.scala:613)
	at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.loadData(CarbonLoadDataCommand.scala:207)
	at org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand.processData(CarbonLoadDataCommand.scala:168)
	at org.apache.spark.sql.execution.command.AtomicRunnableCommand$$anonfun$run$3.apply(package.scala:148)
	at org.apache.spark.sql.execution.command.AtomicRunnableCommand$$anonfun$run$3.apply(package.scala:145)
	at org.apache.spark.sql.execution.command.Auditable$class.runWithAudit(package.scala:104)
	at org.apache.spark.sql.execution.command.AtomicRunnableCommand.runWithAudit(package.scala:141)
	at org.apache.spark.sql.execution.command.AtomicRunnableCommand.run(package.scala:145)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363)
	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:194)
	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:79)
	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
	at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:24)
	at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:29)
	at $line24.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:31)
	at $line24.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:33)
	at $line24.$read$$iw$$iw$$iw$$iw.<init>(<console>:35)
	at $line24.$read$$iw$$iw$$iw.<init>(<console>:37)
	at $line24.$read$$iw$$iw.<init>(<console>:39)
	at $line24.$read$$iw.<init>(<console>:41)
	at $line24.$read.<init>(<console>:43)
	at $line24.$read$.<init>(<console>:47)
	at $line24.$read$.<clinit>(<console>)
	at $line24.$eval$.$print$lzycompute(<console>:7)
	at $line24.$eval$.$print(<console>:6)
	at $line24.$eval.$print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
	at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
	at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
	at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
	at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
	at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
	at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
	at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
	at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
	at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:819)
	at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:691)
	at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:404)
	at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:425)
	at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:285)
	at org.apache.spark.repl.SparkILoop.runClosure(SparkILoop.scala:159)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:182)
	at org.apache.spark.repl.Main$.doMain(Main.scala:78)
	at org.apache.spark.repl.Main$.main(Main.scala:58)
	at org.apache.spark.repl.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
2020-06-04 14:04:38 AUDIT audit:97 - {"time":"2020年6月4日 下午02时04分38秒","username":"hulb","opName":"LOAD DATA","opId":"82817994090824","opStatus":"SUCCESS","opTime":"492 ms","table":"carbon201test.carbon201_test_table","extraInfo":{"SegmentId":"0","DataSize":"1.11KB","IndexSize":"663.0B"}}
res8: org.apache.spark.sql.DataFrame = []

Note that although the first LOAD DATA logs a FileNotFoundException (the materialized-view _system directory under the database location does not exist yet), the audit record still ends with opStatus SUCCESS and segment 0 is written. The second load below no longer hits the error.

scala>  spark.sql("LOAD DATA INPATH '/user/hulb/test/sample.csv' INTO TABLE carbon201_test_table")
2020-06-04 14:04:58 AUDIT audit:74 - {"time":"2020年6月4日 下午02时04分58秒","username":"hulb","opName":"LOAD DATA","opId":"82838494676198","opStatus":"START"}
2020-06-04 14:04:59 AUDIT audit:97 - {"time":"2020年6月4日 下午02时04分59秒","username":"hulb","opName":"LOAD DATA","opId":"82838494676198","opStatus":"SUCCESS","opTime":"829 ms","table":"carbon201test.carbon201_test_table","extraInfo":{"SegmentId":"1","DataSize":"1.11KB","IndexSize":"663.0B"}}
res9: org.apache.spark.sql.DataFrame = []
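
To finish the smoke test, the loaded data can be queried through the same session. A sketch based on the schema above (output omitted; SHOW SEGMENTS is CarbonData's command for listing loads, and should report the two segments created above):

spark.sql("select count(*) from carbon201_test_table").show()
spark.sql("select city, avg(age) from carbon201_test_table group by city").show()
spark.sql("show segments for table carbon201_test_table").show()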



