参考:1 http://stackoverflow.com/questions/31701273/setting-spark-classpaths-on-ec2-spark-driver-extraclasspath-and-spark-executor
参考:2 http://stackoverflow.com/questions/34901331/spark-hbase-error-java-lang-illegalstateexception-unread-block-data
参考:3 http://blog.csdn.net/wzq294328238/article/details/48054525
1 基本环境:spark-2.1.0-bin-hadoop2.7,apache-hive-1.2.2,Hbase 1.3.1
2 问题描述:通过spark-sql访问hive中映射的hbase表时出现错误,Caused by: java.lang.IllegalStateException: unread block data。
spark-env.sh:
export HIVE_HOME=/root/apache-hive-1.2.2-bin
export HBASE_HOME=/root/hbase-1.3.1
export SCALA_HOME=/usr/share/scala
export JAVA_HOME=/usr/java/jdk1.7.0_79
export SPARK_MASTER_IP=centos01
export SPARK_WORKER_MEMORY=4G
export HADOOP_CONF_DIR=/root/hadoop-2.7.3/etc/hadoop
export JAVA_OPTS="-server -Xms800m -Xmx800m -XX:PermSize=64M -XX:MaxNewSize=256m -XX:MaxPermSize=128m -Djava.awt.headless=true"
#export SPARK_CLASSPATH=$HIVE_HOME/lib:$SPARK_CLASSPATH
export SPARK_MEM=512M
export LD_LIBRARY_PATH=/root/hadoop-2.7.3/lib/native
hive-site.xml:
javax.jdo.option.ConnectionURL
jdbc:mysql://centos03:3306/hive?createDatabaseIfNotExist=true
JDBC connect string for a JDBC metastore
javax.jdo.option.ConnectionDriverName
com.mysql.jdbc.Driver
Driver class name for a JDBC metastore
javax.jdo.option.ConnectionUserName
...
username to use against metastore database
javax.jdo.option.ConnectionPassword
...
password to use against metastore database
hive.aux.jars.path
file:///root/apache-hive-1.2.2-bin/lib/mysql-connector-java-5.1.41-bin.jar,file:///root/hbase-1.3.1/lib/activation-1.1.jar,file:///root/hbase-1.3.1/lib/aopalliance-1.0.jar,file:///root/hbase-1.3.1/lib/apacheds-i18n-2.0.0-M15.jar,file:///root/hbase-1.3.1/lib/apacheds-kerberos-codec-2.0.0-M15.jar,file:///root/hbase-1.3.1/lib/api-asn1-api-1.0.0-M20.jar,file:///root/hbase-1.3.1/lib/api-util-1.0.0-M20.jar,file:///root/hbase-1.3.1/lib/asm-3.1.jar,file:///root/hbase-1.3.1/lib/avro-1.7.4.jar,file:///root/hbase-1.3.1/lib/commons-beanutils-1.7.0.jar,file:///root/hbase-1.3.1/lib/commons-beanutils-core-1.8.0.jar,file:///root/hbase-1.3.1/lib/commons-cli-1.2.jar,file:///root/hbase-1.3.1/lib/commons-codec-1.9.jar,file:///root/hbase-1.3.1/lib/commons-collections-3.2.2.jar,file:///root/hbase-1.3.1/lib/commons-compress-1.4.1.jar,file:///root/hbase-1.3.1/lib/commons-configuration-1.6.jar,file:///root/hbase-1.3.1/lib/commons-daemon-1.0.13.jar,file:///root/hbase-1.3.1/lib/commons-digester-1.8.jar,file:///root/hbase-1.3.1/lib/commons-el-1.0.jar,file:///root/hbase-1.3.1/lib/commons-httpclient-3.1.jar,file:///root/hbase-1.3.1/lib/commons-io-2.4.jar,file:///root/hbase-1.3.1/lib/commons-lang-2.6.jar,file:///root/hbase-1.3.1/lib/commons-logging-1.2.jar,file:///root/hbase-1.3.1/lib/commons-math-2.2.jar,file:///root/hbase-1.3.1/lib/commons-math3-3.1.1.jar,file:///root/hbase-1.3.1/lib/commons-net-3.1.jar,file:///root/hbase-1.3.1/lib/disruptor-3.3.0.jar,file:///root/hbase-1.3.1/lib/findbugs-annotations-1.3.9-1.jar,file:///root/hbase-1.3.1/lib/guava-12.0.1.jar,file:///root/hbase-1.3.1/lib/guice-3.0.jar,file:///root/hbase-1.3.1/lib/guice-servlet-3.0.jar,file:///root/hbase-1.3.1/lib/hadoop-annotations-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-auth-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-client-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-common-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-hdfs-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-mapreduce-client-app-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-mapreduce-client-common-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-mapreduce-client-core-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-mapreduce-client-jobclient-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-mapreduce-client-shuffle-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-yarn-api-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-yarn-client-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-yarn-common-2.5.1.jar,file:///root/hbase-1.3.1/lib/hadoop-yarn-server-common-2.5.1.jar,file:///root/hbase-1.3.1/lib/hbase-annotations-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-annotations-1.3.1-tests.jar,file:///root/hbase-1.3.1/lib/hbase-client-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-common-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-common-1.3.1-tests.jar,file:///root/hbase-1.3.1/lib/hbase-examples-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-external-blockcache-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-hadoop2-compat-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-hadoop-compat-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-it-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-it-1.3.1-tests.jar,file:///root/hbase-1.3.1/lib/hbase-prefix-tree-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-procedure-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-protocol-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-resource-bundle-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-rest-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-server-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-server-1.3.1-tests.jar,file:///root/hbase-1.3.1/lib/hbase-shell-1.3.1.jar,file:///root/hbase-1.3.1/lib/hbase-thrift-1.3.1.jar,file:///root/hbase-1.3.1/lib/hive-hbase-handler-1.2.2.jar,file:///root/hbase-1.3.1/lib/htrace-core-3.1.0-incubating.jar,file:///root/hbase-1.3.1/lib/httpclient-4.2.5.jar,file:///root/hbase-1.3.1/lib/httpcore-4.4.1.jar,file:///root/hbase-1.3.1/lib/jackson-core-asl-1.9.13.jar,file:///root/hbase-1.3.1/lib/jackson-jaxrs-1.9.13.jar,file:///root/hbase-1.3.1/lib/jackson-mapper-asl-1.9.13.jar,file:///root/hbase-1.3.1/lib/jackson-xc-1.9.13.jar,file:///root/hbase-1.3.1/lib/jamon-runtime-2.4.1.jar,file:///root/hbase-1.3.1/lib/jasper-compiler-5.5.23.jar,file:///root/hbase-1.3.1/lib/jasper-runtime-5.5.23.jar,file:///root/hbase-1.3.1/lib/javax.inject-1.jar,file:///root/hbase-1.3.1/lib/java-xmlbuilder-0.4.jar,file:///root/hbase-1.3.1/lib/jaxb-api-2.2.2.jar,file:///root/hbase-1.3.1/lib/jaxb-impl-2.2.3-1.jar,file:///root/hbase-1.3.1/lib/jcodings-1.0.8.jar,file:///root/hbase-1.3.1/lib/jersey-client-1.9.jar,file:///root/hbase-1.3.1/lib/jersey-core-1.9.jar,file:///root/hbase-1.3.1/lib/jersey-guice-1.9.jar,file:///root/hbase-1.3.1/lib/jersey-json-1.9.jar,file:///root/hbase-1.3.1/lib/jersey-server-1.9.jar,file:///root/hbase-1.3.1/lib/jets3t-0.9.0.jar,file:///root/hbase-1.3.1/lib/jettison-1.3.3.jar,file:///root/hbase-1.3.1/lib/jetty-6.1.26.jar,file:///root/hbase-1.3.1/lib/jetty-sslengine-6.1.26.jar,file:///root/hbase-1.3.1/lib/jetty-util-6.1.26.jar,file:///root/hbase-1.3.1/lib/joni-2.1.2.jar,file:///root/hbase-1.3.1/lib/jruby-complete-1.6.8.jar,file:///root/hbase-1.3.1/lib/jsch-0.1.42.jar,file:///root/hbase-1.3.1/lib/jsp-2.1-6.1.14.jar,file:///root/hbase-1.3.1/lib/jsp-api-2.1-6.1.14.jar,file:///root/hbase-1.3.1/lib/junit-4.12.jar,file:///root/hbase-1.3.1/lib/leveldbjni-all-1.8.jar,file:///root/hbase-1.3.1/lib/libthrift-0.9.3.jar,file:///root/hbase-1.3.1/lib/log4j-1.2.17.jar,file:///root/hbase-1.3.1/lib/metrics-core-2.2.0.jar,file:///root/hbase-1.3.1/lib/netty-all-4.0.23.Final.jar,file:///root/hbase-1.3.1/lib/paranamer-2.3.jar,file:///root/hbase-1.3.1/lib/protobuf-java-2.5.0.jar,file:///root/hbase-1.3.1/lib/ruby,file:///root/hbase-1.3.1/lib/servlet-api-2.5-6.1.14.jar,file:///root/hbase-1.3.1/lib/servlet-api-2.5.jar,file:///root/hbase-1.3.1/lib/slf4j-api-1.7.7.jar,file:///root/hbase-1.3.1/lib/slf4j-log4j12-1.7.5.jar,file:///root/hbase-1.3.1/lib/snappy-java-1.0.4.1.jar,file:///root/hbase-1.3.1/lib/spymemcached-2.11.6.jar,file:///root/hbase-1.3.1/lib/xmlenc-0.52.jar,file:///root/hbase-1.3.1/lib/xz-1.0.jar,file:///root/hbase-1.3.1/lib/zookeeper-3.4.6.jar
这样配置以后,spark-sql 可以访问hive(mysql驱动已分别拷贝到相应目录),hbase和hive也可以相互访问(需另外配置)。但是通过spark-sql 访问hive中的hbase表时,总是出现错误:Caused by: java.lang.IllegalStateException: unread block data...。纠结了一天,查看了很多网页,最后用如下命令解决:spark-sql --jars /root/hbase-1.3.1/lib/hive-hbase-handler-1.2.2.jar,/root/hbase-1.3.1/lib/hbase-client-1.3.1.jar,/root/hbase-1.3.1/lib/hbase-common-1.3.1.jar,/root/hbase-1.3.1/lib/hbase-server-1.3.1.jar,/root/hbase-1.3.1/lib/hbase-hadoop2-compat-1.3.1.jar,/root/hbase-1.3.1/lib/guava-12.0.1.jar,/root/hbase-1.3.1/lib/hbase-protocol-1.3.1.jar,/root/hbase-1.3.1/lib/htrace-core-3.1.0-incubating.jar,/root/hbase-1.3.1/lib/protobuf-java-2.5.0.jar,/root/hbase-1.3.1/lib/metrics-core-2.2.0.jar
不过,每次都输入这么长的一段命令太麻烦。最后可以在 spark-defaults.conf 中设置以下两个参数来解决(注意:--jars 中的jar包用逗号分隔,而这两个参数中的jar包用冒号分隔):
spark.driver.extraClassPath
spark.executor.extraClassPath
# cat spark-defaults.conf
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.
# Example:
spark.master spark://centos01:7077
spark.eventLog.enabled true
spark.eventLog.dir hdfs://centos01:9000/sparkLogs
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.driver.memory 5g
spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.driver.extraClassPath /root/hbase-1.3.1/lib/hive-hbase-handler-1.2.2.jar:/root/hbase-1.3.1/lib/hbase-client-1.3.1.jar:/root/hbase-1.3.1/lib/hbase-common-1.3.1.jar:/root/hbase-1.3.1/lib/hbase-server-1.3.1.jar:/root/hbase-1.3.1/lib/hbase-hadoop2-compat-1.3.1.jar:/root/hbase-1.3.1/lib/guava-12.0.1.jar:/root/hbase-1.3.1/lib/hbase-protocol-1.3.1.jar:/root/hbase-1.3.1/lib/htrace-core-3.1.0-incubating.jar:/root/hbase-1.3.1/lib/protobuf-java-2.5.0.jar:/root/hbase-1.3.1/lib/metrics-core-2.2.0.jar
spark.executor.extraClassPath /root/hbase-1.3.1/lib/hive-hbase-handler-1.2.2.jar:/root/hbase-1.3.1/lib/hbase-client-1.3.1.jar:/root/hbase-1.3.1/lib/hbase-common-1.3.1.jar:/root/hbase-1.3.1/lib/hbase-server-1.3.1.jar:/root/hbase-1.3.1/lib/hbase-hadoop2-compat-1.3.1.jar:/root/hbase-1.3.1/lib/guava-12.0.1.jar:/root/hbase-1.3.1/lib/hbase-protocol-1.3.1.jar:/root/hbase-1.3.1/lib/htrace-core-3.1.0-incubating.jar:/root/hbase-1.3.1/lib/protobuf-java-2.5.0.jar:/root/hbase-1.3.1/lib/metrics-core-2.2.0.jar