Original article: http://blog.csdn.net/nsrainbow/article/details/43243389 . For the latest tutorials, follow the original author's blog for a better reading experience.
$ sudo yum install impala # Binaries for daemons
$ sudo yum install impala-server # Service start/stop script
$ sudo yum install impala-state-store # Service start/stop script
$ sudo yum install impala-catalog # Service start/stop script
--> Finished Dependency Resolution
Error: Package: hadoop-libhdfs-2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6.x86_64 (cloudera-cdh5)
Requires: hadoop-hdfs = 2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6
Installed: hadoop-hdfs-2.5.0+cdh5.3.0+781-1.cdh5.3.0.p0.54.el6.x86_64 (@cloudera-cdh5)
hadoop-hdfs = 2.5.0+cdh5.3.0+781-1.cdh5.3.0.p0.54.el6
Available: hadoop-hdfs-2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6.x86_64 (cloudera-cdh5)
hadoop-hdfs = 2.5.0+cdh5.2.1+578-1.cdh5.2.1.p0.14.el6
You could try using --skip-broken to work around the problem
You could try running: rpm -Va --nofiles --nodigest
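This failure means the cloudera-cdh5 repo is serving CDH 5.2.1 packages while CDH 5.3.0 components are already installed, so yum cannot match the hadoop-hdfs dependency. A minimal sketch of one way out, assuming the repo file is simply pinned to the older release (the repo file name and version strings below are assumptions, check your own /etc/yum.repos.d/):
# Assumption: the repo baseurl pins cdh/5.2.1; point it at cdh/5.3.0 so the
# Impala packages match the installed hadoop-hdfs, then clear the cache and retry.
$ sudo sed -i 's|cdh/5.2.1|cdh/5.3.0|g' /etc/yum.repos.d/cloudera-cdh5.repo
$ sudo yum clean all
$ sudo yum install impala impala-server impala-state-store impala-catalog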
<property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
</property>
<property>
    <name>dfs.domain.socket.path</name>
    <value>/var/run/hdfs-sockets/dn._PORT</value>
</property>
<property>
    <name>dfs.client.file-block-storage-locations.timeout.millis</name>
    <value>10000</value>
</property>
[root@host1 run]# mkdir /var/run/hdfs-sockets/
[root@host1 run]# chown -R hdfs.hdfs /var/run/hdfs-sockets/
usermod -a -G hadoop impala
usermod -a -G hdfs impala
<property>
    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
    <value>true</value>
</property>
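These hdfs-site.xml changes only take effect after the HDFS daemons are restarted. Assuming a package-based install (not Cloudera Manager), the init scripts would be used roughly like this:
$ sudo service hadoop-hdfs-datanode restart
$ sudo service hadoop-hdfs-namenode restart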
$ sudo service impala-state-store start
$ sudo service impala-catalog start
$ sudo service impala-server start
If the host's CPU lacks the SSSE3 instruction set (common with some virtualized CPUs), impalad will refuse to start and the log will show:
E0202 08:01:24.944171 29251 cpu-info.cc:135] CPU does not support the Supplemental SSE3 (SSSE3) instruction set, which is required. Exiting if Supplemental SSE3 is not functional...
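Whether all three daemons actually came up is easiest to confirm from their glog files; a quick sketch assuming the default log directory /var/log/impala:
$ sudo tail -n 20 /var/log/impala/statestored.INFO
$ sudo tail -n 20 /var/log/impala/catalogd.INFO
$ sudo tail -n 20 /var/log/impala/impalad.INFO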
sudo yum install impala-shell
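With the shell installed, connecting to a running impalad is one command; host1 here assumes that is the node running impala-server:
$ impala-shell -i host1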
IMPALA_CATALOG_SERVICE_HOST=host1
IMPALA_STATE_STORE_HOST=host1
IMPALA_STATE_STORE_PORT=24000
IMPALA_BACKEND_PORT=22000
IMPALA_LOG_DIR=/var/log/impala
export IMPALA_STATE_STORE_ARGS=${IMPALA_STATE_STORE_ARGS:- \
-log_dir=${IMPALA_LOG_DIR} -state_store_port=${IMPALA_STATE_STORE_PORT}}
IMPALA_SERVER_ARGS=" \
-log_dir=${IMPALA_LOG_DIR} \
-catalog_service_host=${IMPALA_CATALOG_SERVICE_HOST} \
-state_store_port=${IMPALA_STATE_STORE_PORT} \
-use_statestore \
-state_store_host=${IMPALA_STATE_STORE_HOST} \
-be_port=${IMPALA_BACKEND_PORT}"
By default core dumps are disabled:
export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-false}
Changing it to
export ENABLE_CORE_DUMPS=${ENABLE_COREDUMPS:-true}
enables generation of core dump files.
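Edits to /etc/default/impala are only picked up when the daemons restart:
$ sudo service impala-state-store restart
$ sudo service impala-catalog restart
$ sudo service impala-server restart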
hdfs dfs -mkdir -p /user/cloudera/sample_data/tab1 /user/cloudera/sample_data/tab2
Create a local text file tab1.csv:
1,true,123.123,2012-10-24 08:55:00
2,false,1243.5,2012-10-25 13:40:00
3,false,24453.325,2008-08-22 09:33:21.123
4,false,243423.325,2007-05-12 22:32:21.33454
5,true,243.325,1953-04-22 09:11:33
And a second file, tab2.csv:
1,true,12789.123
2,false,1243.5
3,false,24453.325
4,false,2423.3254
5,true,243.325
60,false,243565423.325
70,true,243.325
80,false,243423.325
90,true,243.325
$ hdfs dfs -put tab1.csv /user/cloudera/sample_data/tab1
$ hdfs dfs -ls /user/cloudera/sample_data/tab1
Found 1 items
-rw-r--r-- 1 cloudera cloudera 192 2013-04-02 20:08 /user/cloudera/sample_data/tab1/tab1.csv
$ hdfs dfs -put tab2.csv /user/cloudera/sample_data/tab2
$ hdfs dfs -ls /user/cloudera/sample_data/tab2
Found 1 items
-rw-r--r-- 1 cloudera cloudera 158 2013-04-02 20:09 /user/cloudera/sample_data/tab2/tab2.csv
DROP TABLE IF EXISTS tab1;
-- The EXTERNAL clause means the data is located outside the central location
-- for Impala data files and is preserved when the associated Impala table is dropped.
-- We expect the data to already exist in the directory specified by the LOCATION clause.
CREATE EXTERNAL TABLE tab1
(
id INT,
col_1 BOOLEAN,
col_2 DOUBLE,
col_3 TIMESTAMP
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/sample_data/tab1';
DROP TABLE IF EXISTS tab2;
-- TAB2 is an external table, similar to TAB1.
CREATE EXTERNAL TABLE tab2
(
id INT,
col_1 BOOLEAN,
col_2 DOUBLE
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/user/cloudera/sample_data/tab2';
DROP TABLE IF EXISTS student;
CREATE TABLE student
(
id INT,
name STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
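These statements can be typed into impala-shell interactively, or saved to a file and run in batch mode; the file name here is just an illustration:
$ impala-shell -i host1 -f create_tables.sql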
[xmseapp03:21000] > select * from tab1;
Query: select * from tab1
+----+-------+------------+-------------------------------+
| id | col_1 | col_2 | col_3 |
+----+-------+------------+-------------------------------+
| 1 | true | 123.123 | 2012-10-24 08:55:00 |
| 2 | false | 1243.5 | 2012-10-25 13:40:00 |
| 3 | false | 24453.325 | 2008-08-22 09:33:21.123000000 |
| 4 | false | 243423.325 | 2007-05-12 22:32:21.334540000 |
| 5 | true | 243.325 | 1953-04-22 09:11:33 |
+----+-------+------------+-------------------------------+
Fetched 5 row(s) in 6.91s
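Since tab1 and tab2 share ids, a join is a quick sanity check that both external tables read their CSV files correctly (a sketch, not from the original post):
select t1.id, t1.col_1, t2.col_2
from tab1 t1 join tab2 t2 on t1.id = t2.id;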
1|AAAAAAAABAAAAAAA|980124|7135|32946|2452238|2452208|Mr.|Javier|Lewis|Y|9|12|1936|CHILE||[email protected]|2452508|
2|AAAAAAAACAAAAAAA|819667|1461|31655|2452318|2452288|Dr.|Amy|Moses|Y|9|4|1966|TOGO||[email protected]|2452318|
3|AAAAAAAADAAAAAAA|1473522|6247|48572|2449130|2449100|Miss|Latisha|Hamilton|N|18|9|1979|NIUE||[email protected]|2452313|
4|AAAAAAAAEAAAAAAA|1703214|3986|39558|2450030|2450000|Dr.|Michael|White|N|7|6|1983|MEXICO||[email protected]|2452361|
5|AAAAAAAAFAAAAAAA|953372|4470|36368|2449438|2449408|Sir|Robert|Moran|N|8|5|1956|FIJI||[email protected]|2452469|
Then upload it to HDFS:
hdfs dfs -put customer.dat /user/hive/tpcds/customer/
--
-- store_sales fact table and surrounding dimension tables only
--
create database tpcds;
use tpcds;
drop table if exists customer;
create external table customer
(
c_customer_sk int,
c_customer_id string,
c_current_cdemo_sk int,
c_current_hdemo_sk int,
c_current_addr_sk int,
c_first_shipto_date_sk int,
c_first_sales_date_sk int,
c_salutation string,
c_first_name string,
c_last_name string,
c_preferred_cust_flag string,
c_birth_day int,
c_birth_month int,
c_birth_year int,
c_birth_country string,
c_login string,
c_email_address string,
c_last_review_date string
)
row format delimited fields terminated by '|'
location '/user/hive/tpcds/customer';
impala-shell -i localhost -f customer_setup.sql
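A quick verification that the external table sees the uploaded file; the expected count is 5, matching the sample rows above:
$ impala-shell -i host1 -d tpcds -q 'select count(*) from customer'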
$ hdfs dfs -mkdir -p /user/impala/data/logs/year=2015/month=01/day=01/host=host1
$ hdfs dfs -mkdir -p /user/impala/data/logs/year=2015/month=02/day=22/host=host2
and prepare a text file a.txt to upload (b.txt is prepared the same way):
1,jack
2,michael
3,sara
4,john
hdfs dfs -put /root/a.txt /user/impala/data/logs/year=2015/month=01/day=01/host=host1
hdfs dfs -put /root/b.txt /user/impala/data/logs/year=2015/month=02/day=22/host=host2
create external table logs (id int, name string)
partitioned by (year string, month string, day string, host string)
row format delimited fields terminated by ','
location '/user/impala/data/logs';
alter table logs add partition (year="2015",month="01",day="01",host="host1");
alter table logs add partition (year="2015",month="02",day="22",host="host2");
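The registered partitions can be double-checked before querying:
show partitions logs;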
select * from logs;
insert into logs partition (year="2015", month="01", day="01", host="host1") values (6,"ted");
Query again:
select * from logs;
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>0.14.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.2.0</version>
</dependency>
Connection con = DriverManager.getConnection("jdbc:hive2://host1:10000/default", "hive", "");
change it to
Connection con = DriverManager.getConnection("jdbc:hive2://host1:21050/;auth=noSasl", "", "");
To keep the example simple, I trimmed ImpalaJdbcClient down to just the query part.
package org.crazycake.play_hive;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class ImpalaJdbcClient {

    /**
     * Note: hive-server2 uses the driver org.apache.hive.*, while hive-server
     * uses org.apache.hadoop.hive.*
     */
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";

    /**
     * @param args
     * @throws SQLException
     */
    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            // the Hive JDBC driver jar is missing from the classpath
            e.printStackTrace();
            System.exit(1);
        }

        // Impala's default JDBC port is 21050
        Connection con = DriverManager.getConnection("jdbc:hive2://xmseapp03:21050/;auth=noSasl", "", "");
        Statement stmt = con.createStatement();

        // run a plain select and print every row
        String sql = "select * from logs";
        System.out.println("Running: " + sql);
        ResultSet res = stmt.executeQuery(sql);
        while (res.next()) {
            System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
        }

        // release JDBC resources
        res.close();
        stmt.close();
        con.close();
    }
}
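Assuming a standard Maven layout with the dependencies above in pom.xml, the client can be compiled and run from the project root (exec-maven-plugin resolved by its prefix):
$ mvn compile exec:java -Dexec.mainClass=org.crazycake.play_hive.ImpalaJdbcClient
which prints: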
Running: select * from logs
3 sara
4 john
6 ted
1 jack
2 michael