Guiding questions:
1. What are the ways to import data into HBase?
2. How does bulk load import data?
3. How does Sqoop import data into HBase?
Prerequisite knowledge: starting HBase
Listing 1. Modifying the hosts file
[root@node1:2 hbase-0.96.1.1-cdh5.0.1]# cat /etc/hosts
10.17.139.186 node1
10.17.139.185 scheduler2

Listing 2. Starting the HBase service
[root@node1:2 bin]# ./start-hbase.sh
starting master, logging to /home/zhoumingyao/hbase-0.96.1.1-cdh5.0.1/bin/../logs/hbase-root-master-node1.out
[root@node1:2 bin]# jps
2981 SchedulerServer
46776 Jps
29242 org.eclipse.equinox.launcher_1.1.0.v20100507.jar
2686 IvmsSchedulerDog
46430 HMaster
[root@node1:2 bin]# ps -ef | grep hbase
root 46415 1 0 09:34 pts/2 00:00:00 bash /home/zhoumingyao/hbase-0.96.1.1-cdh5.0.1/bin/hbase-daemon.sh --config /home/zhoumingyao/hbase-0.96.1.1-cdh5.0.1/bin/../conf internal_start master
root 46430 46415 91 09:34 pts/2 00:00:19 /usr/share/jdk1.8.0_45/bin/java -Dproc_master -XX:OnOutOfMemoryError=kill -9 %p -Xmx1000m -XX:+UseConcMarkSweepGC -Dhbase.log.dir=/home/zhoumingyao/hbase-0.96.1.1-cdh5.0.1/bin/../logs -Dhbase.log.file=hbase-root-master-node1.log -Dhbase.home.dir=/home/zhoumingyao/hbase-0.96.1.1-cdh5.0.1/bin/.. -Dhbase.id.str=root -Dhbase.root.logger=INFO,RFA -Dhbase.security.logger=INFO,RFAS org.apache.hadoop.hbase.master.HMaster start
root 47464 1078 0 09:34 pts/2 00:00:00 grep hbase

Listing 3. Inserting sample data
hbase(main):002:0> put 'test', 'row1', 'cf:a', 'value1'
0 row(s) in 0.1180 seconds
=> ["test"]
hbase(main):004:0> scan 'test'
ROW            COLUMN+CELL
 row1          column=cf:a, timestamp=1439861879625, value=value1
1 row(s) in 0.0380 seconds
hbase(main):005:0> put 'test', 'row2', 'cf:b', 'value2'
0 row(s) in 0.0170 seconds
hbase(main):006:0> put 'test', 'row3', 'cf:c', 'value3'
0 row(s) in 0.0130 seconds
hbase(main):007:0> scan 'test'
ROW            COLUMN+CELL
 row1          column=cf:a, timestamp=1439861879625, value=value1
 row2          column=cf:b, timestamp=1439861962080, value=value2
 row3          column=cf:c, timestamp=1439861968096, value=value3
3 row(s) in 0.0270 seconds
hbase(main):008:0> put 'test', 'row2', 'cf:b', 'value2'
0 row(s) in 0.0080 seconds
hbase(main):009:0> scan 'test'
ROW            COLUMN+CELL
 row1          column=cf:a, timestamp=1439861879625, value=value1
 row2          column=cf:b, timestamp=1439861984176, value=value2
 row3          column=cf:c, timestamp=1439861968096, value=value3
3 row(s) in 0.0230 seconds
hbase(main):013:0> put 'test','row1','cf:a','value2'
0 row(s) in 0.0150 seconds
hbase(main):014:0> scan 'test'
ROW            COLUMN+CELL
 row1          column=cf:1, timestamp=1439862083677, value=value1
 row1          column=cf:a, timestamp=1439862100401, value=value2
 row2          column=cf:b, timestamp=1439861984176, value=value2
 row3          column=cf:c, timestamp=1439861968096, value=value3
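The shell puts above can also be read back through the Java client API that the rest of this article uses. The following is a minimal sketch against the 0.9x-era HTable API, not part of the original article; GetScanDemo is an illustrative name, while the table and column names come from the shell session.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class GetScanDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "test");
        // Single-row read: the equivalent of "get 'test', 'row1'" in the shell
        Get get = new Get(Bytes.toBytes("row1"));
        Result result = table.get(get);
        System.out.println("row1 cf:a = "
                + Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("a"))));
        // Full-table read: the equivalent of "scan 'test'"
        ResultScanner scanner = table.getScanner(new Scan());
        try {
            for (Result row : scanner) {
                System.out.println(row);
            }
        } finally {
            scanner.close();
        }
        table.close();
    }
}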
Importing data into HBase
The first approach is bulk load: a MapReduce job writes the data as HFiles, which are then handed directly to the region servers (the LoadIncrementalHFiles step is shown near the end of this article).

Listing 4. Generating HFiles with MapReduce

import org.apache.hadoop.conf.Configuration;
……

public class generateHFile {

    public static class generateHFileMapper extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] items = line.split(",", -1);
            ImmutableBytesWritable rowkey = new ImmutableBytesWritable(items[0].getBytes());
            KeyValue kvProtocol = new KeyValue(items[0].getBytes(), "colfam1".getBytes(),
                    "colfam1".getBytes(), items[0].getBytes());
            context.write(rowkey, kvProtocol);
        }
    }

    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = HBaseConfiguration.create();
        System.out.println("conf=" + conf);
        HTable table = new HTable(conf, "testtable1");
        System.out.println("table=" + table);
        Job job = new Job(conf, "generateHFile");
        job.setJarByClass(generateHFile.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(KeyValue.class);
        job.setMapperClass(generateHFileMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);
        // Emit HFiles instead of writing through the region servers
        job.setOutputFormatClass(HFileOutputFormat.class);
        // configureIncrementalLoad() wires the job up automatically:
        // SimpleTotalOrderPartitioner first sorts the keys globally, then
        // assigns them to reducers so that the [min, max] key range of each
        // reducer never overlaps with another's.
        HFileOutputFormat.configureIncrementalLoad(job, table);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
The second approach writes through the HBase client API (the HBaseHelper utility it relies on is shown in full after Listing 6).

Listing 5. Importing with the HBase Client API

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class PutDemo {
    public static void main(String[] args) throws IOException {
        // Create the HBase configuration context
        Configuration conf = HBaseConfiguration.create();
        System.out.println("conf=" + conf);
        int count = 0;
        HBaseHelper helper = HBaseHelper.getHelper(conf);
        System.out.println("helper=" + helper);
        helper.dropTable("testtable1");
        helper.createTable("testtable1", "colfam1");
        HTable table = new HTable(conf, "testtable1");
        long start = System.currentTimeMillis();
        for (int i = 1; i < 100000; i++) {
            // Set the row key
            Put put = new Put(Bytes.toBytes("row" + i));
            // Set family:qualifier = value
            put.add(Bytes.toBytes("colfam1"), Bytes.toBytes("qual1"), Bytes.toBytes("val1"));
            put.add(Bytes.toBytes("colfam1"), Bytes.toBytes("qual2"), Bytes.toBytes("val2"));
            // Insert the row into HBase table testtable1
            table.put(put);
            count++;
            if (count % 10000 == 0) {
                System.out.println("Completed 10000 rows insertion");
            }
        }
        System.out.println(System.currentTimeMillis() - start);
        table.close();
    }
}
Listing 6. Output from running the HBase Client approach
conf=Configuration: core-default.xml, core-site.xml, hbase-default.xml, hbase-site.xml
2015-08-20 18:58:18,184 WARN [main] util.NativeCodeLoader (NativeCodeLoader.java:<clinit>(62)) - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2015-08-20 18:58:18,272 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:zookeeper.version=3.4.5-cdh4.6.0--1, built on 02/26/2014 09:15 GMT
2015-08-20 18:58:18,273 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:host.name=node3
2015-08-20 18:58:18,273 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:java.version=1.7.0_79
2015-08-20 18:58:18,273 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:java.vendor=Oracle Corporation
2015-08-20 18:58:18,273 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:java.home=/usr/lib/jdk1.7.0_79/jre
2015-08-20 18:58:18,273 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:java.class.path=./zz.jar
2015-08-20 18:58:18,273 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:java.library.path=/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
2015-08-20 18:58:18,273 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:java.io.tmpdir=/tmp
2015-08-20 18:58:18,274 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:java.compiler=<NA>
2015-08-20 18:58:18,274 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:os.name=Linux
2015-08-20 18:58:18,274 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:os.arch=amd64
2015-08-20 18:58:18,274 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:os.version=2.6.32-220.el6.x86_64
2015-08-20 18:58:18,274 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:user.name=root
2015-08-20 18:58:18,274 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:user.home=/root
2015-08-20 18:58:18,274 INFO [main] zookeeper.ZooKeeper (Environment.java:logEnv(100)) - Client environment:user.dir=/home/zhoumingyao
2015-08-20 18:58:18,277 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:<init>(438)) - Initiating client connection, connectString=localhost:2181 sessionTimeout=180000 watcher=hconnection
2015-08-20 18:58:18,294 INFO [main] zookeeper.RecoverableZooKeeper (RecoverableZooKeeper.java:<init>(104)) - The identifier of this process is 32390@node3
2015-08-20 18:58:18,300 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:logStartConnect(973)) - Opening socket connection to server localhost/127.0.0.1:2181. Will not attempt to authenticate using SASL (unknown error)
2015-08-20 18:58:18,308 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:primeConnection(852)) - Socket connection established to localhost/127.0.0.1:2181, initiating session
2015-08-20 18:58:18,317 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:onConnected(1214)) - Session establishment complete on server localhost/127.0.0.1:2181, sessionid = 0x24f2624839f0023, negotiated timeout = 180000
2015-08-20 18:58:18,394 WARN [main] conf.Configuration (Configuration.java:warnOnceIfDeprecated(981)) - hadoop.native.lib is deprecated. Instead, use io.native.lib.available
helper=HBaseHelper@5d48e5d6
2015-08-20 18:58:18,570 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:<init>(438)) - Initiating client connection, connectString=localhost:2181 sessionTimeout=180000 watcher=catalogtracker-on-org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation@6c521576
2015-08-20 18:58:18,571 INFO [main] zookeeper.RecoverableZooKeeper (RecoverableZooKeeper.java:<init>(104)) - The identifier of this process is 32390@node3
2015-08-20 18:58:18,572 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:logStartConnect(973)) - Opening socket connection to server localhost/0:0:0:0:0:0:0:1:2181. Will not attempt to authenticate using SASL (unknown error)
2015-08-20 18:58:18,572 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:primeConnection(852)) - Socket connection established to localhost/0:0:0:0:0:0:0:1:2181, initiating session
2015-08-20 18:58:18,575 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:onConnected(1214)) - Session establishment complete on server localhost/0:0:0:0:0:0:0:1:2181, sessionid = 0x24f2624839f0024, negotiated timeout = 180000
2015-08-20 18:58:18,647 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:close(684)) - Session: 0x24f2624839f0024 closed
2015-08-20 18:58:18,647 INFO [main-EventThread] zookeeper.ClientCnxn (ClientCnxn.java:run(512)) - EventThread shut down
2015-08-20 18:58:18,672 INFO [main] client.HBaseAdmin (HBaseAdmin.java:disableTableAsync(858)) - Started disable of testtable1
2015-08-20 18:58:18,676 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:<init>(438)) - Initiating client connection, connectString=localhost:2181 sessionTimeout=180000 watcher=catalogtracker-on-org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation@6c521576
2015-08-20 18:58:18,678 INFO [main] zookeeper.RecoverableZooKeeper (RecoverableZooKeeper.java:<init>(104)) - The identifier of this process is 32390@node3
2015-08-20 18:58:18,679 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:logStartConnect(973)) - Opening socket connection to server localhost/127.0.0.1:2181. Will not attempt to authenticate using SASL (unknown error)
2015-08-20 18:58:18,680 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:primeConnection(852)) - Socket connection established to localhost/127.0.0.1:2181, initiating session
2015-08-20 18:58:18,683 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:onConnected(1214)) - Session establishment complete on server localhost/127.0.0.1:2181, sessionid = 0x24f2624839f0025, negotiated timeout = 180000
2015-08-20 18:58:18,705 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:close(684)) - Session: 0x24f2624839f0025 closed
2015-08-20 18:58:18,705 INFO [main-EventThread] zookeeper.ClientCnxn (ClientCnxn.java:run(512)) - EventThread shut down
2015-08-20 18:58:19,713 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:<init>(438)) - Initiating client connection, connectString=localhost:2181 sessionTimeout=180000 watcher=catalogtracker-on-org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation@6c521576
2015-08-20 18:58:19,714 INFO [main] zookeeper.RecoverableZooKeeper (RecoverableZooKeeper.java:<init>(104)) - The identifier of this process is 32390@node3
2015-08-20 18:58:19,715 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:logStartConnect(973)) - Opening socket connection to server localhost/0:0:0:0:0:0:0:1:2181. Will not attempt to authenticate using SASL (unknown error)
2015-08-20 18:58:19,716 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:primeConnection(852)) - Socket connection established to localhost/0:0:0:0:0:0:0:1:2181, initiating session
2015-08-20 18:58:19,720 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:onConnected(1214)) - Session establishment complete on server localhost/0:0:0:0:0:0:0:1:2181, sessionid = 0x24f2624839f0026, negotiated timeout = 180000
2015-08-20 18:58:19,733 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:close(684)) - Session: 0x24f2624839f0026 closed
2015-08-20 18:58:19,733 INFO [main-EventThread] zookeeper.ClientCnxn (ClientCnxn.java:run(512)) - EventThread shut down
2015-08-20 18:58:19,735 INFO [main] client.HBaseAdmin (HBaseAdmin.java:disableTable(905)) - Disabled testtable1
2015-08-20 18:58:20,763 INFO [main] client.HBaseAdmin (HBaseAdmin.java:deleteTable(656)) - Deleted testtable1
table=testtable1
2015-08-20 18:58:21,809 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:<init>(438)) - Initiating client connection, connectString=localhost:2181 sessionTimeout=180000 watcher=catalogtracker-on-org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation@6c521576
2015-08-20 18:58:21,810 INFO [main] zookeeper.RecoverableZooKeeper (RecoverableZooKeeper.java:<init>(104)) - The identifier of this process is 32390@node3
2015-08-20 18:58:21,811 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:logStartConnect(973)) - Opening socket connection to server localhost/127.0.0.1:2181. Will not attempt to authenticate using SASL (unknown error)
2015-08-20 18:58:21,812 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:primeConnection(852)) - Socket connection established to localhost/127.0.0.1:2181, initiating session
2015-08-20 18:58:21,816 INFO [main-SendThread(localhost:2181)] zookeeper.ClientCnxn (ClientCnxn.java:onConnected(1214)) - Session establishment complete on server localhost/127.0.0.1:2181, sessionid = 0x24f2624839f0027, negotiated timeout = 180000
2015-08-20 18:58:21,828 INFO [main] zookeeper.ZooKeeper (ZooKeeper.java:close(684)) - Session: 0x24f2624839f0027 closed
2015-08-20 18:58:21,828 INFO [main-EventThread] zookeeper.ClientCnxn (ClientCnxn.java:run(512)) - EventThread shut down
Completed 10000 rows insertion
Completed 10000 rows insertion
Completed 10000 rows insertion
Completed 10000 rows insertion
Completed 10000 rows insertion
Completed 10000 rows insertion
Completed 10000 rows insertion
Completed 10000 rows insertion
Completed 10000 rows insertion
127073ms
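The roughly 127 seconds for ~100,000 rows above is dominated by RPC round-trips, because each table.put(put) travels to the region server on its own. A hedged optimization sketch, not part of the original article: disable auto-flush and batch the puts client-side, which is the same trick the MapReduce importer below applies with a 12 MB write buffer. BatchPutDemo, the batch size, and the buffer size are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class BatchPutDemo {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "testtable1");
        // Buffer writes client-side instead of sending one RPC per put()
        table.setAutoFlush(false);
        table.setWriteBufferSize(4 * 1024 * 1024); // 4 MB buffer (tunable)
        List<Put> batch = new ArrayList<Put>(1000);
        long start = System.currentTimeMillis();
        for (int i = 1; i < 100000; i++) {
            Put put = new Put(Bytes.toBytes("row" + i));
            put.add(Bytes.toBytes("colfam1"), Bytes.toBytes("qual1"), Bytes.toBytes("val1"));
            batch.add(put);
            if (batch.size() == 1000) { // hand over 1000 puts at a time
                table.put(batch);
                batch.clear();
            }
        }
        if (!batch.isEmpty()) {
            table.put(batch);
        }
        table.flushCommits(); // push any writes still sitting in the buffer
        table.close();
        System.out.println(System.currentTimeMillis() - start);
    }
}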
PutDemo relies on the following HBaseHelper utility class:

import org.apache.hadoop.conf.Configuration;
……

/**
 * Used by the book examples to generate tables and fill them with test data.
 */
public class HBaseHelper {
    // To talk to HBase from Java, we first create a Configuration object and
    // use it to create HTable instances; the HTable object then handles all
    // client API calls.
    private Configuration conf = null;
    private HBaseAdmin admin = null;

    protected HBaseHelper(Configuration conf) throws IOException {
        this.conf = conf;
        this.admin = new HBaseAdmin(conf);
    }

    public static HBaseHelper getHelper(Configuration conf) throws IOException {
        return new HBaseHelper(conf);
    }

    // The original listing elides these three admin helpers, although
    // dropTable() and PutDemo call them; minimal versions are filled in here.
    public boolean existsTable(String table) throws IOException {
        return admin.tableExists(table);
    }

    public void createTable(String table, String... colfams) throws IOException {
        HTableDescriptor desc = new HTableDescriptor(table);
        for (String cf : colfams) {
            desc.addFamily(new HColumnDescriptor(cf));
        }
        admin.createTable(desc);
    }

    public void disableTable(String table) throws IOException {
        admin.disableTable(table);
    }

    public void dropTable(String table) throws IOException {
        if (existsTable(table)) {
            disableTable(table);
            admin.deleteTable(table);
        }
    }

    public void put(String table, String row, String fam, String qual, long ts,
            String val) throws IOException {
        HTable tbl = new HTable(conf, table);
        Put put = new Put(Bytes.toBytes(row));
        put.add(Bytes.toBytes(fam), Bytes.toBytes(qual), ts, Bytes.toBytes(val));
        tbl.put(put);
        tbl.close();
    }

    public void put(String table, String[] rows, String[] fams, String[] quals,
            long[] ts, String[] vals) throws IOException {
        HTable tbl = new HTable(conf, table);
        for (String row : rows) {
            Put put = new Put(Bytes.toBytes(row));
            for (String fam : fams) {
                int v = 0;
                for (String qual : quals) {
                    String val = vals[v < vals.length ? v : vals.length - 1];
                    long t = ts[v < ts.length ? v : ts.length - 1];
                    put.add(Bytes.toBytes(fam), Bytes.toBytes(qual), t, Bytes.toBytes(val));
                    v++;
                }
            }
            tbl.put(put);
        }
        tbl.close();
    }

    public void dump(String table, String[] rows, String[] fams, String[] quals)
            throws IOException {
        HTable tbl = new HTable(conf, table);
        List<Get> gets = new ArrayList<Get>();
        for (String row : rows) {
            Get get = new Get(Bytes.toBytes(row));
            get.setMaxVersions();
            if (fams != null) {
                for (String fam : fams) {
                    for (String qual : quals) {
                        get.addColumn(Bytes.toBytes(fam), Bytes.toBytes(qual));
                    }
                }
            }
            gets.add(get);
        }
        Result[] results = tbl.get(gets);
        for (Result result : results) {
            for (KeyValue kv : result.raw()) {
                System.out.println("KV: " + kv + ", Value: " + Bytes.toString(kv.getValue()));
            }
        }
        tbl.close();
    }
}
Data can also be imported with a map-only MapReduce job that writes through the client API:

import java.io.IOException;
……

public class HBaseImportByMapReduce extends Configured implements Tool {
    static final Log LOG = LogFactory.getLog(HBaseImportByMapReduce.class);
    public static final String JOBNAME = "MapReduceImport";

    public static class Map extends Mapper<LongWritable, Text, NullWritable, NullWritable> {
        Configuration configuration = null;
        HTable xTable = null;
        static long count = 0;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
            configuration = context.getConfiguration();
            xTable = new HTable(configuration, "testtable2");
            // Buffer writes client-side: one flush per 12 MB instead of one RPC per put
            xTable.setAutoFlush(false);
            xTable.setWriteBufferSize(12 * 1024 * 1024);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] all = value.toString().split("\t");
            Put put = new Put(Bytes.toBytes(all[0]));
            // Stores an empty value under colfam1:value1, as in the original listing
            put.add(Bytes.toBytes("colfam1"), Bytes.toBytes("value1"), null);
            xTable.put(put);
            if ((++count % 100) == 0) {
                context.setStatus(count + " DOCUMENTS done!");
                context.progress();
                System.out.println(count + " DOCUMENTS done!");
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            super.cleanup(context);
            xTable.flushCommits();
            xTable.close();
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        String input = args[0];
        Configuration conf = HBaseConfiguration.create(getConf());
        conf.set("hbase.master", "node1:60000");
        Job job = new Job(conf, JOBNAME);
        job.setJarByClass(HBaseImportByMapReduce.class);
        job.setMapperClass(Map.class);
        job.setNumReduceTasks(0); // map-only job: every mapper writes straight to HBase
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.setInputPaths(job, input);
        job.setOutputFormatClass(NullOutputFormat.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        int res = 1;
        try {
            // The original listing breaks off here; completed with the
            // standard ToolRunner pattern.
            res = ToolRunner.run(conf, new HBaseImportByMapReduce(), otherArgs);
        } catch (Exception e) {
            LOG.error(e);
        }
        System.exit(res);
    }
}
Prepare a tab-separated data file:

[root@node3 zhoumingyao]# vi data.tsv
1001	name1	17	00000000001
1002	name2	16	00000000002
1003	name3	16	00000000003
1004	name4	16	00000000004
Create an HDFS directory for the data:

$HADOOP_HOME/bin/hadoop fs -mkdir /user/test

Create the target table:

create 'student', {NAME => 'info'}

Call the importtsv command to import the data:

$HADOOP_HOME/bin/hadoop jar /usr/lib/cdh/hbase/hbase-0.94.15-hdh4.6.0.jar importtsv -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:age,info:phone -Dimporttsv.bulk.output=/user/test/output/ student /user/test/data.tsv

While the job starts up, the console may show retry attempts such as:
15/08/21 13:41:27 INFO ipc.Client: Retrying connect to server: node1/172.10.201.62:18040. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS)
Create the directory for the generated files:

$HADOOP_HOME/bin/hadoop fs -mkdir /user/hac/output

Start the import:

$HADOOP_HOME/bin/hadoop jar /usr/lib/cdh/hbase/hbase-0.94.15-hdh4.6.0.jar importtsv -Dimporttsv.bulk.output=/user/hac/output/2-1 -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:age,info:phone student /user/hac/input/2-1

Complete the bulk load:

$HADOOP_HOME/bin/hadoop jar /usr/lib/cdh/hbase/hbase-0.94.15-hdh4.6.0.jar completebulkload /user/hac/output/2-1 student
The same bulk load step can be done programmatically:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class loadIncrementalHFileToHBase {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseHelper helper = HBaseHelper.getHelper(conf);
        helper.dropTable("testtable2");
        helper.createTable("testtable2", "colfam1");
        HTable table = new HTable(conf, "testtable2"); // the original omitted conf here
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(new Path(args[0]), table);
    }
}
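Whichever route is used, the CLI completebulkload or the LoadIncrementalHFiles API, the loaded rows are immediately visible to readers. The following verification sketch is an addition to the original workflow; VerifyBulkLoad is an illustrative name, while the table and family come from the student importtsv example above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class VerifyBulkLoad {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "student");
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("info"));
        scan.setCaching(100); // fetch 100 rows per RPC while scanning
        ResultScanner scanner = table.getScanner(scan);
        int rows = 0;
        try {
            for (Result r : scanner) {
                System.out.println(Bytes.toString(r.getRow()) + " -> name="
                        + Bytes.toString(r.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
                rows++;
            }
        } finally {
            scanner.close();
        }
        System.out.println(rows + " rows loaded");
        table.close();
    }
}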
Configure the Sqoop environment:

export SQOOP_HOME=/home/zhoumingyao/sqoop2-1.99.3-cdh5.0.1
export PATH=$SQOOP_HOME/bin:$PATH
source /etc/profile
Run the import from Oracle into HBase:

sqoop import
  --connect jdbc:oracle:thin:@172.7.27.225:1521:testzmy
  --username SYSTEM
  --password hik123456
  --query 'SELECT RYID, HZCZRK_JBXXB.ZPID, HZCZRK_JBXXB.GMSFHM, HZCZRK_JBXXB.XM, HZCZRK_JBXXB.XB, HZCZRK_JBXXB.CSRQ, HZCZRK_ZPXXB.ZP AS ZP FROM HZCZRK_JBXXB JOIN HZCZRK_ZPXXB USING(RYID) WHERE $CONDITIONS'
  --split-by RYID
  --map-column-java ZP=String
  --hbase-table TESTHZ
  --column-family INFO

Option notes:
--connect: the Oracle JDBC URL.
--username: the Oracle user name (must be upper case).
--password: the Oracle password.
--query: the Oracle query to export; Sqoop supports multi-table queries, and the mandatory $CONDITIONS placeholder is where Sqoop injects its split predicates.
--split-by: the column used to divide the work among parallel tasks, usually the primary key.
--map-column-java: ZP is an Oracle LONG RAW column, which Sqoop does not support directly, so it is mapped to a Java String.
--hbase-table: the target table in HBase.
--column-family: the target column family in HBase.
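Once the Sqoop job finishes, individual records can be fetched by row key. A hedged sketch, assuming Sqoop's usual defaults: with no --hbase-row-key given, the --split-by column RYID becomes the row key, and each selected column lands under INFO with the Oracle column name as qualifier. VerifySqoopImport is an illustrative name, not from the original.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class VerifySqoopImport {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "TESTHZ");
        // args[0] is an RYID value, assumed here to be the row key
        Get get = new Get(Bytes.toBytes(args[0]));
        get.addFamily(Bytes.toBytes("INFO"));
        Result result = table.get(get);
        // Dump every cell of the record; qualifiers follow the Oracle column names
        for (KeyValue kv : result.raw()) {
            System.out.println(Bytes.toString(kv.getQualifier()) + " = "
                    + Bytes.toString(kv.getValue()));
        }
        table.close();
    }
}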