-- sqoop 1014 --
-- Connect to the relational database (shell command that lists the MySQL databases):
-- sqoop list-databases --connect jdbc:mysql://localhost:3306/ --username root --password 123456

-- Rebuild the demo database from scratch. IF EXISTS keeps the very first run
-- from failing when the database has not been created yet.
DROP DATABASE IF EXISTS sqoop;
-- NOTE(review): in MySQL `utf8` is the 3-byte utf8mb3 alias; consider utf8mb4.
CREATE DATABASE sqoop CHARSET utf8;
SHOW DATABASES;
USE sqoop;

-- DROP TABLE sqoop.sq_table;
SHOW TABLES;

-- Source table used by the import/export examples in this file.
CREATE TABLE IF NOT EXISTS sqoop.sq_table (
    id INT,
    name VARCHAR(255),
    sex VARCHAR(255)
);

-- Seed rows; the explicit column list keeps the insert valid if columns are added later.
INSERT INTO sq_table (id, name, sex) VALUES
    (1, 'thomas', '男'),
    (2, 'catalina', '女');

SELECT id, name, sex FROM sq_table;
– sqoop的导入导出是相对hdfs而言,如果数据从rdbms进入到hdfs,则为导入;从hdfs到出到rdbms则为导出
– 全局导入数据 sqoop import
– 导入数据 delete-target-dir 导入之前删除原来的数据
sqoop import
–connect jdbc:mysql://localhost:3306/sqoop
–username root
–password 123456
–table sq_table
–target-dir /data/test
–delete-target-dir
–num-mappers 1 \ – 只开一个mapreduce
–fields-terminated-by “\t”
-- Free-form query import: --query cannot be combined with --table,
-- and the WHERE clause must contain the literal token $CONDITIONS.
-- The query is single-quoted so the shell does not expand $CONDITIONS, and it
-- carries no trailing semicolon, matching the form shown in the Sqoop user guide.
sqoop import \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --target-dir /data/test \
  --delete-target-dir \
  --num-mappers 1 \
  --fields-terminated-by "\t" \
  --query 'select name, sex from sq_table where id<=1 and $CONDITIONS'
-- Import selected columns: separate multiple columns in --columns with commas
-- and do not put spaces around the commas.
sqoop import \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --target-dir /data/test \
  --delete-target-dir \
  --num-mappers 1 \
  --fields-terminated-by "\t" \
  --columns id,sex \
  --table sq_table
-- Import only the rows that pass a WHERE filter.
sqoop import \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --target-dir /data/test \
  --delete-target-dir \
  --num-mappers 1 \
  --fields-terminated-by "\t" \
  --table sq_table \
  --where "id=1"
-- Incremental import
-- Incremental mode 1: append
-- Add two new rows (id 3 and 4) for the append import to pick up.
INSERT INTO sq_table (id, name, sex) VALUES
    (3, 'alix', '女'),
    (4, 'sam', '男');

SELECT id, name, sex FROM sq_table;
-- Append-mode incremental import:
--   --incremental append  append new rows to the existing target directory
--   --check-column id     the column used to detect new rows
--   --last-value 2        only rows with id greater than 2 are imported
sqoop import \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --num-mappers 1 \
  --table sq_table \
  --target-dir /data/test \
  --incremental append \
  --check-column id \
  --last-value 2
-- Incremental mode 2: lastmodified
-- With lastmodified + append the import can contain duplicate rows, because
-- lastmodified treats every row whose check column is greater than or equal to
-- --last-value as part of the increment.
-- First create ltf_table with a timestamp column that is set when a row is
-- created and refreshed whenever the row is updated.
CREATE TABLE IF NOT EXISTS ltf_table (
    id INT,
    name VARCHAR(255),
    last_mod TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);

SELECT id, name, last_mod FROM ltf_table;

-- last_mod is omitted on purpose so the column default fills it in.
INSERT INTO ltf_table (id, name) VALUES (1, 'kk');
INSERT INTO ltf_table (id, name) VALUES (2, 'jj');
INSERT INTO ltf_table (id, name) VALUES (3, 'll');
sqoop import
–connect jdbc:mysql://localhost:3306/sqoop
–username root --password 123456
–target-dir /data/test2
–table ltf_table
–incremental lastmodified \
–check-column last_mod
–m 1
–last-value “2022-10-14 19:25:53”
–append – 附加模式
-- lastmodified incremental import with --merge-key: the data files are merged
-- into one on the key column, so there are no duplicate rows. This is the
-- commonly used variant.
sqoop import \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --target-dir /data/test2 \
  --table ltf_table \
  --incremental lastmodified \
  --check-column last_mod \
  --num-mappers 1 \
  --last-value "2022-10-14 19:25:53" \
  --merge-key id
-- Data export: sqoop export
-- The target table is NOT created automatically if it does not exist.
USE sqoop;
SELECT id, name, sex FROM sq_table;
-- Empty the target table (presumably so the export examples start from a clean table).
TRUNCATE TABLE sq_table;
-- Full export
-- --input-fields-terminated-by must match the delimiter actually used in the
-- data file on HDFS.

-- part-m-00001 is comma-delimited, so the input delimiter is ",".
sqoop export \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --table sq_table \
  --num-mappers 1 \
  --export-dir /data/test/part-m-00001 \
  --input-fields-terminated-by ","

-- part-m-00000 is tab-delimited, so the input delimiter is "\t".
sqoop export \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --table sq_table \
  --num-mappers 1 \
  --export-dir /data/test/part-m-00000 \
  --input-fields-terminated-by "\t"
– 更新导出
– 默认updateonly模式仅仅更新已存在的数据记录,不会插入新纪录。
sqoop export
–connect jdbc:mysql://localhost:3306/sqoop
–username root --password 123456
–table sq_table
–export-dir /data/test/part-m-00001
–input-fields-terminated-by “,”
–update-key id
–update-mode updateonly
– allowinsert 模式,更新已存在的数据记录,同时插入新纪录。实质上是一个insert & update的操作。
sqoop export
–connect jdbc:mysql://localhost:3306/sqoop
–username root --password 123456
–table sq_table
–export-dir /data/test/part-m-00001
–input-fields-terminated-by “,”
–update-key id
–update-mode allowinsert
-- Packaging a job as an options file
-- Each option and each value must be written on its own line.
USE sqoop;
SELECT id, name, sex FROM sq_table;
TRUNCATE TABLE sq_table;
-- Create a .opt file under the corresponding path with the content below,
-- then run it with: sqoop --options-file /home/bi008/my_sqoop/job.opt
export
--connect
jdbc:mysql://localhost:3306/sqoop
--username
root
--password
123456
--table
sq_table
--num-mappers
1
--export-dir
/data/test/part-m-00001
--input-fields-terminated-by
","
-- Sqoop scheduled job: test data
-- Table with a timestamp column that is set on insert and refreshed on update.
CREATE TABLE IF NOT EXISTS ltf_table (
    id INT,
    name VARCHAR(255),
    last_mod TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
);

SELECT id, name, last_mod FROM ltf_table;

-- Backdate last_mod explicitly for selected rows.
-- NOTE(review): these four UPDATEs run before the INSERT below that creates the
-- 'vv2'/'jj2'/'kk2'/'ll2' rows, so on a top-to-bottom run they only affect rows
-- that already exist from an earlier run. Confirm the intended order.
UPDATE ltf_table SET last_mod = '2022-10-14 17:00:00' WHERE name = 'll2';
UPDATE ltf_table SET last_mod = '2022-10-14 18:00:00' WHERE name = 'kk2';
UPDATE ltf_table SET last_mod = '2022-10-14 18:00:00' WHERE name = 'jj2';
UPDATE ltf_table SET last_mod = '2022-10-14 18:00:00' WHERE name = 'vv2';

INSERT INTO ltf_table (id, name) VALUES
    (8, 'vv2'),
    (9, 'jj2'),
    (10, 'kk2'),
    (11, 'll2');

-- NOTE(review): no visible statement inserts rows named 'uu' or 'pp'.
-- The original repeated the 'pp' update twice; the exact duplicate is dropped.
UPDATE ltf_table SET last_mod = '2022-10-13 11:00:00' WHERE name = 'uu';
UPDATE ltf_table SET last_mod = '2022-10-13 11:00:00' WHERE name = 'pp';
-- Application: shell script for the scheduled incremental import
#!/bin/bash
# Lower bound for the increment: the timestamp exactly one day before now.
date_time=$(date -d '-1 day' "+%Y-%m-%d %H:%M:%S")

# Import rows of ltf_table whose last_mod is at or after ${date_time} and merge
# them into /data/test on the id key.
# NOTE(review): --password on the command line is visible to other users (e.g.
# via ps); the Sqoop user guide recommends --password-file for unattended jobs.
/usr/local/sqoop/bin/sqoop import \
  --connect jdbc:mysql://localhost:3306/sqoop \
  --username root \
  --password 123456 \
  --table ltf_table \
  --target-dir /data/test \
  --incremental lastmodified \
  --check-column last_mod \
  --last-value "${date_time}" \
  --merge-key id \
  --num-mappers 1