fyl005

hiveSQL语法及练习题整理(mysql)

hiveSQL练习题整理：

第一题

第二题

第三题

第四题

第五题

第六题

第七题

第八题

第九题

第十题

第十一题

第十二题

hivesql常用函数：

hiveSQL常用操作语句（mysql）

hiveSQL练习题整理：

第一题

我们有如下的用户访问数据

userId  visitDate   visitCount

u01 2017/1/21   5

u02 2017/1/23   6

u03 2017/1/22   8

u04 2017/1/20   3

u01 2017/1/23   6

u01 2017/2/21   8

u02 2017/1/23   6

u01 2017/2/22   4

要求使用SQL统计出每个用户的累积访问次数，如下表所示：

用户id    月份  小计  累积

u01 2017-01 11  11

u01 2017-02 12  23

u02 2017-01 12  12

u03 2017-01 8   8

u04 2017-01 3   3



-- Question 1 fixture: (re)create the raw per-visit log table.
DROP TABLE IF EXISTS test_one;

CREATE TABLE test_one (
    userId     STRING COMMENT '用户id',
    visitDate  STRING COMMENT '访问日期',
    visitCount BIGINT COMMENT '访问次数'
)
COMMENT '第一题'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample visits in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_one VALUES
    ('u01', '2017/1/21', 5),
    ('u02', '2017/1/23', 6),
    ('u03', '2017/1/22', 8),
    ('u04', '2017/1/20', 3),
    ('u01', '2017/1/23', 6),
    ('u01', '2017/2/21', 8),
    ('u02', '2017/1/23', 6),
    ('u01', '2017/2/22', 4);



-- Query: per-user monthly visit subtotal plus a running (cumulative) total.
-- Innermost subquery normalises 'yyyy/M/d' strings to a 'yyyy-MM' month key,
-- the middle one sums visits per (user, month), and the outer select adds the
-- running total over months.
SELECT
    userId    AS `用户id`,
    visitDate AS `月份`,
    sum_mn    AS `小计`,
    -- ORDER BY is required here: with a ROWS frame and no ordering, the rows of a
    -- partition arrive in arbitrary order, so the running total would not be
    -- guaranteed to accumulate month by month.
    SUM(sum_mn) OVER (
        PARTITION BY userId
        ORDER BY visitDate
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS `累积`
FROM (
    SELECT
        t1.userId,
        t1.visitDate,
        SUM(t1.visitCount) AS sum_mn
    FROM (
        SELECT
            userId,
            -- replace '/' with '-' so date_format can read the string as a date
            date_format(regexp_replace(visitdate, "/", "-"), 'yyyy-MM') AS visitDate,
            visitCount
        FROM test_one
    ) t1
    GROUP BY t1.userId, t1.visitDate
) t2;

第二题

有50W个京东店铺，每个访客访问任何一个店铺的任何一个商品时都会产生一条访问日志，

访问日志存储的表名为Visit，访客的用户id为user_id，被访问的店铺名称为shop，请统计：

1）每个店铺的UV（访客数）

2）每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数



-- Question 2 fixture: one row per shop visit.
DROP TABLE IF EXISTS test_two;

CREATE TABLE test_two (
    shoop_name STRING COMMENT '店铺名称',
    user_id    STRING COMMENT '用户id',
    visit_time STRING COMMENT '访问时间'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample shop visits in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_two VALUES
    ('huawei',   '1001', '2017-02-10'),
    ('icbc',     '1001', '2017-02-10'),
    ('huawei',   '1001', '2017-02-10'),
    ('apple',    '1001', '2017-02-10'),
    ('huawei',   '1001', '2017-02-10'),
    ('huawei',   '1002', '2017-02-10'),
    ('huawei',   '1002', '2017-02-10'),
    ('huawei',   '1001', '2017-02-10'),
    ('huawei',   '1003', '2017-02-10'),
    ('huawei',   '1004', '2017-02-10'),
    ('huawei',   '1005', '2017-02-10'),
    ('icbc',     '1002', '2017-02-10'),
    ('jingdong', '1006', '2017-02-10'),
    ('jingdong', '1003', '2017-02-10'),
    ('jingdong', '1002', '2017-02-10'),
    ('jingdong', '1004', '2017-02-10'),
    ('apple',    '1001', '2017-02-10'),
    ('apple',    '1001', '2017-02-10'),
    ('apple',    '1001', '2017-02-10'),
    ('apple',    '1002', '2017-02-10'),
    ('apple',    '1002', '2017-02-10'),
    ('apple',    '1005', '2017-02-10'),
    ('apple',    '1005', '2017-02-10'),
    ('apple',    '1006', '2017-02-10');



-- 1) UV (unique visitors) per shop.
-- UV counts distinct visitors; COUNT(*) would count every visit row (page views),
-- so a user who visits the same shop several times must be counted once.
SELECT
    shoop_name,
    COUNT(DISTINCT user_id) AS shoop_uv
FROM test_two
GROUP BY shoop_name
ORDER BY shoop_uv DESC;



-- 2) Top-3 visitors per shop by number of visits.
-- Output: shop name, visitor id, visit count, rank within the shop.
WITH shop_user_visits AS (
    -- number of visits each user made to each shop
    SELECT
        shoop_name,
        user_id,
        COUNT(*) AS visit_cnt
    FROM test_two
    GROUP BY shoop_name, user_id
),
ranked_visitors AS (
    -- position of each user inside its shop, most visits first
    SELECT
        shoop_name,
        user_id,
        visit_cnt,
        ROW_NUMBER() OVER (PARTITION BY shoop_name ORDER BY visit_cnt DESC) AS rank_vis
    FROM shop_user_visits
)
SELECT
    shoop_name AS `商店名称`,
    user_id    AS `用户id`,
    visit_cnt  AS `访问次数`,
    rank_vis   AS `忠诚排名`
FROM ranked_visitors
WHERE rank_vis <= 3;

第三题


-- 已知一个表STG.ORDER,有如下字段:Date,Order_id,User_id,amount。

-- 请给出sql进行统计:数据样例:2017-01-01,10029028,1000003251,33.57。

-- 1）给出 2017年每个月的订单数、用户数、总成交金额。

-- 2）给出2017年11月的新客数(指在11月才有第一笔订单)



-- Question 3 fixture: order facts (date, order id, user id, amount).
DROP TABLE IF EXISTS test_three_ORDER;

CREATE TABLE test_three_ORDER (
    `Date`     STRING        COMMENT '下单时间',
    `Order_id` STRING        COMMENT '订单ID',
    `User_id`  STRING        COMMENT '用户ID',
    `amount`   DECIMAL(10,2) COMMENT '金额'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample orders in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_three_ORDER VALUES
    ('2017-10-01', '10029011', '1000003251', 19.50),
    ('2017-10-03', '10029012', '1000003251', 29.50),
    ('2017-10-04', '10029013', '1000003252', 39.50),
    ('2017-10-05', '10029014', '1000003253', 49.50),
    ('2017-11-01', '10029021', '1000003251', 130.50),
    ('2017-11-03', '10029022', '1000003251', 230.50),
    ('2017-11-04', '10029023', '1000003252', 330.50),
    ('2017-11-05', '10029024', '1000003253', 430.50),
    ('2017-11-07', '10029025', '1000003254', 530.50),
    ('2017-11-15', '10029026', '1000003255', 630.50),
    ('2017-12-01', '10029027', '1000003252', 112.50),
    ('2017-12-03', '10029028', '1000003251', 212.50),
    ('2017-12-04', '10029029', '1000003253', 312.50),
    ('2017-12-05', '10029030', '1000003252', 412.50),
    ('2017-12-07', '10029031', '1000003258', 512.50),
    ('2017-12-15', '10029032', '1000003255', 612.50);



-- 1) For each month of 2017: number of orders, number of distinct users, total amount.
SELECT
    date_format(`date`, 'yyyy-MM') AS `date`,
    COUNT(*)                       AS `订单数`,
    COUNT(DISTINCT user_id)        AS `用户数`,
    SUM(amount)                    AS `总成交金额`
FROM test_three_ORDER
-- The task is scoped to 2017; without this filter, orders from any other year
-- would be reported too. `date` holds 'yyyy-MM-dd' strings, so a half-open
-- string range compares correctly.
WHERE `date` >= '2017-01-01'
  AND `date` <  '2018-01-01'
GROUP BY date_format(`date`, 'yyyy-MM');



-- 2) Number of new customers in November 2017, i.e. users who ordered in 2017-11
--    and have no order in any earlier month (anti-join formulation).
WITH nov_buyers AS (
    -- users with at least one order in 2017-11
    SELECT user_id
    FROM test_three_ORDER
    WHERE date_format(`date`, 'yyyy-MM') = '2017-11'
    GROUP BY user_id
),
earlier_buyers AS (
    -- users with at least one order before 2017-11
    SELECT user_id
    FROM test_three_ORDER
    WHERE date_format(`date`, 'yyyy-MM') < '2017-11'
    GROUP BY user_id
)
SELECT
    COUNT(DISTINCT nov.user_id)
FROM nov_buyers nov
LEFT JOIN earlier_buyers prev
    ON nov.user_id = prev.user_id
-- keep only November buyers that found no match among earlier buyers
WHERE prev.user_id IS NULL;

-- Alternative: use LAG to find each user's first-ever order and count those that
-- fall in November 2017.
SELECT
    COUNT(User_id) AS `11月新客数`
FROM (
    SELECT
        User_id,
        Order_id,
        `Date`,
        -- Previous order date of the same user; NULL for the user's first order.
        -- No default is supplied: the earlier integer sentinel 0 did not match the
        -- string type of `Date` and relied on implicit string/number casts when
        -- compared, whereas NULL is type-safe.
        LAG(`Date`, 1) OVER (PARTITION BY User_id ORDER BY `Date`) AS preOrder
    FROM test_three_ORDER
) t1
WHERE date_format(`Date`, 'yyyy-MM') = '2017-11'
  AND preOrder IS NULL;

第四题

-- 有一个5000万的用户文件(user_id,name,age),一个2亿记录的用户看电影的记录文件(user_id,url),

-- 根据年龄段观看电影的次数进行排序？

-- Question 4 fixtures.

-- User table: one row per user.
-- Drops test_four_user (the original dropped test_four_log here, so re-running
-- the script failed on CREATE because the user table already existed).
DROP TABLE IF EXISTS test_four_user;

CREATE TABLE test_four_user (
    user_id STRING COMMENT '用户ID',
    name    STRING COMMENT '用户姓名',
    age     INT    COMMENT '用户年龄'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

-- Viewing log: one row per movie view.
DROP TABLE IF EXISTS test_four_log;

CREATE TABLE test_four_log (
    user_id STRING COMMENT '用户ID',
    url     STRING COMMENT '链接'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

-- Load sample users (same rows, same order).
INSERT INTO TABLE test_four_user VALUES
    ('1', '1', 8),
    ('2', '2', 45),
    ('3', '3', 14),
    ('4', '4', 18),
    ('5', '5', 17),
    ('6', '6', 19),
    ('7', '7', 26),
    ('8', '8', 22);

-- Load sample viewing log: users 1..8 each view url '111' three times.
INSERT INTO TABLE test_four_log VALUES
    ('1', '111'), ('2', '111'), ('3', '111'), ('4', '111'),
    ('5', '111'), ('6', '111'), ('7', '111'), ('8', '111'),
    ('1', '111'), ('2', '111'), ('3', '111'), ('4', '111'),
    ('5', '111'), ('6', '111'), ('7', '111'), ('8', '111'),
    ('1', '111'), ('2', '111'), ('3', '111'), ('4', '111'),
    ('5', '111'), ('6', '111'), ('7', '111'), ('8', '111');



-- Number of movie views per age band, most-viewed band first.
SELECT
    age_size AS `年龄段`,
    COUNT(*) AS `观影次数`
FROM (
    -- one row per view, labelled with the viewer's age band;
    -- any age not matched by a WHEN branch (including NULL) falls into '51-100'
    SELECT
        CASE
            WHEN u.age BETWEEN 0  AND 10 THEN '1-10'
            WHEN u.age BETWEEN 11 AND 20 THEN '11-20'
            WHEN u.age BETWEEN 21 AND 30 THEN '21-30'
            WHEN u.age BETWEEN 31 AND 40 THEN '31-40'
            WHEN u.age BETWEEN 41 AND 50 THEN '41-50'
            ELSE '51-100'
        END AS age_size
    FROM test_four_user u
    JOIN test_four_log l
        ON u.user_id = l.user_id
) banded_views
GROUP BY age_size
ORDER BY `观影次数` DESC;

第五题

-- 有日志如下，请写出代码求得所有用户和活跃用户的总数及平均年龄。（活跃用户指连续两天都有访问记录的用户）

-- 日期 用户 年龄

-- 11,test_1,23

-- 11,test_2,19

-- 11,test_3,39

-- 11,test_1,23

-- 11,test_3,39

-- 11,test_1,23

-- 12,test_2,19

-- 13,test_1,23



-- Question 5 fixture: one row per user activity record.
CREATE TABLE test_five_active (
    active_time STRING COMMENT '活跃日期',
    user_id     STRING COMMENT '用户id',
    age         INT    COMMENT '用户年龄'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample activity log in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_five_active VALUES
    ('11', 'test_1', 11),
    ('11', 'test_2', 22),
    ('11', 'test_3', 33),
    ('11', 'test_4', 44),
    ('12', 'test_3', 33),
    ('12', 'test_5', 55),
    ('12', 'test_6', 66),
    ('13', 'test_4', 44),
    ('13', 'test_5', 55),
    ('13', 'test_7', 77);



-- Total number of users and their average age.
WITH per_user AS (
    -- collapse the log to one row per user so repeat visits do not skew the average
    SELECT
        user_id,
        AVG(age) AS age
    FROM test_five_active
    GROUP BY user_id
)
SELECT
    COUNT(*) AS sum_user,
    AVG(age) AS avg_age
FROM per_user;



-- Number of active users and their average age, where "active" means the user
-- has records on at least two consecutive days.
WITH daily_activity AS (
    -- De-duplicate to one row per (day, user, age) so several records on the same
    -- day count once, then rank each user's days in ascending order.
    SELECT
        active_time,
        user_id,
        age,
        RANK() OVER (PARTITION BY user_id ORDER BY active_time) AS rank_time
    FROM test_five_active
    GROUP BY active_time, user_id, age
),
streak_keys AS (
    -- day minus rank is constant across a run of consecutive days,
    -- so equal diff values identify one streak
    SELECT
        user_id,
        age,
        active_time - rank_time AS diff
    FROM daily_activity
),
qualifying_streaks AS (
    -- keep streaks that span at least two days
    SELECT
        user_id,
        age,
        diff
    FROM streak_keys
    GROUP BY user_id, age, diff
    HAVING COUNT(*) >= 2
),
active_users AS (
    -- a user may own several qualifying streaks; collapse to one row per user
    SELECT
        user_id,
        age
    FROM qualifying_streaks
    GROUP BY user_id, age
)
SELECT
    COUNT(*),
    AVG(age)
FROM active_users;

第六题

请用sql写出所有用户中在今年10月份第一次购买商品的金额，

表ordertable字段（购买用户：userid，金额：money，购买时间：paymenttime(格式：2017-10-01)，订单id：orderid）



-- Question 6 fixture: purchase orders.
CREATE TABLE test_six_ordertable (
    `userid`      STRING        COMMENT '购买用户',
    `money`       DECIMAL(10,2) COMMENT '金额',
    `paymenttime` STRING        COMMENT '购买时间',
    `orderid`     STRING        COMMENT '订单id'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample orders in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_six_ordertable VALUES
    ('1', 1, '2017-09-01', '1'),
    ('2', 2, '2017-09-02', '2'),
    ('3', 3, '2017-09-03', '3'),
    ('4', 4, '2017-09-04', '4'),
    ('3', 5, '2017-10-05', '5'),
    ('6', 6, '2017-10-06', '6'),
    ('1', 7, '2017-10-07', '7'),
    ('8', 8, '2017-10-09', '8'),
    ('6', 6, '2017-10-16', '60'),
    ('1', 7, '2017-10-17', '70');



-- Each user's first purchase made in October 2017 (user, amount, time, order id).
WITH october_orders AS (
    -- October orders ranked by payment time within each user;
    -- RANK keeps every order that ties for the earliest time
    SELECT
        userid,
        `money`,
        paymenttime,
        orderid,
        RANK() OVER (PARTITION BY userid ORDER BY paymenttime) AS rank_time
    FROM test_six_ordertable
    WHERE date_format(paymenttime, 'yyyy-MM') = '2017-10'
)
SELECT
    userid,
    `money`,
    paymenttime,
    orderid
FROM october_orders
WHERE rank_time = 1;

第七题

--现有图书管理数据库的三个数据模型如下：



--图书（数据表名：BOOK）

--序号    字段名称     字段描述       字段类型

--1       BOOK_ID     总编号         文本

--2       SORT        分类号         文本

--3       BOOK_NAME   书名           文本

--4       WRITER      作者           文本

--5       OUTPUT      出版单位       文本

--6       PRICE       单价           数值（保留小数点后2位）



--读者（数据表名：READER）

--序号      字段名称        字段描述    字段类型

--1       READER_ID       借书证号         文本

--2       COMPANY         单位            文本

--3       NAME            姓名             文本

--4       SEX             性别          文本

--5       GRADE           职称          文本

--6       ADDR            地址          文本

--

--借阅记录（数据表名：BORROW LOG）

--序号      字段名称        字段描述        字段类型

--1        READER_ID      借书证号            文本

--2       BOOK_D          总编号             文本

--3       BORROW_ATE      借书日期            日期




--（1）创建图书管理库的图书、读者和借阅三个基本表的表结构。请写出建表语句。

-- Books.
CREATE TABLE test_seven_BOOK (
    BOOK_ID   STRING        COMMENT '总编号',
    SORT      STRING        COMMENT '分类号',
    BOOK_NAME STRING        COMMENT '书名',
    WRITER    STRING        COMMENT '作者',
    OUTPUT    STRING        COMMENT '出版单位',
    PRICE     DECIMAL(10,2) COMMENT '单价'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Readers.
CREATE TABLE test_seven_READER (
    READER_ID STRING COMMENT '借书证号',
    COMPANY   STRING COMMENT '单位',
    NAME      STRING COMMENT '姓名',
    SEX       STRING COMMENT '性别',
    GRADE     STRING COMMENT '职称',
    ADDR      STRING COMMENT '地址'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Borrow log. Column names BOOK_D / BORROW_ATE follow the spelling used in the
-- problem statement's field list.
CREATE TABLE test_seven_BORROW_LOG (
    READER_ID  STRING COMMENT '借书证号',
    BOOK_D     STRING COMMENT '总编号',
    BORROW_ATE DATE   COMMENT '借书日期'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load sample books (same rows, same order).
INSERT INTO TABLE test_seven_book VALUES
    ('1001', 'A1', 'Java',  'James Gosling',           'sun',  '11'),
    ('1002', 'A2', 'linux', 'Linus Benedict Torvalds', 'sun',  '22'),
    ('1003', 'A3', 'Java3', 'James Gosling3',          'sun3', '33'),
    ('1004', 'A4', 'Java4', 'James Gosling4',          'sun4', '44'),
    ('1005', 'B1', 'Java5', 'James Gosling5',          'sun',  '55'),
    ('1006', 'C1', 'Java6', 'James Gosling6',          'sun5', '66'),
    ('1007', 'D1', 'Java7', 'James Gosling7',          'sun6', '77'),
    ('1008', 'E1', 'Java8', 'James Gosling4',          'sun3', '88');

-- Load sample readers (same rows, same order).
INSERT INTO TABLE test_seven_reader VALUES
    ('7', 'buu',  decode(binary('李大帅'),'utf-8'), 'man',   'lay1', 'beijing4'),
    ('2', 'buu2', '苏大强',                         'man',   'lay2', 'beijing2'),
    ('3', 'buu2', '李二胖',                         'woman', 'lay3', 'beijing3'),
    ('4', 'buu3', '王三涛',                         'man',   'lay4', 'beijing4'),
    ('5', 'buu4', '刘四虎',                         'woman', 'lay5', 'beijing1'),
    ('6', 'buu',  '宋冬野',                         'woman', 'lay6', 'beijing5');

-- Load sample borrow records (same rows, same order).
INSERT INTO TABLE test_seven_borrow_log VALUES
    ('1', '1002', '2019-06-01'),
    ('1', '1003', '2019-06-02'),
    ('1', '1006', '2019-06-03'),
    ('2', '1001', '2019-06-04'),
    ('3', '1002', '2019-06-05'),
    ('4', '1005', '2019-06-06'),
    ('5', '1003', '2019-06-06'),
    ('3', '1006', '2019-06-07'),
    ('2', '1003', '2019-06-03'),
    ('3', '1008', '2019-06-03'),
    ('1', '1002', '2019-06-04');

-- (2) Name and company of every reader whose name starts with '李'.
SELECT
    name,
    company
FROM test_seven_reader
WHERE name LIKE '李%';

-- (3) Book name and price of all books published by '高等教育出版社',
--     most expensive first.
SELECT
    BOOK_NAME,
    PRICE
FROM test_seven_book
-- The publisher filter was missing, so every book in the table was returned.
WHERE OUTPUT = '高等教育出版社'
ORDER BY PRICE DESC;

-- (4) Category, publisher and price of books priced from 10 to 20 (inclusive),
--     sorted by publisher and then price, both ascending.
SELECT
    SORT,
    OUTPUT,
    PRICE
FROM test_seven_book
WHERE PRICE BETWEEN 10 AND 20
ORDER BY
    OUTPUT ASC,
    PRICE ASC;



-- (5) Name and company of every reader who has borrowed at least one book.
SELECT
    rd.name,
    rd.COMPANY
FROM (
    -- distinct card numbers that appear in the borrow log
    SELECT DISTINCT READER_ID
    FROM test_seven_borrow_log
) borrowers
JOIN test_seven_reader rd
    ON borrowers.READER_ID = rd.READER_ID;

-- (6) Highest, lowest and average price of books published by '科学出版社'.
SELECT
    MAX(PRICE) AS max,
    MIN(PRICE) AS min,
    AVG(PRICE) AS avg
FROM test_seven_book
-- The publisher filter was missing, so the statistics covered all publishers.
WHERE OUTPUT = '科学出版社';

-- (7) Readers with at least two borrow records: card number, name, company.
WITH frequent_borrowers AS (
    -- card numbers with two or more rows in the borrow log
    SELECT READER_ID
    FROM test_seven_BORROW_LOG
    GROUP BY READER_ID
    HAVING COUNT(*) >= 2
)
SELECT
    rd.READER_ID,
    rd.name,
    rd.COMPANY
FROM frequent_borrowers fb
JOIN test_seven_reader rd
    ON fb.READER_ID = rd.READER_ID;



--（8）考虑到数据安全的需要,需定时将“借阅记录”中数据进行备份,

-- 请使用一条SQL语句,在备份用户bak下创建与“借阅记录”表结构完全一致的数据表BORROW_LOG_BAK.

--并且将“借阅记录”中现有数据全部复制到BORROW_LOG_BAK中。

-- (8) Create BORROW_LOG_BAK with the same columns as the borrow log and copy all
--     existing rows into it, in one statement.
-- Hive CTAS derives the target schema from the SELECT and rejects an explicit
-- column list, so the column definitions are removed.
-- NOTE(review): the task says the table belongs to backup user "bak"; if that maps
-- to a Hive database, qualify the name as bak.BORROW_LOG_BAK — confirm.
CREATE TABLE BORROW_LOG_BAK AS
SELECT
    READER_ID,
    BOOK_D,
    BORROW_ATE
FROM test_seven_BORROW_LOG;



--（9）现在需要将原Oracle数据库中数据迁移至Hive仓库,

--请写出“图书”在Hive中的建表语句（Hive实现,提示：列分隔符|；数据表数据需要外部导入：分区分别以month＿part、day＿part 命名）

-- (9) Hive table for the "book" data migrated from Oracle:
--     '|' as the column delimiter, partitioned by month_part and day_part.
-- NOTE(review): the task says the data is imported from outside; if the files are
-- to stay owned by an external location, declare this as CREATE EXTERNAL TABLE
-- with a LOCATION — confirm the intended ownership.
CREATE TABLE test_seven_book_oracle (
    book_id   STRING        COMMENT '总编号',
    sort      STRING        COMMENT '分类号',
    book_name STRING        COMMENT '书名',
    writer    STRING        COMMENT '作者',
    output    STRING        COMMENT '出版单位',
    price     DECIMAL(10,2) COMMENT '单价'
)
-- partition columns are named exactly as the task requires
-- (they were previously named month / day)
PARTITIONED BY (month_part STRING, day_part STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|';



--（10）Hive中有表A,现在需要将表A的月分区　201505　中　user＿id为20000的user＿dinner字段更新为bonc8920,

--  其他用户user＿dinner字段数据不变,请列出更新的方法步骤。

--（Hive实现,提示：Hive中无update语法,请通过其他办法进行数据更新）

-- (10) "Update" user_dinner for user_id 20000 in partition 201505 of table A by
--      rewriting the whole partition.
-- Changes from the earlier draft: typographic quotes (’) replaced with ASCII quotes,
-- no PARTITION clause on the CTAS staging table (it is not partitioned), and the
-- placeholder VALUES row replaced by a CASE expression so the other columns of
-- user 20000 are carried over unchanged.
-- NOTE(review): the schema of A is not shown; only user_id and user_dinner are
-- known. Add A's remaining non-partition columns, in table order, to both SELECT
-- lists below.

-- Step 1: stage the partition with the single value rewritten.
CREATE TABLE tmp_A AS
SELECT
    user_id,
    CASE WHEN user_id = 20000 THEN 'bonc8920' ELSE user_dinner END AS user_dinner
FROM A
WHERE month_part = '201505';

-- Step 2: overwrite the original partition from the staging table.
INSERT OVERWRITE TABLE A PARTITION (month_part = '201505')
SELECT
    user_id,
    user_dinner
FROM tmp_A;

第八题

-- 有一个线上服务器访问日志格式如下（用sql答题）

--        时间                   接口                           ip地址

-- 2016-11-09 11：22：05    /api/user/login                  110.23.5.33

-- 2016-11-09 11：23：10    /api/user/detail                 57.3.2.16

-- .....

-- 2016-11-09 23：59：40    /api/user/login                  200.6.5.166

-- 求11月9号下午14点（14-15点），访问api/user/login接口的top10的ip地址



-- Question 8 fixture: web server access log.
CREATE TABLE test_eight_serverlog (
    server_time STRING COMMENT '时间',
    server_api  STRING COMMENT '接口',
    server_ip   STRING COMMENT 'ip地址'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample access log in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_eight_serverlog VALUES
    ('2016-11-09 11:22:05', '/api/user/login',  '110.23.5.33'),
    ('2016-11-09 11:23:10', '/api/user/detail', '57.3.2.16'),
    ('2016-11-09 14:59:40', '/api/user/login',  '200.6.5.161'),
    ('2016-11-09 14:22:05', '/api/user/login',  '110.23.5.32'),
    ('2016-11-09 14:23:10', '/api/user/detail', '57.3.2.13'),
    ('2016-11-09 14:59:40', '/api/user/login',  '200.6.5.164'),
    ('2016-11-09 14:59:40', '/api/user/login',  '200.6.5.165'),
    ('2016-11-09 14:22:05', '/api/user/login',  '110.23.5.36'),
    ('2016-11-09 14:23:10', '/api/user/detail', '57.3.2.17'),
    ('2016-11-09 14:59:40', '/api/user/login',  '200.6.5.168'),
    ('2016-11-09 14:59:40', '/api/user/login',  '200.6.5.168'),
    ('2016-11-09 14:22:05', '/api/user/login',  '110.23.5.32'),
    ('2016-11-09 14:23:10', '/api/user/detail', '57.3.2.13'),
    ('2016-11-09 14:59:40', '/api/user/login',  '200.6.5.164'),
    ('2016-11-09 15:22:05', '/api/user/login',  '110.23.5.33'),
    ('2016-11-09 15:23:10', '/api/user/detail', '57.3.2.16'),
    ('2016-11-09 15:59:40', '/api/user/login',  '200.6.5.166');




-- Top 10 IP addresses by number of calls to /api/user/login between 14:00 and
-- 15:00 on 2016-11-09.
SELECT
    server_ip,
    COUNT(*) AS visit_time
FROM test_eight_serverlog
WHERE date_format(server_time, 'yyyy-MM-dd HH') = '2016-11-09 14'
  AND server_api = '/api/user/login'
GROUP BY server_ip
-- server_ip is a secondary sort key so ties are ordered deterministically
ORDER BY visit_time DESC, server_ip
-- the task asks for the top 10 only; without LIMIT every IP was returned
LIMIT 10;

第九题


-- 有一个充值日志表如下：

-- CREATE TABLE `credit log`

-- (

--     `dist_id` int（11）DEFAULT NULL COMMENT '区组id',

--     `account` varchar（100）DEFAULT NULL COMMENT '账号',

--     `money` int(11) DEFAULT NULL COMMENT '充值金额',

--     `create_time` datetime DEFAULT NULL COMMENT '订单时间'

-- )ENGINE=InnoDB DEFAULT CHARSET=utf8

-- 请写出SQL语句,查询充值日志表2015年7月9号每个区组下充值额最大的账号,要求结果：

-- 区组id,账号,金额,充值时间

-- Question 9 fixture: recharge log (Hive version of the MySQL table described above).
CREATE TABLE test_nine_credit_log (
    dist_id     STRING        COMMENT '区组id',
    account     STRING        COMMENT '账号',
    `money`     DECIMAL(10,2) COMMENT '充值金额',
    create_time STRING        COMMENT '订单时间'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample recharge log in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_nine_credit_log VALUES
    -- district 1
    ('1', '11', 100006, '2019-01-02 13:00:01'),
    ('1', '12', 110000, '2019-01-02 13:00:02'),
    ('1', '13', 102000, '2019-01-02 13:00:03'),
    ('1', '14', 100300, '2019-01-02 13:00:04'),
    ('1', '15', 100040, '2019-01-02 13:00:05'),
    ('1', '18', 110000, '2019-01-02 13:00:02'),
    ('1', '16', 100005, '2019-01-03 13:00:06'),
    ('1', '17', 180000, '2019-01-03 13:00:07'),
    -- district 2
    ('2', '21', 100800, '2019-01-02 13:00:11'),
    ('2', '22', 100030, '2019-01-02 13:00:12'),
    ('2', '23', 100000, '2019-01-02 13:00:13'),
    ('2', '24', 100010, '2019-01-03 13:00:14'),
    ('2', '25', 100070, '2019-01-03 13:00:15'),
    ('2', '26', 100800, '2019-01-02 15:00:11'),
    -- district 3
    ('3', '31', 106000, '2019-01-02 13:00:08'),
    ('3', '32', 100400, '2019-01-02 13:00:09'),
    ('3', '33', 100030, '2019-01-02 13:00:10'),
    ('3', '34', 100003, '2019-01-02 13:00:20'),
    ('3', '35', 100020, '2019-01-02 13:00:30'),
    ('3', '36', 100500, '2019-01-02 13:00:40'),
    ('3', '37', 106000, '2019-01-03 13:00:50'),
    ('3', '38', 100800, '2019-01-03 13:00:59');



-- For 2019-01-02, the account(s) with the largest recharge amount in each district.
-- Output: district id, account, amount, recharge time, rank.
WITH ranked_recharges AS (
    -- DENSE_RANK keeps every account tied for the top amount, and the same query
    -- can return the 2nd, 3rd, ... largest by changing the final filter
    SELECT
        dist_id,
        account,
        `money`,
        create_time,
        DENSE_RANK() OVER (PARTITION BY dist_id ORDER BY `money` DESC) AS money_rank
    FROM test_nine_credit_log
    WHERE date_format(create_time, 'yyyy-MM-dd') = '2019-01-02'
)
SELECT
    aaa.dist_id,
    aaa.account,
    aaa.`money`,
    aaa.create_time,
    aaa.money_rank
FROM ranked_recharges aaa
WHERE aaa.money_rank = 1;



-- Alternative without window functions: compute each district's maximum amount
-- for 2019-01-02, then join back to pick the matching recharge rows.
WITH tmp_max_money AS (
    SELECT
        dist_id,
        MAX(`money`) AS max_money
    FROM test_nine_credit_log
    WHERE date_format(create_time, 'yyyy-MM-dd') = '2019-01-02'
    GROUP BY dist_id
)
SELECT
    cl.dist_id     AS dist_id,
    cl.account     AS account,      -- was misspelled "acount" in the output
    cl.money       AS money,
    cl.create_time AS create_time
FROM test_nine_credit_log cl
-- The earlier LEFT JOIN followed by WHERE cl.money = mm.max discarded unmatched
-- rows anyway; an INNER JOIN with the amount in the ON clause states that intent.
INNER JOIN tmp_max_money mm
    ON  cl.dist_id = mm.dist_id
    AND cl.money   = mm.max_money
WHERE date_format(cl.create_time, 'yyyy-MM-dd') = '2019-01-02';

第十题


-- 有一个账号表如下,请写出SQL语句,查询各自区组的money排名前十的账号（分组取前10）

-- CREATE TABLE `account`

-- (

--     `dist_id` int（11）

--     DEFAULT NULL COMMENT '区组id',

--     `account` varchar（100）DEFAULT NULL COMMENT '账号' ,

--     `gold` int（11）DEFAULT NULL COMMENT '金币'

--     PRIMARY KEY （`dist_id`,`account_id`）,

-- )ENGINE=InnoDB DEFAULT CHARSET=utf8

-- Hive equivalent of the MySQL `account` table described above.
DROP TABLE IF EXISTS `test_ten_account`;

CREATE TABLE `test_ten_account` (
    `dist_id` STRING COMMENT '区组id',
    `account` STRING COMMENT '账号',
    `gold`    BIGINT COMMENT '金币'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Load the sample accounts in a single multi-row insert (same rows, same order).
INSERT INTO TABLE test_ten_account VALUES
    -- district 1
    ('1', '11', 100006),
    ('1', '12', 110000),
    ('1', '13', 102000),
    ('1', '14', 100300),
    ('1', '15', 100040),
    ('1', '18', 110000),
    ('1', '16', 100005),
    ('1', '17', 180000),
    -- district 2
    ('2', '21', 100800),
    ('2', '22', 100030),
    ('2', '23', 100000),
    ('2', '24', 100010),
    ('2', '25', 100070),
    ('2', '26', 100800),
    -- district 3
    ('3', '31', 106000),
    ('3', '32', 100400),
    ('3', '33', 100030),
    ('3', '34', 100003),
    ('3', '35', 100020),
    ('3', '36', 100500),
    ('3', '37', 106000),
    ('3', '38', 100800);



-- Top 10 accounts by gold within each district.
SELECT
    dist_id,
    account,
    gold,
    gold_rank
FROM (
    SELECT
        `dist_id`,
        `account`,
        `gold`,
        -- DENSE_RANK: accounts with equal gold share a rank, so a district may
        -- return more than 10 rows when there are ties
        DENSE_RANK() OVER (PARTITION BY dist_id ORDER BY gold DESC) AS gold_rank
    FROM test_ten_account
) tmp
-- the task asks for the top ten per district; the filter previously kept only 3
WHERE gold_rank <= 10;

第十一题


-- 1）有三张表分别为会员表（member）销售表（sale）退货表（regoods）

-- （1）会员表有字段memberid（会员id,主键）credits（积分）；

-- （2）销售表有字段memberid（会员id,外键）购买金额（MNAccount）；

-- （3）退货表中有字段memberid（会员id,外键）退货金额（RMNAccount）；

-- 2）业务说明：

-- （1）销售表中的销售记录可以是会员购买,也可是非会员购买。（即销售表中的memberid可以为空）

-- （2）销售表中的一个会员可以有多条购买记录

-- （3）退货表中的退货记录可以是会员,也可是非会员

-- （4）一个会员可以有一条或多条退货记录

-- 查询需求：分组查出销售表中所有会员购买金额,同时分组查出退货表中所有会员的退货金额,

--  把会员id相同的购买金额-退款金额得到的结果更新到表会员表中对应会员的积分字段（credits）

-- 建表

-- Member table: one row per member with the current credit balance.
DROP TABLE IF EXISTS test_eleven_member;

CREATE TABLE test_eleven_member (
    memberid STRING COMMENT '会员id',
    credits  BIGINT COMMENT '积分'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

-- Sales table: memberid may be NULL for non-member purchases.
DROP TABLE IF EXISTS test_eleven_sale;

CREATE TABLE test_eleven_sale (
    memberid  STRING        COMMENT '会员id',
    MNAccount DECIMAL(10,2) COMMENT '购买金额'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

-- Returns table: memberid may be NULL for non-member returns.
DROP TABLE IF EXISTS test_eleven_regoods;

CREATE TABLE test_eleven_regoods (
    memberid   STRING        COMMENT '会员id',
    RMNAccount DECIMAL(10,2) COMMENT '退货金额'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';



-- Members, all starting with zero credits (same rows, same order).
INSERT INTO TABLE test_eleven_member VALUES
    ('1001', 0),
    ('1002', 0),
    ('1003', 0),
    ('1004', 0),
    ('1005', 0),
    ('1006', 0),
    ('1007', 0);

-- Sales; NULL memberid rows are non-member purchases.
INSERT INTO TABLE test_eleven_sale VALUES
    ('1001', 5000),
    ('1002', 4000),
    ('1003', 5000),
    ('1004', 6000),
    ('1005', 7000),
    ('1004', 3000),
    ('1002', 6000),
    ('1001', 2000),
    ('1004', 3000),
    ('1006', 3000),
    (NULL, 1000),
    (NULL, 1000),
    (NULL, 1000),
    (NULL, 1000);

-- Returns; NULL memberid rows are non-member returns.
INSERT INTO TABLE test_eleven_regoods VALUES
    ('1001', 1000),
    ('1002', 1000),
    ('1003', 1000),
    ('1004', 1000),
    ('1005', 1000),
    ('1002', 1000),
    ('1001', 1000),
    ('1003', 1000),
    ('1002', 1000),
    ('1005', 1000),
    (NULL, 1000),
    (NULL, 1000),
    (NULL, 1000),
    (NULL, 1000);



-- Sum purchases and returns per member, and write
-- (existing credits + purchases - returns) back to the member table's credits.
-- Approach: bring the three sources to a common (memberid, credits, MNAccount,
-- RMNAccount) shape, UNION ALL them, and aggregate per member.
WITH
tmp_member AS (
    SELECT memberid, SUM(credits) AS credits
    FROM test_eleven_member
    GROUP BY memberid
),
tmp_sale AS (
    SELECT memberid, SUM(MNAccount) AS MNAccount
    FROM test_eleven_sale
    GROUP BY memberid
),
tmp_regoods AS (
    SELECT memberid, SUM(RMNAccount) AS RMNAccount
    FROM test_eleven_regoods
    GROUP BY memberid
)
INSERT OVERWRITE TABLE test_eleven_member
SELECT
    t1.memberid,
    -- fixed: the first term referenced t1.creadits, a column that does not exist
    SUM(t1.credits) + SUM(t1.MNAccount) - SUM(t1.RMNAccount) AS credits
FROM (
    SELECT
        memberid,
        credits,
        0 AS MNAccount,
        0 AS RMNAccount
    FROM tmp_member
    UNION ALL
    SELECT
        memberid,
        0 AS credits,
        MNAccount,
        0 AS RMNAccount
    FROM tmp_sale
    UNION ALL
    SELECT
        memberid,
        0 AS credits,
        0 AS MNAccount,
        RMNAccount
    FROM tmp_regoods
) t1
-- non-member sales and returns (NULL memberid) have no member row to update
WHERE t1.memberid IS NOT NULL
GROUP BY t1.memberid;
-- the terminating semicolon above was missing, which merged this statement with
-- whatever followed it

---------------------第2种写法-用left join--------------------------

-- Same credit update expressed with a LEFT JOIN:
--   1. sale_total / refund_total : per-member purchase and refund totals
--   2. credit_delta              : purchases minus refunds for every member
--                                  that has purchases (refund defaults to 0)
--   3. final select              : delta stacked on top of the current credits
--                                  and summed per member
-- Only members present in test_eleven_sale drive the join, so a member that
-- appears in the refund table alone contributes no delta.
with
sale_total as
(
    select
        memberid,
        sum(MNAccount) MNAccount
    from test_eleven_sale
    group by memberid
),
refund_total as
(
    select
        memberid,
        sum(RMNAccount) RMNAccount
    from test_eleven_regoods
    group by memberid
),
credit_delta as
(
    select
        s.memberid,
        s.MNAccount - coalesce(r.RMNAccount,0) credits
    from sale_total s
    left join refund_total r
        on s.memberid = r.memberid
    where s.memberid is not NULL
)
insert overwrite table test_eleven_member
select
    merged.memberid,
    sum(merged.credits) credits
from
(
    select memberid, credits from credit_delta
    union all
    select memberid, credits from test_eleven_member
) merged
group by merged.memberid;

第十二题

--现在有三个表student（学生表）、course(课程表)、score（成绩单）,结构如下：

--建表

-- Student table for exercise 12: student number, name and age.
-- Dropped first so the script can be re-run without an "already exists" error,
-- following the drop/create pattern used by the first exercises in this file.
drop table if exists test_twelve_student;
create table test_twelve_student
(
    id bigint comment '学号',
    name string comment '姓名',
    age bigint comment '年龄'
)
row format delimited fields terminated by '\t';



-- Course table for exercise 12: course number (string such as '001') and name.
-- Dropped first so the script can be re-run without an "already exists" error,
-- following the drop/create pattern used by the first exercises in this file.
drop table if exists test_twelve_course;
create table test_twelve_course
(
    cid string comment '课程号,001/002格式',
    cname string comment '课程名'
)
row format delimited fields terminated by '\t';



-- Score table for exercise 12: one row per (student id, course id, score).
-- Dropped first so the script can be re-run without an "already exists" error,
-- following the drop/create pattern used by the first exercises in this file.
-- NOTE(review): later statements in this exercise load into / filter on a
-- partition column, but this DDL declares no PARTITIONED BY clause — confirm
-- which definition is intended before running those statements.
drop table if exists test_twelve_score;
create table test_twelve_score
(
    id bigint comment '学号',
    cid string comment '课程号',
    score bigint comment '成绩'
)
row format delimited fields terminated by '\t';




--插入数据

-- Five sample students (id, name, age) loaded with one multi-row VALUES list.
insert into table test_twelve_student values
    (1001,'wsl1',21),
    (1002,'wsl2',22),
    (1003,'wsl3',23),
    (1004,'wsl4',24),
    (1005,'wsl5',25);



-- Four sample courses (cid, cname) loaded with one multi-row VALUES list.
insert into table test_twelve_course values
    ('001','math'),
    ('002','English'),
    ('003','Chinese'),
    ('004','music');



-- Ten sample score rows (student id, course id, score) loaded with one
-- multi-row VALUES list. Some (student, course) pairs occur more than once.
insert into table test_twelve_score values
    (1001,'004',10),
    (1002,'003',21),
    (1003,'002',32),
    (1004,'001',43),
    (1005,'003',54),
    (1001,'002',65),
    (1002,'004',76),
    (1003,'002',77),
    (1001,'004',48),
    (1002,'003',39);




--其中score中的id、cid,分别是student、course中对应的列请根据上面的表结构,回答下面的问题

--1）请将本地文件（/home/users/test/20190301.csv）文件,加载到分区表score的20190301分区中,并覆盖之前的数据

-- Q1: load the local CSV into the 20190301 partition of the score table,
-- replacing whatever that partition already holds (OVERWRITE).
-- The partition column is named dt so that it agrees with the other
-- partition-based queries of this exercise, which filter on dt.
-- NOTE(review): this requires test_twelve_score to be partitioned by dt; the
-- CREATE TABLE shown for it has no PARTITIONED BY clause — confirm.
load data local inpath '/home/users/test/20190301.csv'
overwrite into table test_twelve_score
partition (dt = '20190301');

--2）查出平均成绩大于60分的学生的姓名、年龄、平均成绩

-- Q2: name, age and average score of every student whose average is above 60.
-- The per-student average is computed (and filtered with HAVING) first, then
-- joined to the student table to pick up name and age.
with avg_by_student as
(
    select
        id,
        avg(score) avg_score
    from test_twelve_score
    group by id
    having avg(score) > 60
)
select
    s.name,
    s.age,
    a.avg_score
from avg_by_student a
join test_twelve_student s
    on s.id = a.id;

--3）查出没有'001'课程成绩的学生的姓名、年龄

-- Q3: name and age of every student who has NO score for course '001'.
-- Implemented as an anti-join: collect the ids that do have a '001' score,
-- left-join them to the student table and keep the students with no match.
-- (The previous form, "cid != 001" + inner join, returned any student holding
-- at least one non-001 score — including students who also took 001 — and
-- dropped students with no score rows at all.)
select
    stu.name,
    stu.age
from
test_twelve_student stu
left join
(
    select
        id
    from test_twelve_score
    where cid = '001'   -- cid is a string column; compare against a string literal
    group by id
) t1
on stu.id = t1.id
where t1.id is null;

--4）查出有'001'\'002'这两门课程下,成绩排名前3的学生的姓名、年龄

-- Q4: name and age of the students ranked in the top 3 by score within
-- course '001' and within course '002'.
-- rank() is evaluated per course (partition by cid), highest score first; ties
-- share a rank, so more than three rows per course can qualify. A student who
-- qualifies through several score rows is returned once per qualifying row.
select
    stu.name,
    stu.age
from
(
    select
        id,
        cid,
        score,
        rank() over(partition by cid order by score desc) ran
    from
    test_twelve_score
    -- cid is a string column: use string literals so the match is exact
    -- ('001'/'002') rather than relying on implicit numeric conversion
    where cid in ('001','002')
) t1
join test_twelve_student stu
on t1.id = stu.id
where ran <= 3;




--5）创建新的表score_20190317,并存入score表中20190317分区的数据

-- Q5: materialise the rows of the 20190317 partition of the score table into a
-- new table (CTAS), keeping every column of the source.
create table score_20190317 as
select *
from test_twelve_score
where dt = '20190317';

--6）如果上面的score_20190317score表中,uid存在数据倾斜,请进行优化,查出在20190101-20190317中,学生的姓名、年龄、课程、课程的平均成绩

-- Q6: for partitions 20190101..20190317, return each student's name and age
-- together with the course name and the student's average score in that course.
-- The scores are aggregated per (id, cid) before joining, so the two dimension
-- tables are joined against the already-reduced result.
-- NOTE(review): the exercise asks for a skew optimisation; Hive's
-- hive.groupby.skewindata=true setting is the usual candidate for a skewed
-- GROUP BY key — it is not enabled by this statement, confirm whether to set it.
select
    stu.name,
    stu.age,
    cou.cname,
    t1.avg_score
from
(
    select
        id,
        cid,
        avg(score) avg_score
    from test_twelve_score
    -- fixed: WHERE must come before GROUP BY (the original order was a syntax error)
    where dt >= '20190101' and dt <= '20190317'
    group by id,cid
) t1
left join test_twelve_student stu on t1.id = stu.id
left join test_twelve_course cou on t1.cid = cou.cid;

--7）描述一下union和union all的区别,以及在mysql和HQL中用法的不同之处？



union会对结果集去重（去重过程可能伴随排序，但不保证输出有序），union all直接合并结果，既不去重也不排序。

HQL中要求union或union all操作时必须保证select 集合的结果相同个数的列，并且每个列的类型是一样的。



--8）简单描述一下lateral view语法在HQL中的应用场景,并写一个HQL实例

-- 比如一个学生表为：

-- 学号  姓名  年龄  成绩（语文|数学|英语）

-- 001   张三  16     90，80，95

-- 需要实现效果：

-- 学号  成绩

-- 001 90

-- 001 80

-- 001 95




-- Demo table for LATERAL VIEW: one row per student, with all of the student's
-- scores stored in a single array column.
-- Fields are tab-separated; the array elements inside `scores` are separated
-- by ','.
create table student(
`id` string,
`name` string,
`age` int,
-- fixed: Hive requires the element type of an array column (array<...>)
`scores` array<int>
)
row format delimited fields terminated by '\t'
collection items terminated by ',';




-- Turn the scores array into one output row per element: explode() emits each
-- array element as a row of the lateral view, which is joined back to the
-- student row it came from.
select
    s.id,
    exploded.score
from student s
lateral view explode(s.scores) exploded as score;

hivesql常用函数：

-------------------------Hive  SQL  常用函数  ----------------------------

--常用日期函数
--unix_timestamp:返回当前或指定时间的时间戳	        select unix_timestamp();  select unix_timestamp('2008-08-08 08:08:08'); 
--from_unixtime：将时间戳转为日期格式                 select from_unixtime(1218182888);
--current_date：当前日期                  select current_date();
--current_timestamp：当前的日期加时间     select current_timestamp();
--to_date：抽取日期部分                   select to_date('2008-08-08 08:08:08');   select to_date(current_timestamp());
--year：获取年                            select year(current_timestamp());
--month：获取月                           select month(current_timestamp());
--day：获取日                             select DAY(current_timestamp());
--hour：获取时                            select HOUR(current_timestamp());
--minute：获取分                          select minute(current_timestamp());
--second：获取秒                          select SECOND(current_timestamp());
--weekofyear：当前时间是一年中的第几周    select weekofyear(current_timestamp());  select weekofyear('2020-01-08');
--dayofmonth：当前时间是一个月中的第几天  select dayofmonth(current_timestamp());  select dayofmonth('2020-01-08');
--months_between： 两个日期间的月份       select months_between('2020-07-29','2020-06-28');
--add_months：日期加减月                  select add_months('2020-06-28',1);
--datediff：两个日期相差的天数            select datediff('2019-03-01','2019-02-01');   select datediff('2020-03-01','2020-02-01');
--date_add：日期加天数                    select date_add('2019-02-28',1);   select date_add('2020-02-28',1);
--date_sub：日期减天数                    select date_sub('2019-03-01',1);   select date_sub('2020-03-01',1);
--last_day：日期的当月的最后一天          select last_day('2020-02-28');   select last_day('2019-02-28');
--date_format() ：格式化日期   日期格式：'yyyy-MM-dd HH:mm:ss'（HH为24小时制，hh为12小时制）   select date_format('2008-08-08 08:08:08','yyyy-MM-dd HH:mm:ss');  


--常用取整函数
--round： 四舍五入     select round(4.5);     
--ceil：  向上取整     select ceil(4.5);
--floor： 向下取整     select floor(4.5);
--
--常用字符串操作函数
--upper： 转大写         select upper('abcDEFg');
--lower： 转小写         select lower('abcDEFg');
--length： 长度          select length('abcDEFg');
--trim：  前后去空格     select length('   abcDEFg    ');  select length(trim('   abcDEFg    '));
--lpad： 向左补齐，到指定长度   select lpad('abc',11,'*');
--rpad：  向右补齐，到指定长度  select rpad('abc',11,'*');  
--substring: 剪切字符串         select substring('abcdefg',1,3);     select rpad(substring('13843838438',1,3),11,'*');
--regexp_replace： SELECT regexp_replace('100-200', '(\\d+)', 'num');   select regexp_replace('abc d e f',' ','');
--	使用正则表达式匹配目标字符串，匹配成功后替换！
--
--集合操作
--size： 集合中元素的个数
--map_keys： 返回map中的key
--map_values: 返回map中的value         select size(friends),map_keys(children),map_values(children) from person;
--array_contains: 判断array中是否包含某个元素     select array_contains(friends,'lili') from person;
--sort_array： 将array中的元素排序         select sort_array(split('1,3,4,5,2,6,9',','));   
--                                         select sort_array(split('a,d,g,b,c,f,e',','));

-------------------- Common date functions
-- Each statement below is an independent demo; run them one at a time.

-- Unix timestamps
select  unix_timestamp();-- seconds elapsed from 1970-01-01 to the current time
select  unix_timestamp("1970-01-01 00:00:05");-- timestamp of the given time
-- Timestamp -> date/time string
select from_unixtime(5);
-- Current date
select current_date();
-- Current date and time
select current_timestamp();
-- Extract the date part
select to_date(current_timestamp());
select to_date('2008-08-08 08:08:08');
-- Year / month / day and hour / minute / second
-- (the argument must be in a recognised date/time format)
select year(current_timestamp()),month (current_timestamp()),day(current_timestamp()),
hour(current_timestamp()),minute (current_timestamp()), second(current_timestamp());
-- Week of the year / day of the month, for the current or a given time
select weekofyear(current_timestamp());
select weekofyear('2008-08-08 08:08:08');
select dayofmonth(CURRENT_date());
select dayofmonth('2008-08-08 08:08:08');
-- Months between two dates; days between two dates
select months_between('2008-08-08','2008-09-08');
select datediff('2008-09-08','2008-08-08');
-- Add/subtract months; add/subtract days
select add_months('2008-08-08',1);
select date_add('2008-08-08',1);
select date_sub('2008-08-08',1);
-- Last day of the date's month
select last_day('2008-08-08');
-- Format a date with a pattern such as 'yyyy-MM-dd HH:mm:ss'.
-- fixed: HH is the 24-hour field; the original pattern used hh (12-hour),
-- which renders afternoon times incorrectly (e.g. 20:00 as 08:00).
select date_format('2008-08-08 08:08:08','yyyy-MM-dd HH:mm:ss');


-------------------- Common rounding functions (pick whichever the requirement calls for)
-- round half up
select round(4.6);
-- round up
select ceil(4.01);
-- round down
select floor(4.99);

-------------------- Common string functions
-- to upper case
select upper('sdadsadASSS');
-- to lower case
select lower('AAAAASASDDDA');
-- string length
select length('sdadasdasd');
-- strip leading and trailing spaces; spaces in the middle are kept and have to
-- be removed with a replace instead
select trim('    woshi   haoren     ');
-- pad on the left (lpad) / right (rpad) up to the given length
select lpad('abc',8,'*');
select rpad('abc',8,'*');
-- cut a substring: start position, then length
select substring('12345678',2,5);
select rpad(substring('13843838438',1,3),11,'*');
-- replace whatever the regular expression matches
select regexp_replace('a      b  c  -12 32',' ','');-- removes every space
select replace('d d d',' ','-');


-------------------- Collection functions (run against the demo table `test`)
show tables;
describe test;
select * from test limit 10;
-- number of elements in a collection column; struct columns are not
-- collections and are not counted this way
select
    size(friends),
    size(children)
from test;
-- keys and values of a map column
select
    map_keys(children),
    map_values(children)
from test;
-- sort the elements of an array
select sort_array(split('1,3,4,2,6,4,3,8',','));
select sort_array(split('b,a,ss,a,z,w,f,z',','));

hiveSQL常用操作语句（mysql）

1.建库语句：
	CREATE DATABASE [IF NOT EXISTS] database_name
	[COMMENT database_comment]
	[LOCATION hdfs_path]
	[WITH DBPROPERTIES (property_name=property_value, ...)];
		例：
			create DATABASE if NOT EXISTS hive_db2
			comment "my first database"
			location "/hive_db2"
2.库的修改：
	alter database hive_db2 set DBPROPERTIES ("createtime"="2018-12-19");
3.库的删除
	-- fixed clause order: IF EXISTS precedes the database name, CASCADE follows it.
	-- CASCADE is what allows dropping a database that still contains tables.
	drop database if exists db_hive cascade;
3.建表语句：
	CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name 
	[(col_name data_type [COMMENT col_comment], ...)] 
	[COMMENT table_comment] 
	[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] 
	[CLUSTERED BY (col_name, col_name, ...) 
	[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] 
	[ROW FORMAT row_format] 
	DELIMITED [FIELDS TERMINATED BY char] [COLLECTION ITEMS TERMINATED BY char]
			[MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char] 
	   | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
	[STORED AS file_format] Textfile
	[LOCATION hdfs_path]
	[TBLPROPERTIES (property_name=property_value, ...)]
	[AS select_statement]
		例1：
			create table student2(id int COMMENT "xuehao", name string COMMENT "mingzi")
			COMMENT "xueshengbiao"
			ROW format delimited
			fields terminated by '\t'
			STORED as Textfile
			location '/student2' -- 直接加载该目录下的数据文件到表中
			TBLPROPERTIES ("createtime"="2018-12-18");
		例2：
			create table student(id int, name string)
			row format delimited
			fields terminated by '\t';
			load data local inpath '/opt/module/datas/student.txt' into table student;
		例3：
			create table student4 like student2; -- 仅复制表结构
4.导入数据语句
	4.1 不加local则导入hdfs上文件，但会剪贴原文件，local本地仅粘贴
			load data [local] inpath '/opt/module/datas/student.txt' 
			[overwrite] into table student [partition (partcol1=val1,…)];
	4.2 创建表并导入数据（依据以存在的表）
			create table student6 as select * from student; -- 仅导入数据，不会导入其他细节属性 --被创建表不能是外部表 -- 被创建表不支持分区及分桶
	4.3 覆盖插入
			insert overwrite table student3 select * from student;
	4.4 插入带分区的表
			insert into table stu_par partition(month = '08') select id ,name from stu_par where month = '09';
	4.5 将单表中数据导入多表
			from student
			insert into table student4 select *
			insert into table student5 select *;
	4.6 多分区导入单表
			from stu_par
			insert into table stu_par partition(month = '06')
			select id ,name where month = '08'
			insert into table stu_par partition(month = '07')
			select id,name where month = '10';
5.表的修改操作
	5.1 修改表的属性
			alter table stu_ex set TBLPROPERTIES ('EXTERNAL' = 'TRUE');
	5.2 重命名表名
			alter table student4 rename to student3;
	5.3 修改表的serde属性（序列化和反序列化）
		-- fixed keyword spelling: SERDEPROPERTIES (was "serdepropertyes")
		alter table table_name set serdeproperties ('field.delim'='\t');
6.列的更新操作
	6.1 修改列语法
		ALTER TABLE table_name CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name]
	6.2 增加或替换列语法
		ALTER TABLE table_name ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...) 
		例1：
			增加列：alter table student2 add COLUMNS (score double);
		例2：
			修改列：alter table student2 CHANGE COLUMN score score int AFTER id;
		例3：
			替换列（全部替换）：alter table student2 replace COLUMNS (id int, name string);
7.带有分区的表
	7.0 查看分区
			show partitions table_name;
	7.1 创建单个分区
			create table stu_par(id int, name string)
			partitioned by (month string)
			ROW format delimited
			FIELDS terminated by '\t';
		-- 错误示例
			create table stu_par2(id int, name string)
			partitioned by (id int)
			ROW format delimited
			FIELDS terminated by '\t';  错！！！！（分区字段不能与表中已有的列同名）
		-- 加载数据到指定分区(分区不存在则自动创建)
			load data local inpath '/opt/module/datas/student.txt' into table stu_par partition(month = '12');
			load data local inpath '/opt/module/datas/student.txt' into table stu_par partition(month = '11');

		-- 合并分区查询结果
			select * from stu_par where month = '11'
			union
			select * from stu_par where month = '12';
	7.2 增加多个分区
		alter table stu_par add partition (month = '08') partition(month='07');
	7.3 删除多个分区
		alter table stu_par drop partition(month='08'),partition(month='09');
	7.4 创建多级分区
		create table stu_par2(id int, name string)
		partitioned  by (month string, day string)
		row format delimited
		FIELDS terminated by '\t';
	7.5 导入数据到多级分区
		load data local inpath '/opt/module/datas/student.txt' into table stu_par2 
		partition (month='12',day='19');
	7.6 向多级分区增加分区
		alter table stu_par2 add partition(month = '12', day = '17');
	7.7 查询多级分区中的数据
		select * from stu_par2 where day = '18';
	7.8 修复分区（也可以使用添加分区的语句）
		msck repair table dept_partition2;
8.创建外部表（删除表不会删除表中数据，仅删除表的元数据）
	create external table stu_ex2(id int, name string)
	ROW format delimited
	FIELDS terminated by '\t'
	location '/student';
	8.1 外部表与内部表的转换
		alter table stu_ex set TBLPROPERTIES ('EXTERNAL' = 'TRUE');
9.数据的导出
	9.1 导出同时格式化（不加local则导出到hdfs）
		insert overwrite local directory '/opt/module/datas/student'
		row format delimited
		fields terminated by '\t'
		select * from student;
	9.2 hadoop命令导出到本地
		dfs -get /user/hive/warehouse/student/month=201709/000000_0 /opt/module/datas/export/student3.txt;
	9.3 shell命令导出
		hive -f/-e 执行语句或者脚本 > file	-- -f跟文件，-e跟执行语句
	9.4 export仅可以导出到hdfs，常用于hdfs集群hive表迁徙
		export table default.student to '/user/hive/warehouse/export/student'; -- 同时会导出表的元数据
10.数据的导入(仅能导入export导出的数据，因为需要获取表的元数据)
	import table table_name from 'export导出数据的路径';
11.清除表中数据
	truncate table student; -- 只能删除管理表，不能删除外部表中数据
12.Like、RLike：RLike可以使用java的正则表达式
13.group by及having的使用 -- hive中对于使用group by后查询字段仅限group by的字段及聚合函数
	select deptno, avg(sal) avg_sal from emp group by deptno having avg_sal > 2000;
14.mapreduce的join	
	14.1 mapreduce中的reducejoin特点：在mapper阶段进行数据关联标记，在reducer阶段进行数据聚合
	14.2 mapreduce中的mapjoin特点：将小表加载到内存中，在mapper阶段根据内存中的数据对大表进行数据处理，没有reduce阶段
15.HQL的join
	15.1 仅支持等值连接不支持非等值连接
		例：不支持select * from A left join B on A.id != B.id;
	15.2 不支持在on条件中使用‘or’
	15.3 每个join都会启动一个mapreduce任务，但hive默认开启mapreduce优化
		关闭mapreduce优化：set hive.auto.convert.join=false;
16.order by
	会进行全局排序，则reduce数量被看作1个，效率低下
17.sort by -- 局部排序
	对于每个mapreduce各分区进行局部排序，分区中的数据随机给定
18.distribute by
	18.1 即mapreduce中自定义分区操作，hql书写规则：先分区后排序  
	18.2 distribute by的分区规则是根据分区字段的hash码与reduce的个数进行模除后，余数相同的分到一个区。
19.cluster by
	当distribute和sort字段相同时可用cluster进行替代，默认正序，但不支持desc倒序
20.分桶 -- 分桶表的数据需要通过子查询的方式导入
	20.1 开启分桶的设置
		set hive.enforce.bucketing=true;
	20.2 分桶表的创建
		create table stu_buck(id int, name string)
		clustered by(id) 
		into 4 buckets
		row format delimited fields terminated by '\t';
	20.3 分桶的规则
		用分桶字段的hash值与桶的个数求余，来决定数据存放在哪个桶。
	20.4 分桶与分区区别
		a. 分桶结果在表的目录下存在多个分桶文件
		b. 分区是将数据存放在表所在目录不同文件路径下
		c. 分区针对是数据存储路径，分桶针对的是数据文件，分桶可以在分区的基础粒度细化
21.分桶的抽样	
	21.1 抽样语法 -- 必须 x<=y
		select * from table_name tablesample(bucket x out of y on bucketKey); -- on bucketKey可不写
	21.2 抽样规则
		a. y用来决定抽样比例，必须为bucket数的倍数或者因子，
			例：bucket数为4时，当y=2时，4/2=2,则抽取两个桶的数据，具体抽取哪个桶由x决定
		b. x用来决定抽取哪个桶中的数据
			例1：当bucket=4, y=4, x=2时，则需要抽取的数据量为bucket/y=1个桶，抽取第x桶的数据
			例2：当bucket=4, y=2, x=2时，则需要抽取的数据量为bucket/y=2个桶，抽取第x桶和第x+y桶的数据
			例3：当bucket=12, y=3, x=2时，抽bucket/y=4个桶，抽取第x、x+y、x+2y、x+3y桶（即第2、5、8、11桶）的数据
22.NVL函数
	NVL(column_name, default_value),如果该行字段值为null,则返回default_value的值
23.CONCAT_WS()函数
	使用规则：concat_ws(separator, [string | array(string)]+)
	例：select concat_ws('_', 'www', array('achong','com')) 拼接结果：www_achong_com
24.COLLECT_SET(col)函数
	使用规则：仅接受基本数据类型，将字段去重汇总，并返回array类型
	例（行转列）：表结构
			name	xingzuo     	  blood
			孙悟空	  白羊座	        A
			大海	  射手座	        A
			宋宋	  白羊座	        B
			猪八戒	  白羊座	        A
			凤姐	  射手座	        A
		需求：把星座和血型一样的人归类到一起
			射手座,A            大海|凤姐
			白羊座,A            孙悟空|猪八戒
			白羊座,B            宋宋
		查询语句：
			SELECT CONCAT_WS(',', xingzuo, blood), CONCAT_WS('|', COLLECT_SET(NAME))
			FROM xingzuo
			GROUP BY xingzuo, blood
25.EXPLODE(爆炸函数)及LATERAL VIEW(侧写函数)
	25.1 explode:将列中的array或者map结构拆分成多行 -- 一般需结合lateral_view使用
	25.2 lateral_view: LATERAL VIEW udtf(expression) 表别名 AS 列别名
	例（列转行）
		select movie, category_name
		from  movie_info 
		lateral view explode(category) table_tmp as category_name;
26.开窗函数 -- 常结合聚合函数使用，解决即需要聚合前的数据又需要聚合后的数据展示
	26.1 语法：UDAF() over (PARTITION By col1，col2 order by col3 窗口子句（rows between .. and ..）) AS 列别名
			（partition by .. order by）可替换为（distribute by .. sort by ..）
	26.2 over(): 指定分析数据窗口大小
	26.3 窗口子句 -- 先分区再排序然后接rows限定执行窗口
		26.3.01 n PRECEDING：往前n行数据
		26.3.02 n FOLLOWING：往后n行数据
			例：select name, orderdate, cost, sum(cost) over(
																partition by name 
																order by orderdate 
																rows between 1 PRECEDING and 1 FOLLOWING
															) from business;
		26.3.03 CURRENT ROW：当前行
		26.3.04 UNBOUNDED PRECEDING 表示从前面的起点
		26.3.05 UNBOUNDED FOLLOWING表示到后面的终点
			例：select name, orderdate, cost, sum(cost) over(
																partition by name 
																order by orderdate 
																rows between CURRENT ROW and UNBOUNDED FOLLOWING
															) from business;
27.LAG(col,n,default_val)：往前第n行数据
28.LEAD(col,n, default_val)：往后第n行数据
	例：select name, orderdate, cost, lag(orderdate, 1, 'null') over(partition by name order by orderdate)
		from business; -- 即获取前1行的orderDate数据
29.ntile(n):把有序分区中的行分为n组，每组编号从1开始 -- 分组规则详见:ntile的分组规则.sql
	例：select name,orderdate,cost, ntile(5) over(order by orderdate) num from business
30.Rank函数
	rank() 出现相同排序时，总数不变
	dense_rank() 出现相同排序时，总数减少
	row_number() 不会出现相同排序




sql执行顺序
	from... where...group by... having.... select ... order by...
	
hql执行顺序	
	from … where … group by … having … select … order by … 或
	from … on … join … where … group by … having … select … distinct … order by … limit
	存在开窗函数时，起码在order by之前执行

	
例题1：-- 集合类型数据导入
	{
		"name": "songsong",
		"friends": ["bingbing" , "lili"] ,       //列表Array, 
		"children": {                      //键值Map,
			"xiao song": 18 ,
			"xiaoxiao song": 19
		},
		"address": {                      //结构Struct,
			"street": "hui long guan" ,
			"city": "beijing" 
		}
	}

	基于上述数据结构，我们在Hive里创建对应的表，并导入数据。
	1.1 格式化数据为：
		songsong,bingbing_lili,xiao song:18_xiaoxiao song:19,hui long guan_beijing
		yangyang,caicai_susu,xiao yang:18_xiaoxiao yang:19,chao yang_beijing
	1.2 建表语句：
		-- Table for the sample records above: a name plus three complex columns.
		-- fixed: Hive complex types must carry their type parameters — element
		-- type for array, key/value types for map, named fields for struct.
		-- The delimiters match the formatted data: ',' between columns, '_'
		-- between collection items, ':' between a map key and its value.
		create table test(name string, 
		friends array<string>,
		children map<string,int>,
		address struct<street:string,city:string>)
		row format delimited
		fields terminated by ','
		collection items terminated by '_'
		map keys terminated by ':';
	1.3 数据写入语句
		load data local inpath '/opt/module/datas/test.txt' into table test;
	1.4 查询语句
		select friends[0] friend,children['xiao song'] age,address.city from test where name = 'songsong';

你可能感兴趣的:(hive,大数据,mysql,hive)

CentOS7环境卸载MySQL5.7 Hadoop_Liang mysql 数据库 mysql
备份重要数据切记，卸载之前先备份mysql重要的数据。备份一个数据库例如：备份名为mydatabase的数据库到backup.sql的文件中mysqldump-uroot-ppassword123mydatabase>backup.sql备份所有数据库mysqldump-uroot-ppassword123--all-databases>all_databases_backup.sql注意：-p后
centos7安装 mysql5.7(安装包) heiPony linux mysql mariadb centos mysql
一.卸载centos7自带数据库查看系统自带的Mariadbrpm-qa|grepmariadbmariadb-libs-5.5.44-2.el7.centos.x86_64卸载rpm-e--nodepsmariadb-libs-5.5.44-2.el7.centos.x86_64删除etc目录下的my.cnfrm/etc/my.cnf二.检查mysql是否存在(有就卸载,删除相关文件)rpm-q
MySQL Explain 详解：从入门到精通，让你的 SQL 飞起来
引言：为什么Explain是SQL优化的“照妖镜”？在Java开发中，我们常常会遇到数据库性能瓶颈的问题。一条看似简单的SQL语句，在数据量增长到一定规模后，可能会从毫秒级响应变成秒级甚至分钟级响应，直接拖慢整个应用的性能。此时，你是否曾困惑于：为什么这条SQL突然变慢了？索引明明建了，为什么没生效？到底是哪里出了问题？答案就藏在MySQL的EXPLAIN命令里。EXPLAIN就像一面“照妖镜”，
Linux/Centos7离线安装并配置MySQL 5.7 有事开摆无事百杜同学 LInux/CentOS7 linux mysql 运维
Linux/Centos7离线安装并配置MySQL5.7超详细教程一、环境准备1.下载MySQL5.7离线包2.使用rpm工具卸载MariaDB（避免冲突）3.创建系统级别的MySQL专用用户二、安装与配置1.解压并重命名MySQL目录2.创建数据目录和配置文件3.设置目录权限4.初始化MySQL5.配置启动脚本6.配置环境变量三、启动与验证1.启动MySQL服务2.获取初始密码3.登录并修改密码
数字孪生技术为UI前端注入新活力：实现产品设计的沉浸式体验 ui设计前端开发老司机 ui
hello宝子们...我们是艾斯视觉擅长ui设计、前端开发、数字孪生、大数据、三维建模、三维动画10年+经验!希望我的分享能帮助到您!如需帮助可以评论关注私信我们一起探讨!致敬感谢感恩!一、引言：从“平面交互”到“沉浸体验”的UI革命当用户在电商APP中翻看3D家具模型却无法感知其与自家客厅的匹配度，当设计师在2D屏幕上绘制汽车内饰却难以预判实际乘坐体验——传统UI设计的“平面化、静态化、割裂感”
C++11中的std::function
文章转载自：http://www.jellythink.com/archives/771看看这段代码先来看看下面这两行代码：std::functiononKeyPressed;std::functiononKeyReleased;这两行代码是从Cocos2d-x中摘出来的，重点是这两行代码的定义啊。std::function这是什么东西？如果你对上述两行代码表示毫无压力，那就不妨再看看本文，就当温
提升企业级数据处理效率！TDengine 四个集群优化点详解 TDengine （老段） TDengine 运维大数据数据库物联网时序数据库服务器运维 tdengine
为了帮助企业更好地进行大数据处理，我们在此前TDengine3.x系列版本中进行了几项与集群相关的优化和新功能开发，以提升集群的稳定性和在异常情况下的恢复能力。这些优化包括clusterID隔离、leaderrebalance、raftlearner和restorednode。本文将对这几项重要优化进行详细阐述，以解答企业在此领域的疑问，并帮助大家更好地应对相关挑战。clusterID隔离问题fi
ETL可视化工具 DataX -- 简介( 一) dazhong2012 软件工具数据仓库 datax ETL
引言DataX系列文章：ETL可视化工具DataX–安装部署(二)ETL可视化工具DataX–DataX-Web安装(三)1.1DataX1.1.1DataX概览DataX是阿里云DataWorks数据集成的开源版本，在阿里巴巴集团内被广泛使用的离线数据同步工具/平台。DataX实现了包括MySQL、Oracle、OceanBase、SqlServer、Postgre、HDFS、Hive、ADS、
AnythingLLM教程系列之 12 AnythingLLM 上的 Ollama 与 MySQL+PostgreSQL 知识大胖 NVIDIA GPU和大语言模型开发教程 mysql postgresql 数据库 anythingllm ollama
简介一款全栈应用程序，可让您将任何文档、资源或内容转换为上下文，任何LLM都可以在聊天期间将其用作参考。此应用程序允许您选择要使用的LLM或矢量数据库，并支持多用户管理和权限。本文将介绍如何在AnythingLLM上将Ollama与MySQL+PostgreSQL连接起来。系列文章如何安装《无需任何代码构建自己的大模型知识库：AnythingLLM最易于使用的一体化AI应用程序，可以执行RAG、A
Java实习模拟面试之安徽九德 —— 面向对象编程、Spring框架与数据库技术详解培风图南以星河揽胜 java面试 java 面试 spring
关键词：Java实习生、模拟面试、安徽九德、SpringBoot、MySQL、Redis、面向对象编程、团队协作一、前言作为一名计算机相关专业的学生，想要顺利进入一家互联网公司或软件开发企业实习，技术面试是必须面对的一道门槛。本文将带你走进一场真实的Java实习生模拟面试场景，以“安徽九德”公司为背景，围绕其发布的招聘岗位要求，进行一次全方位的技术面试演练。本次模拟面试涵盖以下核心知识点：Java
优化版三国主题MySQL建表与查询练习（细节增强）韩公子的Linux大集市五 MySQL运维DBA mysql 数据库
文章目录优化版三国主题MySQL建表与查询练习（细节增强）题目一：三国人物信息表（全面优化）建表语句（增强约束与注释）插入数据（含完整信息）查询练习（增强实用性）题目二：三国战役表（增强关系设计）建表语句（完整关系模型）插入数据（完整战役信息）查询练习（多表关联）综合实战演练1.人物能力值分析2.战役地图查询3.胜负因素分析设计亮点总结优化版三国主题MySQL建表与查询练习（细节增强）题目一：三国
中国银联豪掷1亿采购海光C86架构服务器信创新态势海光芯片 C86 国产芯片海光信息
近日，中国银联国产服务器采购大单正式敲定，基于海光C86架构的服务器产品中标，项目金额超过1亿元。接下来，C86服务器将用于支撑中国银联的虚拟化、大数据、人工智能、研发测试等技术场景，进一步提升其业务处理能力、用户服务效率和信息安全水平。作为我国重要的银行卡组织和金融基础设施，中国银联在全球183个国家和地区设有银联受理网络，境内外成员机构超过2600家，是世界三大银行卡品牌之一。此次中国银联发力
Spring Boot基础小李是个程序 spring boot 后端 java
5.SpringBoot配置解析5.1.基础服务端口：server.port=8080（应用启动后监听8080端口）应用名称：spring.application.name=Chat64（注册到服务发现等场景时的标识）5.2.数据库连接（MySQL）URL：jdbc:mysql://localhost:3306/ai-chat（连接本地3306端口的ai-chat数据库，含时区、编码等参数）驱动：
Docker初识：mysql8主从复制（单向）- 主从搭建扩展知识滴水可藏海 #mysql 数据库
主从服务（master-slave）新学习到的知识。1、全库同步与部分同步上回书说到Docker初识：mysql8主从复制（单向）的配置都是针对全库配置的。但是实际上并不需要针对全库做备份，只需要对一些特别重要的库或者表来进行同步。例如information_schema等。可以通过配置文件中的一些属性指定需要针对哪些库或者哪些表记录binlog。Master配置：#需要同步的二进制数据库名bin
Mysql字段没有索引，通过where x = 3 for update是使用什么级别的锁
没有索引时，FORUPDATE会锁住整个表现在，你正在一本一本地翻看所有书，寻找“维修中”的书，并且你对管理员说：“在我清点和修改完之前，别人不能动这些书，也不能往这个范围里加新书！”问题1：如何锁住你找到的“维修中”的书？你每找到一本“维修中”的书，就给它贴上一个“正在处理，请勿触碰”的标签（行级排他锁）。问题2：如何防止别人“往这个范围里加新书”？这是最关键的。因为你没有“状态”的目录卡片（没
MySQL数据库访问（C/C++）敲上瘾 MySQL数据库 mysql 数据库 c++c语言数据库开发数据库架构
访问数据库的方式：命令行：使用命令行输入SQL指令直接访问。需记忆命令和SQL语法，对新手不友好。正因如此推荐新手使用该方式访问，能倒逼学习者对SQL语法的记忆，并对MySQL更深入理解。图形化界面访问：使用图形化界面工具，如：DBeaver、DataGrip、Navicat、HeidiSQL（MySQL）、MySQLWorkbench。特点：有语法提示，可以直接对数据手动增删改。编程接口：在编写
MySQL多表关系详解六七_Shmily 数据库 mysql android 数据库
MySQL中的多表关系是关系型数据库设计的核心，它描述了不同表之间数据如何相互关联。合理设计表关系是构建高效、无冗余、易于维护的数据库模式的关键。MySQL主要支持三种基本的多表关系：1.一对一关系(One-to-OneRelationship)概念：表A中的一条记录最多只与表B中的一条记录相关联，反之亦然。实现方式：共享主键：表B的主键同时也是指向表A主键的外键。这是最严格的实现，确保绝对的一对
AWS 管理秘籍（一）绝不原创的飞龙默认分类默认分类
原文：annas-archive.org/md5/cf1c4e1db999839ba88fc56df4011156译者：飞龙协议：CCBY-NC-SA4.0序言AWS平台的增长速度非常快，正在被各行各业广泛采用。正如俗话所说，朋友不会让朋友建立数据中心。不管从哪个角度看，按需计算、网络和存储的模式将持续存在。尤其是当你看到AWS平台在功能和增强方面的更新速度时，很难再去反对站在巨人的肩膀上，尤其是
全面探索Kafka：架构、应用与流处理
Kafka：企业级消息系统与流处理平台的深度解析ApacheKafka作为分布式流处理平台，广泛应用于大数据处理和实时分析领域。本文将基于其官方文档，详细探讨Kafka的核心功能、应用场景以及如何进行有效管理。背景简介Kafka作为高吞吐量的消息系统，支持企业级的发布-订阅模式。它能够处理大量实时数据，并支持高并发读写操作。本文将依据Kafka官方文档的内容，逐层深入，从入门到高级应用，帮助读者全
Flink时间窗口详解 bxlj_jcj Flink flink 大数据
一、引言在大数据流处理的领域中，Flink的时间窗口是一项极为关键的技术，想象一下，你要统计一个电商网站每小时的订单数量。由于订单数据是持续不断产生的，这就形成了一个无界数据流。如果没有时间窗口的概念，你就需要处理无穷无尽的数据，难以进行有效的统计分析。而时间窗口的作用，就是将这无界的数据流按照时间维度切割成一个个有限的“数据块”，方便我们对这些数据进行处理和分析。比如，我们可以定义一个1小时的时
探索实时流处理的未来：Kafka Streams 深度指南秋或依
探索实时流处理的未来：KafkaStreams深度指南项目介绍欢迎进入KafkaStreams：实时流处理的世界！这不仅仅是一本书，更是一个通往流处理领域深层奥秘的门户。由PrashantPandey编著，这本书以ApacheKafka2.1中的KafkaStreams库为核心，为读者铺就了一条从理解基础概念到熟练掌握KafkaStreams编程的路径。无论是软件工程师、数据架构师，还是对大数据处
Elasticsearch搜索引擎存储：从原理到实践的全景解析 Python×CATIA工业智造搜索引擎 elasticsearch 大数据
引言在大数据时代，数据规模呈指数级增长，传统数据库的模糊查询、实时分析能力逐渐成为瓶颈。Elasticsearch（简称ES）凭借其分布式架构、实时搜索和灵活的数据分析能力，成为企业级搜索与存储的核心引擎。截至2025年，ES在全球日志分析、电商搜索、实时监控等场景的市场占有率超过60%。本文将从存储架构、核心技术、应用场景及优化策略四个维度，深入解析Elasticsearch的设计哲学与实践价值
AWS Terraform 架构指南（二）绝不原创的飞龙默认分类默认分类
原文：annas-archive.org/md5/8b2d222956a050c7632b9eee086dadcf译者：飞龙协议：CCBY-NC-SA4.0第七章：7在项目中实现Terraform您准备好开始使用Terraform开发您的AWS基础设施了吗？在本章中，您将学习Terraform的基础知识，并了解如何在AWS中部署您的第一个模板。我们将介绍选择合适的AWS提供商和选择满足您项目需求的
ARM64+KylinOS环境下MySQL数据库的图文版安装步骤和故障排查 weixin_47690215 数据库 mysql
前言随着信息技术应用创新产业的快速发展，ARM64架构处理器与麒麟操作系统（KylinOS）已成为我国关键信息基础设施建设的核心组合。MySQL作为全球最流行的开源关系型数据库，在金融、政务等关键领域的国产化替代进程中发挥着重要作用。本文档针对ARM64架构与KylinOSV10SP2/SP3的深度适配需求，提供完整的MySQL8.0部署方案及故障排查体系。背景意义技术自主可控：基于华为鲲鹏、飞腾
mysql忘记密码的三种解决方案学掌门数据库程序员 IT mysql android 数据库
1、修改密码的三种方式mysql用户分为root用户（超级管理员，拥有所有权限）和普通用户，mysql服务器通过权限表来控制用户对数据库的访问,这些权限表存于root用户下的mysql数据库中。在使用mysql数据库过程中，往往需要修改密码的操作，下面介绍三种修改密码的方式：1）使用mysqladmin命令在命令行指定新密码mysqladmin-uroot-ppassword'新密码’回车，将提醒
【Kafka专栏 13】Kafka的消息确认机制：不是所有的“收到”都叫“确认”！
作者名称：夏之以寒作者简介：专注于Java和大数据领域，致力于探索技术的边界，分享前沿的实践和洞见文章专栏：夏之以寒-kafka专栏专栏介绍：本专栏旨在以浅显易懂的方式介绍Kafka的基本概念、核心组件和使用场景，一步步构建起消息队列和流处理的知识体系，无论是对分布式系统感兴趣，还是准备在大数据领域迈出第一步，本专栏都提供所需的一切资源、指导，以及相关面试题，立刻免费订阅，开启Kafka学习之旅！
MyBatis-Plus 使用wrapper自定义SQL
MyBatis-Plus使用wrapper自定义SQL，以下是单表查询。官方文档官方的例子：//mapper接口@Select("select*frommysql_data${ew.customSqlSegment}")ListgetAll(@Param(Constants.WRAPPER)Wrapperwrapper);//xmlListgetAll(Wrapperew);SELECT*FROM
SpringBoot + Logback 实现日志脱敏【手把手案例】甘蓝聊Java 【更新中...】项目中的那些事 spring boot logback Logback日志脱敏 Java日志脱敏
文章目录背景分析现有Logback配置了解PatternLayout中的Converter解决第1步：创建自定义Converter第2步：自定义logback配置文件第3步：调整yaml配置第4步：启动服务并验证参考背景SpringBoot+MyBatis+MySQL+Logback框架，使用MySQL的AES_DECRYPT()和AES_ENCRYPT()函数，由于日志设置为debug级别，导致
Mysql数据库可以使用命令行msyql -u root -p连接，但是Navicat连不上 2501_92753117 数据库 mysql
1.Mysql服务启动1.1输入命令回车输入密码可以正常连接msyql-uroot-p1.1.2Navicat连不上2.解决方案2.1连接mysqlmsyql-uroot-p1.2.2查询所有数据库showdatabases;1.2.3切换到mysql数据库usemysql;1.2.4查询hostSELECThost,userFROMuserWHEREuser='root';1.2.5更新任意ip
Hive简介
文章目录Hive简介Hive特点Hive和RDBMS的对比Hive的架构Hive的数据组织Hive数据类型Hive简介1、Hive由Facebook实现并开源2、是基于Hadoop的一个数据仓库工具3、可以将结构化的数据映射为一张数据库表4、并提供HQL(HiveSQL)查询功能5、底层数据是存储在HDFS上6、Hive的本质是将SQL语句转换为MapReduce任务运行7、使不熟悉MapRedu
强大的销售团队背后竟然是大数据分析的身影蓝儿唯美数据分析
Mark Roberge是HubSpot的首席财务官，在招聘销售职位时使用了大量数据分析。但是科技并没有挤走直觉。大家都知道数理学家实际上已经渗透到了各行各业。这些热衷数据的人们通过处理数据理解商业流程的各个方面，以重组弱点，增强优势。 Mark Roberge是美国HubSpot公司的首席财务官，HubSpot公司在构架集客营销现象方面出过一份力——因此他也是一位数理学家。他使用数据分析
Haproxy+Keepalived高可用双机单活 bylijinnan 负载均衡 keepalived haproxy 高可用
我们的应用MyApp不支持集群，但要求双机单活（两台机器：master和slave）： 1.正常情况下，只有master启动MyApp并提供服务 2.当master发生故障时，slave自动启动本机的MyApp，同时虚拟IP漂移至slave，保持对外提供服务的IP和端口不变 F5据说也能满足上面的需求，但F5的通常用法都是双机双活，单活的话还没研究过服务器资源 10.7
eclipse编辑器中文乱码问题解决 0624chenhong eclipse乱码
使用Eclipse编辑文件经常出现中文乱码或者文件中有中文不能保存的问题，Eclipse提供了灵活的设置文件编码格式的选项，我们可以通过设置编码格式解决乱码问题。在Eclipse可以从几个层面设置编码格式：Workspace、Project、Content Type、File 本文以Eclipse 3.3（英文）为例加以说明： 1. 设置Workspace的编码格式： Windows-&g
基础篇--resources资源不懂事的小屁孩 android
最近一直在做java开发，偶尔敲点android代码，突然发现有些基础给忘记了，今天用半天时间温顾一下resources的资源。 String.xml 字符串资源涉及国际化问题 http://www.2cto.com/kf/201302/190394.html string-array
接上篇补上window平台自动上传证书文件的批处理问卷酷的飞上天空 window
@echo off : host=服务器证书域名或ip，需要和部署时服务器的域名或ip一致 ou=公司名称, o=公司名称 set host=localhost set ou=localhost set o=localhost set password=123456 set validity=3650 set salias=s
企业物联网大潮涌动：如何做好准备？蓝儿唯美企业
物联网的可能性也许是无限的。要找出架构师可以做好准备的领域然后利用日益连接的世界。尽管物联网（IoT）还很新，企业架构师现在也应该为一个连接更加紧密的未来做好计划，而不是跟上闸门被打开后的集成挑战。“问题不在于物联网正在进入哪些领域，而是哪些地方物联网没有在企业推进，” Gartner研究总监Mike Walker说。 Gartner预测到2020年物联网设备安装量将达260亿，这些设备在全
spring学习——数据库（mybatis持久化框架配置） a-john mybatis
Spring提供了一组数据访问框架，集成了多种数据访问技术。无论是JDBC，iBATIS(mybatis)还是Hibernate，Spring都能够帮助消除持久化代码中单调枯燥的数据访问逻辑。可以依赖Spring来处理底层的数据访问。 mybatis是一种Spring持久化框架，要使用mybatis，就要做好相应的配置： 1，配置数据源。有很多数据源可以选择，如：DBCP，JDBC，aliba
Java静态代理、动态代理实例 aijuans Java静态代理
采用Java代理模式，代理类通过调用委托类对象的方法，来提供特定的服务。委托类需要实现一个业务接口，代理类返回委托类的实例接口对象。按照代理类的创建时期，可以分为：静态代理和动态代理。所谓静态代理：　指程序员创建好代理类，编译时直接生成代理类的字节码文件。所谓动态代理：　在程序运行时，通过反射机制动态生成代理类。一、静态代理类实例： 1、Serivce.ja
Struts1与Struts2的12点区别 asia007 Struts1与Struts2
1) 在Action实现类方面的对比：Struts 1要求Action类继承一个抽象基类；Struts 1的一个具体问题是使用抽象类编程而不是接口。Struts 2 Action类可以实现一个Action接口，也可以实现其他接口，使可选和定制的服务成为可能。Struts 2提供一个ActionSupport基类去实现常用的接口。即使Action接口不是必须实现的，只有一个包含execute方法的P
初学者要多看看帮助文档不要用js来写Jquery的代码百合不是茶 jquery js
解析json数据的时候需要将解析的数据写到文本框中, 出现了用js来写Jquery代码的问题; 1, JQuery的赋值有问题代码如下: data.username 表示的是: 网易 $("#use
经理怎么和员工搞好关系和信任 bijian1013 团队项目管理管理
产品经理应该有坚实的专业基础，这里的基础包括产品方向和产品策略的把握，包括设计，也包括对技术的理解和见识，对运营和市场的敏感，以及良好的沟通和协作能力。换言之，既然是产品经理，整个产品的方方面面都应该能摸得出门道。这也不懂那也不懂，如何让人信服？如何让自己懂？就是不断学习，不仅仅从书本中，更从平时和各种角色的沟通
如何为rich:tree不同类型节点设置右键菜单 sunjing contextMenu tree Richfaces
组合使用target和targetSelector就可以啦，如下： <rich:tree id="ruleTree" value="#{treeAction.ruleTree}" var="node" nodeType="#{node.type}" selectionChangeListener=&qu
【Redis二】Redis2.8.17搭建主从复制环境 bit1129 redis
开始使用Redis2.8.17 Redis第一篇在Redis2.4.5上搭建主从复制环境，对它的主从复制的工作机制，真正的惊呆了。不知道Redis2.8.17的主从复制机制是怎样的，Redis到了2.4.5这个版本，主从复制还做成那样，Impossible is nothing! 本篇把主从复制环境再搭一遍看看效果，这次在Unbuntu上用官方支持的版本。 Ubuntu上安装Red
JSONObject转换JSON--将Date转换为指定格式白糖_ JSONObject
项目中，经常会用JSONObject插件将JavaBean或List<JavaBean>转换为JSON格式的字符串，而JavaBean的属性有时候会有java.util.Date这个类型的时间对象，这时JSONObject默认会将Date属性转换成这样的格式： {"nanos":0,"time":-27076233600000,
JavaScript语言精粹读书笔记 braveCS JavaScript
【经典用法】： //①定义新方法 Function .prototype.method=function(name, func){ this.prototype[name]=func; return this; } //②给Object增加一个create方法，这个方法创建一个使用原对
编程之美-找符合条件的整数用字符串来表示大整数避免溢出 bylijinnan 编程之美
import java.util.LinkedList; public class FindInteger { /** * 编程之美找符合条件的整数用字符串来表示大整数避免溢出 * 题目：任意给定一个正整数N，求一个最小的正整数M(M>1)，使得N*M的十进制表示形式里只含有1和0 * * 假设当前正在搜索由0，1组成的K位十进制数
读书笔记 chengxuyuancsdn 读书笔记
1、Struts访问资源 2、把静态参数传递给一个动作 3、<result>type属性 4、s:iterator、s:if c:forEach 5、StringBuilder和StringBuffer 6、spring配置拦截器 1、访问资源 (1)通过ServletActionContext对象和实现ServletContextAware,ServletReque
[通讯与电力]光网城市建设的一些问题 comsci 问题
信号防护的问题,前面已经说过了,这里要说光网交换机与市电保障的关系我们过去用的ADSL线路,因为是电话线,在小区和街道电力中断的情况下,只要在家里用笔记本电脑+蓄电池,连接ADSL,同样可以上网........
oracle 空间RESUMABLE daizj oracle 空间不足 RESUMABLE 错误挂起
空间RESUMABLE操作转 Oracle从9i开始引入这个功能，当出现空间不足等相关的错误时，Oracle可以不是马上返回错误信息，并回滚当前的操作，而是将操作挂起，直到挂起时间超过RESUMABLE TIMEOUT，或者空间不足的错误被解决。这一篇简单介绍空间RESUMABLE的例子。第一次碰到这个特性是在一次安装9i数据库的过程中，在利用D
重构第一次写的线程池 dieslrae 线程池 python
最近没有什么学习欲望,修改之前的线程池的计划一直搁置,这几天比较闲,还是做了一次重构,由之前的2个类拆分为现在的4个类. 1、首先是工作线程类:TaskThread,此类为一个工作线程,用于完成一个工作任务,提供等待(wait),继续(proceed),绑定任务(bindTask)等方法 #!/usr/bin/env python # -*- coding:utf8 -*-
C语言学习六指针 dcj3sjt126com c
初识指针，简单示例程序： /* 指针就是地址，地址就是指针地址就是内存单元的编号指针变量是存放地址的变量指针和指针变量是两个不同的概念但是要注意：通常我们叙述时会把指针变量简称为指针，实际它们含义并不一样 */ # include <stdio.h> int main(void) { int * p; // p是变量的名字， int *
yii2 beforeSave afterSave beforeDelete dcj3sjt126com delete
public function afterSave($insert, $changedAttributes) { parent::afterSave($insert, $changedAttributes); if($insert) { //这里是新增数据 } else { //这里是更新数据 } }
timertask shuizhaosi888 timertask
java.util.Timer timer = new java.util.Timer(true); // true 说明这个timer以daemon方式运行（优先级低， // 程序结束timer也自动结束），注意，javax.swing // 包中也有一个Timer类，如果import中用到swing包， // 要注意名字的冲突。 TimerTask task = new
Spring Security（13）——session管理 234390216 session Spring Security 攻击保护超时
session管理目录 1.1 检测session超时 1.2 concurrency-control 1.3 session 固定攻击保护
公司项目NODEJS实践0.3[ mongo / session ...] 逐行分析JS源代码 mongodb session nodejs
http://www.upopen.cn 一、前言书接上回，我们搭建了WEB服务端路由、模板等功能，完成了register 通过ajax与后端的通信，今天主要完成数据与mongodb的存取，实现注册 / 登录 /
pojo.vo.po.domain区别 LiaoJuncai java VO POJO javabean domain
　　POJO = "Plain Old Java Object"，是MartinFowler等发明的一个术语，用来表示普通的Java对象，不是JavaBean, EntityBean 或者 SessionBean。POJO不担当任何特殊的角色，也不实现任何特殊的Java框架的接口，如EJB， JDBC等等。　　　　即POJO是一个简单的普通的Java对象，它包含业务逻辑
Windows Error Code OhMyCC windows
0 操作成功完成. 1 功能错误. 2 系统找不到指定的文件. 3 系统找不到指定的路径. 4 系统无法打开文件. 5 拒绝访问. 6 句柄无效. 7 存储控制块被损坏. 8 存储空间不足, 无法处理此命令. 9 存储控制块地址无效. 10 环境错误. 11 试图加载格式错误的程序. 12 访问码无效. 13 数据无效. 14 存储器不足, 无法完成此操作. 15 系
在storm集群环境下发布Topology roadrunners 集群 storm topology spout bolt
storm的topology设计和开发就略过了。本章主要来说说如何在storm的集群环境中，通过storm的管理命令来发布和管理集群中的topology。 1、打包打包插件是使用maven提供的maven-shade-plugin，详细见maven-shade-plugin。 <plugin> <groupId>org.apache.maven.
为什么不允许代码里出现“魔数” tomcat_oracle java
　　在一个新项目中，我最先做的事情之一，就是建立使用诸如Checkstyle和Findbugs之类工具的准则。目的是制定一些代码规范，以及避免通过静态代码分析就能够检测到的bug。　　迟早会有人给出案例说这样太离谱了。其中的一个案例是Checkstyle的魔数检查。它会对任何没有定义常量就使用的数字字面量给出警告，除了-1、0、1和2。　　很多开发者在这个检查方面都有问题，这可以从结果
zoj 3511 Cake Robbery(线段树) 阿尔萨斯 线段树
题目链接：zoj 3511 Cake Robbery 题目大意：就是有一个N边形的蛋糕，切M刀，从中挑选一块边数最多的，保证没有两条边重叠。解题思路：有多少个顶点即为有多少条边，所以直接按照切刀切掉点的个数排序，然后用线段树维护剩下的还有哪些点。 #include <cstdio> #include <cstring> #include <vector&