2.字段预处理
awk: line 4: runaway string constant " …
sh文件有误
#!/bin/bash
#下面设置输入文件,把用户执行pre_deal.sh命令时提供的第一个参数作为输入文件名称
infile=$1
#下面设置输出文件,把用户执行pre_deal.sh命令时提供的第二个参数作为输出文件名称
outfile=$2
#注意,最后的$infile> $outfile必须跟在}’这两个字符的后面
awk -F "," 'BEGIN{
srand();
id=0;
Province[0]="山东";Province[1]="山西";Province[2]="河南";Province[3]="河北";Province[4]="陕西";Province[5]="内蒙古";Province[6]="上海市";
Province[7]="北京市";Province[8]="重庆市";Province[9]="天津市";Province[10]="福建";Province[11]="广东";Province[12]="广西";Province[13]="云南";
Province[14]="浙江";Province[15]="贵州";Province[16]="新疆";Province[17]="西藏";Province[18]="江西";Province[19]="湖南";Province[20]="湖北";
Province[21]="黑龙江";Province[22]="吉林";Province[23]="辽宁"; Province[24]="江苏";Province[25]="甘肃";Province[26]="青海";Province[27]="四川";
Province[28]="安徽"; Province[29]="宁夏";Province[30]="海南";Province[31]="香港";Province[32]="澳门";Province[33]="台湾";
}
{
id=id+1;
value=int(rand()*34);
print id"\t"$1"\t"$2"\t"$3"\t"$5"\t"substr($6,1,10)"\t"Province[value]
}' $infile> $outfile
./bin/hdfs dfs -mkdir -p /bigdatacase/dateset
./bin/hdfs dfs -put /usr/local/bigdatacase/dateset/user_table.txt /bigdatacase/dateset
./bin/hdfs dfs -cat /bigdatacase/dateset/user_table.txt | head -10
创建数据库
service mysql start
cd /usr/local/hive
./bin/hiv
CREATE EXTERNAL TABLE dblab.bigdata_user(id INT,uid STRING,item_id STRING,behavior_type INT,item_category STRING,visit_date DATE,province STRING) COMMENT 'Welcome to xmudblab!' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE LOCATION '/bigdatacase/dateset';
第一次尝试,只有ok没输出数据
原因:create表时没有真正导入数据,
location的位置错误。
设置别名,简化操作
select count(*) from (select uid,item_id,behavior_type,item_category,visit_date,province from bigdata_user group by uid,item_id,behavior_type,item_category,visit_date,province having count(*)=1)a;
select count(*) from bigdata_user where visit_date='2014-12-11'and behavior_type='4';#查询有多少用户在2014-12-11购买了商品
select count(*) from bigdata_user where visit_date ='2014-12-11';#查询有多少用户在2014-12-11点击了该店
select count(*) from bigdata_user where uid=10001082 and visit_date='2014-12-12';#查询用户10001082在2014-12-12点击网站的次数
select count(*) from bigdata_user where visit_date='2014-12-12';#查询所有用户在这一天点击该网站的次数
select uid from bigdata_user where behavior_type='4' and visit_date='2014-12-12' group by uid having count(behavior_type='4')>5;#查询某一天在该网站购买商品超过5次的用户id
create table scan(province STRING,scan INT) COMMENT 'This is the search of bigdataday' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;#创建新的数据表进行存储
insert overwrite table scan select province,count(behavior_type) from bigdata_user where behavior_type='1' group by province;#导入数据
select * from scan;#显示结果
create table dblab.user_action(id STRING,uid STRING, item_id STRING, behavior_type STRING, item_category STRING, visit_date DATE, province STRING) COMMENT 'Welcome to XMU dblab! ' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;
INSERT OVERWRITE TABLE dblab.user_action select * from dblab.bigdata_user;
mysql> CREATE TABLE `dblab`.`user_action` (`id` varchar(50),`uid` varchar(50),`item_id` varchar(50),`behavior_type` varchar(10),`item_category` varchar(50), `visit_date` DATE,`province` varchar(20)) ENGINE=InnoDB DEFAULT CHARSET=utf8;
启动hiveserver2,设置端口10000
出现session id后等很久…
4. 编写java程序
修改mysql密码
service mysql stop
service mysql start
/usr/sbin/mysqld --skip-grant-tables
sudo mysql -uroot -p
mysql>
use mysql;
update user set authentication_string=password("5g") where user="root";
flush privileges;
注意:
";英文符号!
password不行就换成authentication_string
完事,重启mysql
service mysql restart
然后运行程序
依然报错
Exception in thread “main” java.sql.SQLException: Access denied for user ‘root’@‘localhost’
新建项目
运行程序
R链接mysql
3.分析消费者行为
4.分析销量最大月份
被购买总量前十的商品和被购买总量
购买商品的量最多的月份
分析国内最有购买欲望消费者所在省