ODS贴源层
1 ODS层存放您从业务系统获取的最原始的数据,是其他上层数据的源数据。
2 本项目中使用的ODS层数据主要包括:
用户基本信息、商品分类信息、商品信息、店铺信息、
订单数据、订单支付信息、活动信息、行为日志信息
业务数据库表介绍
中文含义 | MYSQL表名 | HIVE表名 |
---|---|---|
用户基本信息表 | nshop.customer | ods_nshop.ods_nshop_02_customer |
收货地址管理表 | nshop.customer_consignee | ods_nshop.ods_nshop_02_customer_consignee |
订单表 | nshop.orders | ods_nshop.ods_nshop_02_orders |
订单详情表 | nshop.order_detail | ods_nshop.ods_nshop_02_order_detail |
订单支付记录表 | nshop.orders_pay_records | ods_nshop.ods_nshop_02_orders_pay_records |
用户相关
- 用户基本信息表
create table if not exists nshop.customer (
customer_id varchar(20) NOT NULL COMMENT '用户ID',
customer_login varchar(20) NOT NULL COMMENT '用户登录名',
customer_nickname varchar(10) NOT NULL COMMENT '用户名(昵称)',
customer_name varchar(10) NOT NULL COMMENT '用户真实姓名',
customer_pass varchar(8) NOT NULL COMMENT '用户密码',
customer_mobile varchar(20) NOT NULL COMMENT '用户手机',
customer_idcard varchar(20) NOT NULL COMMENT '身份证',
customer_gender TINYINT NOT NULL COMMENT '性别:1男 0女',
customer_birthday varchar(10) NOT NULL COMMENT '出生年月',
customer_age TINYINT NOT NULL COMMENT '年龄',
customer_age_range varchar(2) NOT NULL COMMENT '年龄段',
customer_email varchar(50) NULL COMMENT '用户邮箱',
customer_natives varchar(10) NULL COMMENT '所在地区',
customer_ctime bigint NULL COMMENT '创建时间',
customer_utime bigint NULL COMMENT '修改时间',
customer_device_num varchar(20) NOT NULL COMMENT '用户手机设备号',
PRIMARY KEY (`customer_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 收货地址管理表
create table if not exists nshop.customer_consignee (
consignee_id varchar(20) NOT NULL COMMENT '收货地址ID',
customer_id varchar(20) NOT NULL COMMENT '用户ID',
consignee_name varchar(10) NOT NULL COMMENT '收货人',
consignee_mobile varchar(15) NOT NULL COMMENT '收货人电话',
consignee_zipcode varchar(10) NOT NULL COMMENT '收货人地区',
consignee_addr varchar(200) NOT NULL COMMENT '收货人详细地址',
consignee_tag varchar(10) NOT NULL COMMENT '标签:1家 2公司 3学校',
ctime bigint NULL COMMENT '创建时间',
PRIMARY KEY (`consignee_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `customer_attention` (
`customer_id` varchar(20) NOT NULL,
`attention_id` varchar(20) NOT NULL,
`attention_type` tinyint(4) NOT NULL,
`attention_status` int(11) NOT NULL DEFAULT '1',
`attention_ctime` bigint(20) NOT NULL,
KEY `customer_id` (`customer_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
商品相关
- 商品分类信息
create table if not exists nshop.category(
category_code VARCHAR(10) NOT NULL COMMENT '分类编码',
category_name VARCHAR(10) NOT NULL COMMENT '分类名称',
category_parent_id VARCHAR(10) NULL COMMENT '父分类ID',
category_status TINYINT NOT NULL DEFAULT 1 COMMENT '分类状态:0禁止,1启用',
category_utime bigint NULL COMMENT '最后修改时间',
PRIMARY KEY (`category_code`)
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 供应商信息表
create table if not exists nshop.supplier(
supplier_code varchar(10) NOT NULL COMMENT '供应商编码',
supplier_name varchar(30) NOT NULL COMMENT '供应商名称',
supplier_type TINYINT NOT NULL COMMENT '供应商类型:1.自营,2.官方 3其他',
supplier_status TINYINT NOT NULL DEFAULT 1 COMMENT '状态:0禁止,1启用',
supplier_utime bigint NULL COMMENT '最后修改时间',
PRIMARY KEY (`supplier_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 商品信息表
create table if not exists nshop.product(
product_code VARCHAR(20) NOT NULL COMMENT '商品ID(分类编码+供应商编码+编号)',
product_name VARCHAR(20) NOT NULL COMMENT '商品名称',
product_remark TEXT NOT NULL COMMENT '商品描述',
category_code VARCHAR(10) NOT NULL COMMENT '分类ID',
supplier_code varchar(10) NOT NULL COMMENT '商品的供应商编码',
product_price DECIMAL(5,1) NOT NULL COMMENT '商品销售价格',
product_weighing_cost DECIMAL(2,1) NOT NULL COMMENT '商品加权价格',
product_publish_status TINYINT NOT NULL DEFAULT 0 COMMENT '上下架状态:0下架 1上架',
product_audit_status TINYINT NOT NULL DEFAULT 0 COMMENT '审核状态:0未审核,1已审核',
product_bar_code VARCHAR(50) NOT NULL COMMENT '国条码',
product_weight FLOAT NULL COMMENT '商品重量',
product_length FLOAT NULL COMMENT '商品长度',
product_height FLOAT NULL COMMENT '商品高度',
product_width FLOAT NULL COMMENT '商品宽度',
product_colors SMALLINT UNSIGNED NOT NULL COMMENT '0:白|1:赤|2:红|3:黄|4:绿|5:青|6:蓝|7:紫|8:黑|9:彩色',
product_date varchar(10) NOT NULL COMMENT '生产日期',
product_shelf_life INT NOT NULL COMMENT '商品有效期',
product_ctime bigint NULL COMMENT '商品录入时间',
product_utime bigint NULL COMMENT '最后修改时间',
PRIMARY KEY (`product_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
订单相关
- 订单表
create table if not exists nshop.orders (
order_id VARCHAR(50) NOT NULL COMMENT '订单ID(时间+商品ID+4位随机)',
customer_id varchar(20) NOT NULL COMMENT '下单用户ID',
order_status TINYINT NOT NULL COMMENT '订单状态:1已提交; 2待支付 3出货中 4已发货 5已收货(完成) 6投诉 7退货',
customer_ip varchar(20) NULL COMMENT '下单用户IP',
user_longitude varchar(20) NULL COMMENT '用户地理:经度',
user_latitude varchar(20) NULL COMMENT '用户地理:纬度',
user_areacode varchar(10) NULL COMMENT '用户所在地区',
consignee_name varchar(10) NOT NULL COMMENT '收货人',
consignee_mobile varchar(15) NOT NULL COMMENT '收货人电话',
consignee_zipcode varchar(10) NOT NULL COMMENT '收货人地址',
pay_type varchar(5) NULL COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
pay_code varchar(30) NULL COMMENT '支付对应唯一标识,如微信号、支付宝号',
pay_nettype varchar(1) NOT NULL COMMENT '支付网络方式:0 wifi | 1 4g | 2 3g |3 线下支付',
district_money DECIMAL(8,1) NOT NULL DEFAULT 0.0 COMMENT '优惠金额',
shipping_money DECIMAL(8,1) NOT NULL DEFAULT 0.0 COMMENT '运费金额',
payment_money DECIMAL(10,1) NOT NULL DEFAULT 0.0 COMMENT '支付金额',
order_ctime bigint NULL COMMENT '创建时间',
shipping_time bigint NULL COMMENT '发货时间',
receive_time bigint NULL COMMENT '收货时间',
PRIMARY KEY (`order_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 订单详情表
create table if not exists nshop.order_detail(
order_detail_id varchar(20) NOT NULL COMMENT '订单详情表ID',
order_id VARCHAR(50) NOT NULL COMMENT '订单表ID',
product_id varchar(20) NOT NULL COMMENT '订单商品ID',
product_name VARCHAR(50) NOT NULL COMMENT '商品名称',
product_remark VARCHAR(100) NOT NULL COMMENT '商品描述',
product_cnt INT NOT NULL DEFAULT 1 COMMENT '购买商品数量',
product_price DECIMAL(5,1) NOT NULL COMMENT '购买商品单价',
weighing_cost DECIMAL(2,1) NOT NULL COMMENT '商品加权价格',
district_money DECIMAL(4,1) NOT NULL DEFAULT 0.0 COMMENT '优惠金额',
is_activity NOT NULL DEFAULT 0 COMMENT '1:参加活动|0:没有参加活动',
order_detail_ctime bigint NULL COMMENT '下单时间',
PRIMARY KEY (`order_detail_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
- 订单支付记录
create table if not exists nshop.orders_pay_records (
pay_id varchar(20) NOT NULL COMMENT '支付记录ID',
order_id varchar(30) NOT NULL COMMENT '订单ID',
customer_id varchar(20) NOT NULL COMMENT '用户ID',
pay_status varchar(5) NOT NULL COMMENT '支付状态:0 支付失败| 1 支付成功',
pay_type varchar(5) NULL COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
pay_code varchar(30) NULL COMMENT '支付对应唯一标识,如微信号、支付宝号',
pay_nettype varchar(1) NOT NULL COMMENT '支付网络方式:1 wifi | 2 4g | 3 3g |4 线下支付',
pay_amount double(10,1) NOT NULL COMMENT '支付金额',
pay_ctime bigint NULL COMMENT '创建时间',
PRIMARY KEY (`pay_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
时间维度
create table if not exists nshop.date_dim (
date_day varchar(30) NOT NULL COMMENT '日期:yyyyMMdd 如20190520',
date_day_desc varchar(30) NOT NULL COMMENT '日期格式:yyyy年MM月dd日 如2019年05月20日',
date_day_month varchar(30) NOT NULL COMMENT '日期:20 本月第几天 如2019年5月20日为5月第20天',
date_day_year varchar(30) NOT NULL COMMENT '日期:139 本年第几天 如2019年5月20日为139天',
date_day_en varchar(30) NOT NULL COMMENT '日期:monday 星期几',
date_week varchar(30) NOT NULL COMMENT '周:本月第几周 4 如 20190520为本月第4周',
date_week_desc varchar(30) NOT NULL COMMENT '周:本月第几周 如 20190504',
date_month varchar(30) NOT NULL COMMENT '月: 5 如 20190520为本年5月',
date_month_en varchar(30) NOT NULL COMMENT '月: may ',
date_month_desc varchar(30) NOT NULL COMMENT '月:如 201905',
date_quarter varchar(30) NOT NULL COMMENT '季度:2',
date_quarter_en varchar(30) NOT NULL COMMENT '季度:Q2',
date_quarter_desc varchar(30) NOT NULL COMMENT '季度:2019Q2 如 20190520为2019Q2',
date_year varchar(30) NOT NULL COMMENT '年:2019',
PRIMARY KEY (`date_day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
地区列表
create table if not exists nshop.area_dim (
region_code varchar(30) NOT NULL COMMENT '地区编码 如110105 | 130406 ',
region_code_desc varchar(30) NOT NULL COMMENT '地区编码 如朝阳区 | 峰峰矿区',
region_city varchar(30) NOT NULL COMMENT '地区编码 如1101 北京市朝阳区 | 1304 邯郸',
region_city_desc varchar(30) NOT NULL COMMENT '地区编码 如1101 | 1304 邯郸市',
region_province varchar(30) NOT NULL COMMENT '地区编码 如11 北京市 | 13 河北省',
region_province_desc varchar(30) NOT NULL COMMENT '地区编码 如 北京市 | 河北',
PRIMARY KEY (`region_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
通用字典表
create table if not exists nshop.comm_dim(
dim_type VARCHAR(10) not NULL COMMENT '字典类型',
dim_code VARCHAR(30) not NULL COMMENT '字典编码',
dim_remark varchar(30) NULL COMMENT '字段描述',
dim_ext1 varchar(30) NULL COMMENT '扩展1',
dim_ext2 varchar(30) NULL COMMENT '扩展2',
dim_ext3 varchar(30) NULL COMMENT '扩展3',
dim_ext4 varchar(30) NULL COMMENT '扩展4',
ct timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
PRIMARY KEY (`dim_type`,`dim_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
页面布局表
create table if not exists nshop.page_dim (
page_code varchar(30) NOT NULL COMMENT '页面编码',
page_remark varchar(30) NULL COMMENT '页面描述',
page_type varchar(5) NOT NULL COMMENT '页面类型(1:导航页 2:分类页 3:店铺页 4:产品页)',
page_target varchar(30) NULL COMMENT '页面对应实体编号(如产品、店铺)',
page_ctime bigint NOT NULL COMMENT '创建时间',
PRIMARY KEY (`page_code`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
同步业务数据到ODS
数仓ods层和业务数据表的对应关系
中文含义 | MYSQL表名 | HIVE表名 |
---|---|---|
用户基本信息表 | nshop.customer | ods_nshop.ods_02_customer |
用户关注表 | nshop_customer_attention | ods_nshop.ods_02_customer_attention |
用户收货地址管理表 | nshop.customer_consignee | ods_nshop.ods_02_customer_consignee |
订单表 | nshop.orders | ods_nshop.ods_02_orders |
订单详情表 | nshop.order_detail | ods_nshop.ods_02_order_detail |
订单支付记录表 | nshop.orders_pay_records | ods_nshop.ods_02_orders_pay_records |
商品分类表 | nshop.category | ods_nshop.dim_pub_category |
商品信息表 | nshop.product | ods_nshop.dim_pub_product |
供应商信息表 | nshop.supplier | ods_nshop.dim_pub_supplier |
页面布局表 | nshop.page_dim | ods_nshop.dim_pub_page |
地域字典表 | nshop.area_dim | ods_nshop.dim_pub_area |
通用字典表 | nshop.comm_dim | ods_nshop.dim_pub_comm |
时间字典表 | nshop.date_dim | ods_nshop.dim_pub_date |
数仓ods层库表建设
创建数据库
create database if not exists ods_nshop;
use ods_nshop;
用户基本信息表
create external table if not exists ods_nshop.ods_02_customer (
customer_id string COMMENT '用户ID',
customer_login string COMMENT '用户登录名',
customer_nickname string COMMENT '用户名(昵称)',
customer_name string COMMENT '用户真实姓名',
customer_pass string COMMENT '用户密码',
customer_mobile string COMMENT '用户手机',
customer_idcard string COMMENT '身份证',
customer_gender TINYINT COMMENT '性别:1男 0女',
customer_birthday string COMMENT '出生年月',
customer_age TINYINT COMMENT '年龄',
customer_age_range string COMMENT '年龄段',
customer_email string COMMENT '用户邮箱',
customer_natives string COMMENT '所在地区',
customer_ctime bigint COMMENT '创建时间',
customer_utime bigint COMMENT '修改时间',
customer_device_num string COMMENT '用户手机设备号'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer/'
用户关注表
create external table if not exists ods_nshop.ods_02_customer_attention (
customer_id string COMMENT '用户ID',
attention_id string COMMENT '关注对象ID',
attention_type TINYINT COMMENT '关注类型:3店铺 4商品',
attention_status bigint COMMENT '关注状态:1关注 0取消',
attention_ctime bigint COMMENT '生成时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer_attention/'
收货地址管理表
create external table if not exists ods_nshop.ods_02_customer_consignee (
consignee_id string COMMENT '收货地址ID',
customer_id string COMMENT '用户ID',
consignee_name string COMMENT '收货人',
consignee_mobile string COMMENT '收货人电话',
consignee_zipcode string COMMENT '收货人地区',
consignee_addr string COMMENT '收货人详细地址',
consignee_tag string COMMENT '标签:1家 2公司 3学校',
ctime bigint COMMENT '创建时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer_consignee/'
商品分类表
create external table if not exists ods_nshop.dim_pub_category (
category_code string COMMENT '分类编码',
category_name string COMMENT '分类名称',
category_parent_id string COMMENT '父分类ID',
category_status TINYINT COMMENT '分类状态:0禁止,1启用',
category_utime bigint COMMENT '最后修改时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_category/'
供应商信息表
create external table if not exists ods_nshop.dim_pub_supplier(
supplier_code string COMMENT '供应商编码',
supplier_name string COMMENT '供应商名称',
supplier_type TINYINT COMMENT '供应商类型:1.自营,2.官方 3其他',
supplier_status TINYINT COMMENT '状态:0禁止,1启用',
supplier_utime bigint COMMENT '最后修改时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_supplier/'
订单表
create external table if not exists ods_nshop.ods_02_orders (
order_id string COMMENT '订单ID(时间+商品ID+4位随机)',
customer_id string COMMENT '下单用户ID',
order_status TINYINT COMMENT '订单状态',
customer_ip string COMMENT '下单用户IP',
user_longitude string COMMENT ':经度',
user_latitude string COMMENT '用户地理:纬度',
user_areacode string COMMENT '用户所在地区',
consignee_name string COMMENT '收货人',
consignee_mobile string COMMENT '收货人电话',
consignee_zipcode string COMMENT '收货人地址',
pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
pay_code string COMMENT '支付对应唯一标识,如微信号、支付宝号',
pay_nettype string COMMENT '支付网络方式:0 wifi | 1 4g | 2 3g |3 线下支付',
district_money DECIMAL(8,1) COMMENT '优惠金额',
shipping_money DECIMAL(8,1) COMMENT '运费金额',
payment_money DECIMAL(10,1) COMMENT '支付金额',
order_ctime bigint COMMENT '创建时间',
shipping_time bigint COMMENT '发货时间',
receive_time bigint COMMENT '收货时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_orders/'
订单详情表
create external table if not exists ods_nshop.ods_02_order_detail(
order_detail_id string COMMENT '订单详情表ID',
order_id string COMMENT '订单表ID',
product_id string COMMENT '订单商品ID',
product_name string COMMENT '商品名称',
product_remark string COMMENT '商品描述',
product_cnt INT COMMENT '购买商品数量',
product_price DECIMAL(5,1) COMMENT '购买商品单价',
weighing_cost DECIMAL(2,1) COMMENT '商品加权价格',
district_money DECIMAL(4,1) COMMENT '优惠金额',
is_activity bigint COMMENT '1:参加活动|0:没有参加活动',
order_detail_ctime bigint COMMENT '下单时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_order_detail/'
页面布局
create external table if not exists ods_nshop.dim_pub_page (
page_code string COMMENT '页面编码',
page_remark string COMMENT '页面描述',
page_type string COMMENT '页面类型(1:导航页 2:分类页 3:店铺页 4:产品页)',
page_target string COMMENT '页面对应实体编号(如产品、店铺)',
page_ctime bigint COMMENT '创建时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_page/'
地域表
create external table if not exists ods_nshop.dim_pub_area (
region_code string COMMENT '地区编码 如110105 | 130406 ',
region_code_desc string COMMENT '地区编码 如朝阳区 | 峰峰矿区',
region_city string COMMENT '地区编码 如1101 北京市朝阳区 | 1304 邯郸',
region_city_desc string COMMENT '地区编码 如1101 | 1304 邯郸市',
region_province string COMMENT '地区编码 如11 北京市 | 13 河北省',
region_province_desc string COMMENT '地区编码 如 北京市 | 河北'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_area/'
订单支付记录表
create external table if not exists ods_nshop.ods_02_orders_pay_records (
pay_id string COMMENT '支付记录ID',
order_id string COMMENT '订单ID',
customer_id string COMMENT '用户ID',
pay_status string COMMENT '支付状态:0 支付失败| 1 支付成功',
pay_type string COMMENT '支付类型:线上支付 10 网上银行 11 微信 12 支付宝 | 线下支付(货到付款) 20 ',
pay_code string COMMENT '支付对应唯一标识,如微信号、支付宝号',
pay_nettype string COMMENT '支付网络方式:1 wifi | 2 4g | 3 3g |4 线下支付',
pay_amount double COMMENT '支付金额',
pay_ctime bigint COMMENT '创建时间'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_orders_pay_records/'
商品信息表
create external table if not exists ods_nshop.dim_pub_product(
product_code string COMMENT '商品ID(分类编码+供应商编码+编号)',
product_name string COMMENT '商品名称',
product_remark string COMMENT '商品描述',
category_code string COMMENT '分类ID',
supplier_code string COMMENT '商品的供应商编码',
product_price DECIMAL COMMENT '商品销售价格',
product_weighing_cost DECIMAL(2,1) COMMENT '商品加权价格',
product_publish_status TINYINT COMMENT '上下架状态:0下架 1上架',
product_audit_status TINYINT COMMENT '审核状态:0未审核,1已审核',
product_bar_code string COMMENT '国条码',
product_weight FLOAT COMMENT '商品重量',
product_length FLOAT COMMENT '商品长度',
product_height FLOAT COMMENT '商品高度',
product_width FLOAT COMMENT '商品宽度',
product_colors SMALLINT COMMENT '0:白|1:赤|2:红|3:黄|4:绿|5:青|6:蓝|7:紫|8:黑|9:彩色',
product_date string COMMENT '生产日期',
product_shelf_life bigint COMMENT '商品有效期',
product_ctime bigint COMMENT '商品录入时间',
product_utime bigint COMMENT '最后修改时间'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_product/'
日期维度表
create external table if not exists ods_nshop.dim_pub_date (
date_day string COMMENT '日期:yyyyMMdd 如20190520',
date_day_desc string COMMENT '日期格式:yyyy年MM月dd日 如2019年05月20日',
date_day_month string COMMENT '日期:20 本月第几天 如2019年5月20日为5月第20天',
date_day_year string COMMENT '日期:139 本年第几天 如2019年5月20日为139天',
date_day_en string COMMENT '日期:monday 星期几',
date_week string COMMENT '周:本月第几周 4 如 20190520为本月第4周',
date_week_desc string COMMENT '周:本月第几周 如 20190504',
date_month string COMMENT '月: 5 如 20190520为本年5月',
date_month_en string COMMENT '月: may ',
date_month_desc string COMMENT '月:如 201905',
date_quarter string COMMENT '季度:2',
date_quarter_en string COMMENT '季度:Q2',
date_quarter_desc string COMMENT '季度:2019Q2 如 20190520为2019Q2',
date_year string COMMENT '年:2019'
)
location '/shujia/bigdata17/data/nshop/ods/dim_pub_date/'
Sqoop导入脚本
create external table if not exists ods_nshop.ods_02_customer (
customer_id string COMMENT '用户ID',
customer_login string COMMENT '用户登录名',
customer_nickname string COMMENT '用户名(昵称)',
customer_name string COMMENT '用户真实姓名',
customer_pass string COMMENT '用户密码',
customer_mobile string COMMENT '用户手机',
customer_idcard string COMMENT '身份证',
customer_gender TINYINT COMMENT '性别:1男 0女',
customer_birthday string COMMENT '出生年月',
customer_age TINYINT COMMENT '年龄',
customer_age_range string COMMENT '年龄段',
customer_email string COMMENT '用户邮箱',
customer_natives string COMMENT '所在地区',
customer_ctime bigint COMMENT '创建时间',
customer_utime bigint COMMENT '修改时间',
customer_device_num string COMMENT '用户手机设备号'
)
location '/shujia/bigdata17/data/nshop/ods/ods_02_customer/'
--这两个同步通过命令行执行,其他的通过脚本执行
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select customer_id,customer_login,customer_nickname,customer_name,customer_pass,customer_mobile,customer_idcard,customer_gender,customer_birthday,customer_age,customer_age_range,customer_email,customer_natives,customer_ctime,customer_utime,customer_device_num from customer where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer/ \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer \
--m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select customer_id,customer_login,customer_nickname,customer_name,customer_pass,customer_mobile,customer_idcard,customer_gender,customer_birthday,customer_age,customer_age_range,customer_email,customer_natives,customer_ctime,customer_utime,customer_device_num from customer where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer \
--delete-target-dir \
-m 1
sqoop脚本文件如下:
#!/bin/bash
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select customer_id,attention_id,attention_type,attention_status,attention_ctime from customer_attention where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer_attention \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer_attention \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--query "select consignee_id,customer_id,consignee_name,consignee_mobile,consignee_zipcode,consignee_addr,consignee_tag,ctime from customer_consignee where \$CONDITIONS" \
--username root \
--password 123456 \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_customer_consignee \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_customer_consignee \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select category_code,category_name,category_parent_id,category_status,category_utime from category where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_category \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_category \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select supplier_code,supplier_name,supplier_type,supplier_status,supplier_utime from supplier where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_supplier \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_supplier \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select order_id,customer_id,order_status,customer_ip,customer_longitude,customer_latitude,customer_areacode,consignee_name,consignee_mobile,consignee_zipcode,pay_type,pay_code,pay_nettype,district_money,shipping_money,payment_money,order_ctime,shipping_time,receive_time from orders where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_orders \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_orders \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select order_detail_id,order_id,product_id,product_name,product_remark,product_cnt,product_price,weighing_cost,district_money,is_activity,order_detail_ctime from order_detail where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_order_detail \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_order_detail \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select page_code,page_remark,page_type,page_target,page_ctime from page_dim where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_page \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_page \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select region_code,region_code_desc,region_city,region_city_desc,region_province,region_province_desc from area_dim where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_area \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_area \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select date_day,date_day_desc,date_day_month,date_day_year,date_day_en,date_week,date_week_desc,date_month,date_month_en,date_month_desc,date_quarter,date_quarter_en,date_quarter_desc,date_year from date_dim where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_date \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_date \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select pay_id,order_id,customer_id,pay_status,pay_type,pay_code,pay_nettype,pay_amount,pay_ctime from orders_pay_records where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/ods_02_orders_pay_records \
--hive-import \
--hive-database ods_nshop \
--hive-table ods_02_orders_pay_records \
--delete-target-dir \
-m 1
sqoop import \
--connect jdbc:mysql://master:3306/nshop \
--username root \
--password 123456 \
--query "select product_code,product_name,product_remark,category_code,supplier_code,product_price,product_weighing_cost,product_publish_status,product_audit_status,product_bar_code,product_weight,product_length,product_height,product_width,product_colors,product_date,product_shelf_life,product_ctime,product_utime from product where \$CONDITIONS" \
--target-dir /shujia/bigdata17/data/nshop/ods/dim_pub_product \
--hive-import \
--hive-database ods_nshop \
--hive-table dim_pub_product \
--delete-target-dir \
-m 1
定时任务:
#设定定时任务
#crond定时方式的配置
vim /etc/crontab
#在最后一行加上定时任务的配置 每小时执行一次
* */1 * * * root /usr/local/shell/test.sh
/usr/local/soft/atao_file/xiangmu_1/sqoopshell.sh
#修改crontab的配置后需要重启crontab,使配置生效
/bin/systemctl restart crond.service
埋点日志
日志数据我们直接load就可以了,下面的外部数据再使用flume采集,为了演示不同的导数据方式,这里就直接load了
电商平台在网页版或APP版终端上进行业务埋点,采集相关用户行为数据发送给服务器进行用户行为分析。
日志文件如下(截取部分)
{"action":"05","event_type":"01","customer_id":"20101000324999676","device_num":"586344","device_type":"9","os":"2","os_version":"2.2","manufacturer":"05","carrier":"2","network_type":"2","area_code":"41092","longitude":"116.35636","latitude":"40.06919","extinfo":"{\"target_type\":\"4\",\"target_keys\":\"20402\",\"target_order\":\"31\",\"target_ids\":\"[\\\"4320402595801\\\",\\\"4320402133801\\\",\\\"4320402919201\\\",\\\"4320402238501\\\"]\"}","ct":1567896035000}
{"action":"05","event_type":"02","customer_id":"20101000405999595","device_num":"956236","device_type":"9","os":"2","os_version":"4.3","manufacturer":"08","carrier":"2","network_type":"3","area_code":"10015","longitude":"116.35985","latitude":"40.069590000000005","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320406544401\",\"target_action\":\"01\"}","ct":1567896035000}
{"action":"05","event_type":"02","customer_id":"20101000648999352","device_num":"876263","device_type":"2","os":"2","os_version":"6.2","manufacturer":"07","carrier":"2","network_type":"2","area_code":"42058","longitude":"116.34721","latitude":"40.06685","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320701402201\",\"target_action\":\"01\"}","ct":1567896035000}
{"action":"07","event_type":"02","customer_id":"20101000729999271","device_num":"451481","device_type":"9","os":"2","os_version":"4.5","manufacturer":"09","carrier":"3","network_type":"2","area_code":"37162","longitude":"116.35737","latitude":"40.06962","extinfo":"{\"target_id\":\"4320207200701\"}","ct":1567896035000}
{"action":"06","event_type":"","customer_id":"20101000810999190","device_num":"114649","device_type":"9","os":"2","os_version":"8.2","manufacturer":"02","carrier":"3","network_type":"0","area_code":"35062","longitude":"116.38244999999999","latitude":"40.06447","extinfo":"{}","ct":1567896035000}
{"action":"07","event_type":"01","customer_id":"20101000972999028","device_num":"332835","device_type":"2","os":"2","os_version":"7.5","manufacturer":"02","carrier":"2","network_type":"0","area_code":"15082","longitude":"116.33457","latitude":"40.06449","extinfo":"{\"target_id\":\"4320701224301\"}","ct":1567896035000}
{"action":"05","event_type":"02","customer_id":"20101000972999028","device_num":"332835","device_type":"2","os":"2","os_version":"7.5","manufacturer":"02","carrier":"2","network_type":"0","area_code":"15082","longitude":"116.33457","latitude":"40.06449","extinfo":"{\"target_type\":\"3\",\"target_id\":\"4320701224301\",\"target_action\":\"01\"}","ct":1567896035000}
{"action":"08","event_type":"02","customer_id":"20101001377998623","device_num":"269526","device_type":"9","os":"2","os_version":"2.2","manufacturer":"05","carrier":"2","network_type":"3","area_code":"13053","longitude":"116.36325","latitude":"40.06456","extinfo":"{\"target_id\":\"4320901702901\"}","ct":1567896035000}
{"action":"08","event_type":"01","customer_id":"20101001458998542","device_num":"252612","device_type":"9","os":"2","os_version":"7.5","manufacturer":"03","carrier":"2","network_type":"2","area_code":"10013","longitude":"116.39949","latitude":"40.069250000000004","extinfo":"{\"target_id\":\"4320901657101\"}","ct":1567896035000}
中文含义 | HIVE表名 |
---|---|
用户行为日志表 | ods_nshop.ods_nshop_01_useractlog |
用户行为日志表
创建hadoop目录
hadoop fs -mkdir -p '/shujia/bigdata17/data/nshop/ods/user_action_log/'
create external table if not exists ods_nshop.ods_nshop_01_useractlog(
action string comment '行为类型:install安装|launch启动|interactive交互|page_enter_h5页面曝光|page_enter_native页面进入|exit退出',
event_type string comment '行为类型:click点击|view浏览|slide滑动|input输入',
customer_id string comment '用户id',
device_num string comment '设备号',
device_type string comment '设备类型',
os string comment '手机系统',
os_version string comment '手机系统版本',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
longitude string comment '经度',
latitude string comment '纬度',
extinfo string comment '扩展信息(json格式)',
duration string comment '停留时长',
ct bigint comment '创建时间'
) partitioned by (bdp_day string)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
location '/shujia/bigdata17/data/nshop/ods/user_action_log/'
注意
json格式数据表需要通过serde机制处理
1 在hive-site.xml中设置三方jar包
hive.aux.jars.path
/usr/local/hive-3.1.2/lib/
2 在hive.aux.jars.path设置的路径中增加hive-hcatalog-core-3.1.2.jar
[root@master hive-3.1.2]# cp $HIVE_HOME/hcatalog/share/hcatalog/hive-hcatalog-core-3.1.2.jar /usr/local/hive-3.1.2/lib/
同步数据到表
将0000文件上传服务器,然后使用hive命令映射到hive表
hive> load data local inpath "/usr/local/soft/atao_file/xiangmu_file/000000_0" into table ods_nshop.ods_nshop_01_useractlog partition(bdp_day="20220630");
外部数据
外部数据来源主要有:
1 企业间的接口调用(如其他企业平台上的广告投放营销)
2 公共数据的爬虫数据(公共数据、友商数据、社区|社交平台公开数据)
中文含义 | HIVE表名 |
---|---|
广告投放信息表 | ods_nshop.ods_nshop_01_releasedatas |
广告投放信息
create external table if not exists ods_nshop.ods_nshop_01_releasedatas(
device_num string comment '设备号',
device_type string comment '设备类型',
os string comment '手机系统',
os_version string comment '手机系统版本',
manufacturer string comment '手机制造商',
area_code string comment '地区编码',
release_sid string comment '投放请求id',
release_session string comment '投放会话id',
release_sources string comment '投放渠道',
release_params string comment '投放请求参数',
ct string comment '创建时间'
) partitioned by (bdp_day string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
stored as textfile
location '/shujia/bigdata17/data/nshop/ods/release/'
采集数据
外部数据需要使用Flume进行数据采集操作,将采集到的数据映射到Hive中,首先创建Hive的表
外部数据如下(截取)
723748,2,2,6.1,3,540529,1572654785000fvkpnp,1572650000000000000000000,tencent,ip=3.61.2.222&deviceNum=723748&lon=null&lat=null&aid=null&ctime=1572654905000&sources=tencent&session=1572654785000723748416198&productPage=4320308033801,1573260000000
928223,2,2,2.8,7,441881,1572654785000mvfnnn,1572650000000000000000000,liebao,ip=95.219.76.152&deviceNum=928223&lon=null&lat=null&aid=null&ctime=1572654919000&sources=liebao&session=1572654785000928223624678&productPage=4320102710201,1573260000000
928223,2,2,7.1,3,653123,1572654785000mvfnnn,1572650000000000000000000,liebao,ip=95.219.76.152&deviceNum=928223&lon=null&lat=null&aid=null&ctime=1572654919000&sources=liebao&session=1572654785000928223624678&productPage=4320102710201,1573260000000
784295,9,2,9.7,5,632726,1572654785000qvisko,1572650000000000000000000,baidu,ip=210.121.223.225&deviceNum=784295&lon=null&lat=null&aid=null&ctime=1572655033000&sources=baidu&session=1572654785000784295423824&productPage=4320206519001,1573260000000
496626,9,2,4.8,5,540522,1572654785000jdesth,1572650000000000000000000,douyin,ip=231.61.62.159&deviceNum=496626&lon=null&lat=null&aid=null&ctime=1572654947000&sources=douyin&session=1572654785000496626329353&productPage=4320303786701,1573260000000
235966,2,2,2.7,2,610602,1572654785000updxen,1572650000000000000000000,ximalaya,ip=246.85.131.184&deviceNum=235966&lon=null&lat=null&aid=null&ctime=1572654853000&sources=ximalaya&session=1572654785000235966421378&productPage=4320505806501,1573260000000
275157,9,2,2.4,5,632724,1572654785000pqwgch,1572650000000000000000000,liebao,ip=106.227.203.6&deviceNum=275157&lon=null&lat=null&aid=null&ctime=1572655014000&sources=liebao&session=1572654785000275157768731&productPage=4320603904901,1573260000000
322548,2,2,5.2,2,632822,1572654785000wtjppb,1572650000000000000000000,liebao,ip=105.247.218.235&deviceNum=322548&lon=null&lat=null&aid=null&ctime=1572654981000&sources=liebao&session=1572654785000322548126172&productPage=4320403173201,1573260000000
432626,9,2,1.4,5,540227,1572654785000dxifed,1572650000000000000000000,netyex,ip=216.169.184.123&deviceNum=432626&lon=null&lat=null&aid=null&ctime=1572654916000&sources=netyex&session=1572654785000432626892945&productPage=4320808279201,1573260000000
693898,9,2,2.2,9,620924,1572654785000tnjomq,1572650000000000000000000,ximalaya,ip=130.238.115.6&deviceNum=693898&lon=null&lat=null&aid=null&ctime=1572654861000&sources=ximalaya&session=1572654785000693898367422&productPage=4320503604301,1573260000000
164237,9,2,9.2,3,360423,1572654785000bpxghm,1572650000000000000000000,douyin,ip=119.26.191.164&deviceNum=164237&lon=null&lat=null&aid=null&ctime=1572655050000&sources=douyin&session=1572654785000164237721644&productPage=4320804560801,1573260000000
配置Flume采集文件
监控文件如下
# 命名
a1.sources = r1
a1.channels = c1
a1.sinks = s1
# 关联
a1.sources.r1.channels = c1
a1.sinks.s1.channel = c1
# 配置source类型和属性
a1.sources.r1.type = TAILDIR
a1.sources.r1.filegroups = g1
a1.sources.r1.filegroups.g1 = /usr/local/soft/flume-1.9.0/data/.*.csv
# 元数据保存位置
a1.sources.r1.positionFile = /usr/local/soft/flume-1.9.0/flume-log/taildir_position.json
# 配置channel类型属性
a1.channels.c1.type = memory
# 缓存池大小
a1.channels.c1.capacity = 1000
# 每个事务sink拉取的大小
a1.channels.c1.transactionCapacity = 100
# 配置Sink类型和属性
a1.sinks.s1.type = hdfs
a1.sinks.s1.hdfs.path = hdfs://master:9000/data/nshop/ods/release/%Y%m%d
a1.sinks.s1.hdfs.fileSuffix = .log
# 下面三个配置参数如果都设置为0,那么表示不执行次参数(失效)
a1.sinks.s1.hdfs.rollInterval = 10
a1.sinks.s1.hdfs.rollSize = 0
a1.sinks.s1.hdfs.rollCount = 0
# 设置采集文件格式 如果你是存文本文件,就是用DataStream
a1.sinks.s1.hdfs.fileType = DataStream
a1.sinks.s1.hdfs.writeFormat = Text
# 开启本地时间戳获取参数,因为我们的目录上面已经使用转义符号,所以要使用时间戳
a1.sinks.s1.hdfs.useLocalTimeStamp = true
创建文件
[root@master conf]# vim agentData.conf
启动执行
[root@master conf]# flume-ng agent -n a1 -c conf -f agentData.conf -Dflume.root.logger=INFO,console
编写一个sh脚本,然后生产数据到对应位置的文件夹下面
#!/bin/base
cat 2.txt >> /usr/local/soft/flume-1.9.0/data/2.txt
执行映射
hive> load data inpath "/data/nshop/ods/release22/20220630/*" into table ods_nshop.ods_nshop_01_releasedatas partition(bdp_day='20220630');