3.1建表语句mysql_edu.rpt_school_baomin
drop table rpt_school_baomin ;
create table rpt_school_baomin(
year int,
month int,
day int,
count_total int,
itcast_school_name varchar(20),
origin int
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ;
--3.2学科分类统计数量,建表语句edu_rpt.rpt_subject_baomin
truncate table edu_rpt.rpt_subject_baomin;
drop table edu_rpt.rpt_subject_baomin;
create table rpt_subject_baomin(
`year` int,
`month` int,
`day` int,
count_total int,
itcast_subject_name varchar(20),
origin int
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
--3.3
create table rpt_relationship(
`year` int,
`month` int,
`day` int,
paid_total int
)ENGINE=InnoDB DEFAULT CHARSET=utf8;
--3.4
create table rpt_online_baomin_toal(
`year` int,
`month` int,
`day` int,
type_online_total int
);
--3.5
create table rpt_baomin_rate(
`year` int,
`month` int,
`day` int,
baomin_rate varchar(20)
);
--3.6
drop table edu_rpt.rpt_effective_rate;
create table rpt_effective_rate(
`year` int ,
`month` int ,
`day` int ,
online_baomin int,
effective_rate varchar(20)
);
--3.7(we)
create table rpt_day_trend(
`year` int comment '分组类型年',
`month` int comment '分组类型月',
`day` int comment '分组类型天',
type_online_total int comment'来源类型为online的总数',
type_local_total int comment'来源类型为local的总数'
);
--3.8
create table rpt_subject_top(
`year` int comment '分组类型年',
`month` int comment '分组类型月',
`day` int comment '分组类型天',
itcast_school_name varchar(20),
itcast_subject_name varchar(20),
count_total int,
count_ranking int comment '统计排名'
);
--3.9
create table rpt_origin_rate(
`year` int comment '分组类型年',
`month` int comment '分组类型月',
`day` int comment '分组类型天',
type_other_total int comment'来源类型为other的总数',
type_phone_total int comment'来源类型为phone的总数',
type_visited_total int comment'来源类型为visited的总数',
type_netservice_total int comment'来源类型为netservice的总数',
type_school_total int comment'来源类型为school的总数'
);
--3.10
create table employee_contribute(
`year` int comment '分组类型年',
`month` int comment '分组类型月',
`day` int comment '分组类型天',
sum_total int ,
online_rate varchar(20) comment '线上比率',
local_rate varchar(20) comment'线下比率'
);
3.1将数据插入数据库edu_olap中的表rpt_school_baomin
--使用presto工具导入语句(未成功)
错误:(SQL 错误 [1]: Query failed (#20231211_023332_00098_hwikf): line 8:3: mismatched input 'ENGINE'. Expecting: 'COMMENT', 'WITH', <EOF>)
insert into table mysql.edu_olap.rpt_school_baomin
select * from hive.edu_rpt.rpt_school_baomin
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_school_baomin \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_school_baomin \
--m 1;
--正确sqoop导入语句示例
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_rpt?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_campus_enrollment \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_campus_enrollment \
-m 1
3.2将数据插入数据库edu_olap中的表rpt_subject_baomin
insert into table mysql.edu_olap.rpt_subject_baomin
select * from hive.edu_rpt.rpt_subject_baomin
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_subject_baomin \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_subject_baomin \
--m 1;
----------------------------------------------------------------------------------------------------
3.3将数据插入数据库edu_olap中的表rpt_relationship
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_relationship \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_relationship \
--m 1;
----------------------------------------------------------------------------------------------------
3.4将数据插入数据库edu_olap中的表rpt_online_baomin_toal
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_online_baomin_toal \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_online_baomin_toal \
--m 1;
----------------------------------------------------------------------------------------------------
3.5将数据插入数据库edu_olap中的表rpt_baomin_rate
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_baomin_rate \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_baomin_rate \
--m 1;
----------------------------------------------------------------------------------------------------
3.6将数据插入数据库edu_olap中的表edu_rpt.rpt_effective_rate
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_effective_rate \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_effective_rate \
--m 1;
----------------------------------------------------------------------------------------------------
3.7将数据插入数据库edu_olap中的表edu_rpt.rpt_day_trend
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_day_trend \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_day_trend \
--m 1;
----------------------------------------------------------------------------------------------------
3.8将数据插入数据库edu_olap中的表edu_rpt.rpt_day_trend
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_subject_top \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_subject_top \
--m 1;
----------------------------------------------------------------------------------------------------
3.9将数据插入数据库edu_olap中的表edu_rpt.rpt_day_trend
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table rpt_origin_rate \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_origin_rate \
--m 1;
----------------------------------------------------------------------------------------------------
3.10将数据插入数据库edu_olap中的表edu_rpt.rpt_day_trend
--使用sqoop导入语句
/usr/bin/sqoop export "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" \
--connect 'jdbc:mysql://192.168.88.80:3306/edu_olap?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true' \
--username root \
--password 123456 \
--table employee_contribute \
--hcatalog-database edu_rpt \
--hcatalog-table rpt_employee_contribute \
--m 1;
1,如何分析需求?
从以下几个方面分析,维度,指标,表,字段
表名称,分区(根据是否需要优化,是否需要记录导入时间,正常情况下都需要分区)
表名称
ods_web_chat_ems
ods_web_chat_text_ems
分区
要分区,分区字段通常是两表连接字段
可以多一个分区字段,记录数据导入的时间
分桶
不需要分桶
存储
orc
压缩
zlib
表字段
保持不变,多一个分区字段
二,遇到的小问题
1,使用presto将rpt层的数据导入mysql层时遇到的问题,列的字符串值不正确
(可能的解决方法:设置mysql层建表时设置utf8,建数据库时设置编码格式)
Incorrect string value: ‘\xE4\xB8\x8A\xE6\xB5\xB7’ for column ‘itcast_school_name’ at row 1
第1行“itcast_school_name”列的字符串值不正确:“\xE4\xB8\x8A\xE6\xB5\xB7”
2,group by使用错误
“day”’ must be an aggregate expression or appear in GROUP BY clause
翻译:“day”必须是聚合表达式或出现在GROUP BY子句中
当有分组时,字段必须为聚合函数,或者在group by分组中
3,关于年月日等特殊字段,使用时需要引用`或者’,"符号来包围字段
backquoted identifiers are not supported; use double quotes to quote identifiers
不支持用反引号引起来的标识符;请使用双引号引起来标识符
4,列名写错了
invalid 无效的
Invalid column reference ‘tdepart_id’
无效的列引用“tdepart_id”
5,语法错误,忘记了写table
mismatched 不匹配的
mismatched input ‘table’. Expecting:
不匹配的输入“表”。期待:《标识符》
6,重复的字段名
Duplicate 复制重复
7,某个符号无效或者多了一个,号之类的问题
会报错near ……
8,当项目进行dwb层连接后,将dwd层导入dwb层时,因为动态分区太少会报错code2问题
解决:
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=10000;
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.created.files=150000;
set mapreduce.reduce.memory.mb=1536; // 每个Reduce Task需要的内存量
set mapreduce.reduce.java.opts=-Xmx1152M; // 每个Reduce Task 的JVM最大使用内存