MYSQL:
IFNULL(expression,value)
MSSQLServer:
ISNULL(expression,value)
Oracle:
NVL(expression,value)
Hive:
COALESCE ( expression,value1,value2……,valuen)
获取日期格式中的年月日 https://blog.csdn.net/qq_32123787/article/details/93482283
hive:
日期函数:to_date(),year(),month(),day(),hour(),minute(),second(),weekofyear()日期转周
select to_date(‘2018-12-08 10:03:01’);–2018-12-08 返回日期时间字段中的日期部分
select year(‘2018-12-08 10:03:01’);–2018
select year(‘2018-12-08’);–2018
select month(‘2018-12-08 10:03:01’);–12
select month(‘2018-12-08’);–12
select day(‘2018-12-08 10:03:01’);–8
select day(‘2018-12-08’);–8
select hour(‘2018-12-08 10:03:01’);–10
select minute(‘2018-12-08 10:03:01’);–3
select second(‘2018-12-08 10:03:01’);–1
select weekofyear(‘2018-12-08 10:03:01’)–49
MySQL:
SELECT DATE_SUB(‘2017-05-15 10:37:14.123456’,INTERVAL 1 YEAR); 除了YEAR,还有MONTH,DAY,HOUR,MINUTE,SECOND,MICROSECOND;常用的还是:YEAR,MONTH,DAY
SELECT EXTRACT(WEEK FROM CURDATE()); 除了WEEK,还有YEAR MONTH DAY HOUR MINUTE SECOND MICROSECOND QUARTER WEEK YEAR_MONTH DAY_HOUR DAY_MINUTE(日时分)
DAY_SECOND(日时分秒) DAY_MICROSECOND(日时分秒毫秒) HOUR_MINUTE(时分) HOUR_SECOND(时分秒)
HOUR_MICROSECOND(日时分秒毫秒) MINUTE_SECOND(分秒) MINUTE_MICROSECOND(分秒毫秒) SECOND_MICROSECOND(秒毫秒)
Oracle:
select extract (month from sysdate) from dual
连续登录天数 和 连续未登录天数
https://blog.csdn.net/logao2012/article/details/104630231/
create table login_log(user_id integer,login_date date);
insert into login_log values(1,to_date(‘2020-01-01’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-02’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-04’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-05’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-06’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-07’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-08’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-09’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-10’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-12’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-13’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-15’,‘yyyy-mm-dd’));
insert into login_log values(1,to_date(‘2020-01-16’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-01’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-02’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-03’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-04’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-05’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-06’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-07’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-08’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-09’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-10’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-11’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-12’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-13’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-16’,‘yyyy-mm-dd’));
insert into login_log values(2,to_date(‘2020-01-17’,‘yyyy-mm-dd’));
----------------------连续登录天数---------------------------
create table log_rank as
select user_id,login_date,row_number() over(partition by user_id order by login_date) day_rank from login_log;
select tmp.user_id,tmp.login_date,count(1)
from (
select r1.user_id,r2.login_date
from login_rank r1 ,login_rank r2
where r1.user_id=r2.user_id
and r1.login_date<=r2.login_date
and r1.login_date-r1.login_rank=r2.login_date-r2.login_rank
) tmp group by tmp.user_id,tmp.login_date
----------------------连续未登录天数---------------------------
select r1.user_id,(r1.login_day-r2.login_day)-1 from login_rank r1 left join login_rank r2 on r1.user_id=r2.user_id and r1.day_rank-1=r2.day_rank
关于null和’’
hive: https://blog.csdn.net/weixin_40873462/article/details/101692970
hive中null表示空值 ‘‘表示空字符串,二者不一样
可以用nvl(a,b) coalesce(expr1,expr2,expr3…exprN)来处理空值。
mysql: https://blog.csdn.net/u010648555/article/details/94555199
mysql中null和’‘有区别, IS NULL/IS NOT NULL/IFNULL()识别空值null
=’‘判断空字符串
select length(NULL), length(’’), length(‘1’); => null 0 1
空值不占空间,NULL值占空间。当字段不为NULL时,也可以插入空值。
MYSQL 5.7+InnoDB 引擎 环境下,针对含有null的列设置单列索引和组合索引,用is null查询均未失效
Oracle https://blog.csdn.net/spw55381155/article/details/79896377
Oracle中两者也有区别:对char和varchar2字段来说,’‘就是null;但对于where 条件后的’’ 不是null。
说明:四条记录 1 1 、2 2、3 ‘’、 4 null select count(a) from table where b=’’ =>0 select count(a) from table where b is null =>2
null做任何运算均为null
用is null/is not null/nvl()做比较和运算
null不能被索引
count(1) count(*)均表示统计表中所有条目总数 count(字段)表示统计该字段不为null的条目总数
关于用户访问的数据清洗
create table user_visit_action as
select user_name user_id,user_password session_id,user_email page_url,to_date(create_time) action_time from sys_user;
INSERT INTO user_visit_action VALUES(‘001’,‘ss001’,‘http://a.com’,to_date(‘2020-08-06 13:34:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘001’,‘ss001’,‘http://b.com’,to_date(‘2020-08-06 13:35:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘001’,‘ss001’,‘http://c.com’,to_date(‘2020-08-06 13:36:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘001’,‘ss002’,‘http://a.com’,to_date(‘2020-08-06 14:30:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘001’,‘ss002’,‘http://b.com’,to_date(‘2020-08-06 14:31:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘001’,‘ss002’,‘http://e.com’,to_date(‘2020-08-06 14:33:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘001’,‘ss002’,‘http://f.com’,to_date(‘2020-08-06 14:35:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘002’,‘ss003’,‘http://u.com’,to_date(‘2020-08-06 18:34:11’,‘yyyy-MM-dd HH24:mi:ss’));
INSERT INTO user_visit_action VALUES(‘002’,‘ss003’,‘http://k.com’,to_date(‘2020-08-06 18:38:11’,‘yyyy-MM-dd HH24:mi:ss’));
SELECT
user_id,
session_id,
page_url,
DENSE_RANK() OVER (PARTITION BY user_id, session_id ORDER BY action_time ASC) AS page_order,
MIN(action_time) OVER (PARTITION BY user_id, session_id) AS session_start_time,
MAX(action_time) OVER (PARTITION BY user_id, session_id) AS session_finisht_time
FROM user_visit_action
SELECT
user_id,
session_id,
page_url,
DENSE_RANK() OVER (PARTITION BY user_id, session_id ORDER BY action_time ASC) AS page_order,
MIN(action_time) OVER (PARTITION BY user_id, session_id) AS session_start_time,
MAX(action_time) OVER (PARTITION BY user_id, session_id) AS session_finisht_time
FROM user_visit_action order by user_id,session_id, page_order
关于两周 三周留存率的统计分析 https://segmentfault.com/a/1190000023537392
create table test_test_order
(
register_date varchar2(20),
test_order_date varchar2(20),
user_id varchar2(20),
country varchar2(20),
test_order_sales number(10,2),
test_order_id varchar2(20)
);
select * from test_order;
INSERT INTO test_order VALUES(‘2020-06-07’,‘2020-06-09’,‘001’,‘c0’,210,‘o1’);
INSERT INTO test_order VALUES(‘2020-06-08’,‘2020-06-09’,‘002’,‘c1’,220,‘o2’);
INSERT INTO test_order VALUES(‘2020-06-07’,‘2020-06-10’,‘003’,‘c2’,230,‘o3’);
INSERT INTO test_order VALUES(‘2020-06-09’,‘2020-06-10’,‘004’,‘c3’,200,‘o4’);
INSERT INTO test_order VALUES(‘2020-06-07’,‘2020-06-20’,‘005’,‘c4’,300,‘o5’);
INSERT INTO test_order VALUES(‘2020-06-10’,‘2020-06-23’,‘006’,‘c5’,400,‘o6’);
INSERT INTO test_order VALUES(‘2020-06-07’,‘2020-06-19’,‘007’,‘c6’,600,‘o7’);
INSERT INTO test_order VALUES(‘2020-06-12’,‘2020-06-18’,‘008’,‘c7’,700,‘o8’);
INSERT INTO test_order VALUES(‘2020-06-07’,‘2020-06-09’,‘009’,‘c8’,100,‘o9’);
INSERT INTO test_order VALUES(‘2020-06-15’,‘2020-06-18’,‘0010’,‘c9’,200,‘o10’);
INSERT INTO test_order VALUES(‘2020-06-15’,‘2020-06-19’,‘0011’,‘c10’,250,‘o11’);
INSERT INTO test_order VALUES(‘2020-06-12’,‘2020-06-29’,‘0012’,‘c11’,270,‘o12’);
INSERT INTO test_order VALUES(‘2020-06-16’,‘2020-06-19’,‘0013’,‘c12’,230,‘o13’);
INSERT INTO test_order VALUES(‘2020-06-17’,‘2020-06-20’,‘0014’,‘c13’,290,‘o14’);
INSERT INTO test_order VALUES(‘2020-06-20’,‘2020-06-29’,‘0015’,‘c14’,203,‘o15’);
------------------------------关于留存率----------------------------
SELECT
date_add(Next_day(register_date, ‘MO’),-1) AS week_end,
COUNT(DISTINCT CASE WHEN order_date BETWEEN register_date AND date_add(register_date,6) THEN user_id END) AS first_week_order,
COUNT(DISTINCT CASE WHEN order_date BETWEEN date_add(register_date ,7) AND date_add(register_date,13) THEN user_id END) AS sencod_week_order,
COUNT(DISTINCT CASE WHEN order_date BETWEEN date_add(register_date ,14) AND date_add(register_date,20) THEN user_id END) as third_week_order
FROM test_order
GROUP BY register_date;
对员工工资信息的统计 https://www.jianshu.com/p/4e5645c1b403?utm_campaign=maleskine&utm_content=note&utm_medium=seo_notes&utm_source=recommendation
------------------------------预取涨幅,必有连接----------------------------
select a.id,a.real_income,a.income_datetime,b.real_income,b.income_datetime
from (select id,nvl(tmp.income,0)-nvl(tmp.outcome,0) real_income
from (
select id,
case when type=‘收入’ then income_date as income,
case when type=‘罚款’ then income_data,‘0’ as outcome_data
from income_info
group by id,income_datetime)tmp)a,
(select id,nvl(tmp.income,0)-nvl(tmp.outcome,0)
from (
select id,
case when type=‘收入’ then income_date as income,
case when type=‘罚款’ then income_data,‘0’ as outcome_data
from income_info
group by id,income_datetime)tmp)b
where a.id=b.id
and year(a.income_datetiime)*12+month(a.income_datetime)-1 = year(b.income_datetiime)*12+month(b.income_datetime)
应为本身是DBA转数仓,所以想知道,如果能通过面试,您对我未来3-6个月的要求是什么




