计算新登陆用户次日的留存率、每个日期登陆的新用户数、 每个日期新用户的次日留存率
- 1. 请你写出一个sql语句查询新登录用户次日成功的留存率,即第1天登陆之后,第2天再次登陆的概率,保存小数点后面3位(3位之后的四舍五入)
- 2. 请你写出一个sql语句查询每个日期登录新用户个数,并且查询结果按照日期升序排序,
- 3.请你写出一个sql语句查询每个日期新用户的次日留存率,结果保留小数点后面3位数(3位之后的四舍五入),并且查询结果按照日期升序排序
- 4. 计算用户连续登录天数
- 5. 查询连续登陆5天的用户
1. 请你写出一个sql语句查询新登录用户次日成功的留存率,即第1天登陆之后,第2天再次登陆的概率,保存小数点后面3位(3位之后的四舍五入)
select round(count(t2.user_id)*1.0/count(t1.user_id),3)
from (select user_id,min(date) as date from login group by user_id) t1
left join login t2
on t1.user_id = t2.user_id and t2.date = date_add(t1.date,interval 1 day)
2. 请你写出一个sql语句查询每个日期登录新用户个数,并且查询结果按照日期升序排序,
select tmp.date,sum(case tmp.rk when 1 then 1 else 0 end) as new
from (select user_id,date,rank() over(partition by user_id order by date) as rk from login) tmp
group by tmp.date
select tmp.date,count(distinct tmp.user_id) as new
from (select user_id,min(date) date from login group by user_id) tmp
group by tmp.date
union
select date,0 as new from login
where date not in (select min(date) from login group by user_id)
order by date;
3.请你写出一个sql语句查询每个日期新用户的次日留存率,结果保留小数点后面3位数(3位之后的四舍五入),并且查询结果按照日期升序排序
select t1.date,round(count(distinct t2.user_id)/count(distinct t1.user_id),3) p
from (select user_id,min(date) date from login group by user_id) t1
left join login t2
on t1.user_id = t2.user_id and t2.date=date_add(t1.date,interval 1 day)
group by t1.date
union
select date,0.000 p from login
where date not in (select min(date) from login group by user_id)
order by date;
4. 计算用户连续登录天数
create table user_login(user_id varchar(100),login_time datetime
);insert into user_login values
(1,'2016-11-25 13:30:45'),
(1,'2016-11-24 13:30:45'),
(1,'2016-11-24 10:30:45'),
(1,'2016-11-24 09:30:45'),
(1,'2016-11-23 09:30:45'),
(1,'2016-11-10 09:30:45'),
(1,'2016-11-09 09:30:45'),
(1,'2016-11-01 09:30:45'),
(1,'2016-10-31 09:30:45'),
(2,'2016-11-25 13:30:45'),
(2,'2016-11-24 13:30:45'),
(2,'2016-11-23 10:30:45'),
(2,'2016-11-22 09:30:45'),
(2,'2016-11-21 09:30:45'),
(2,'2016-11-20 09:30:45'),
(2,'2016-11-19 09:30:45'),
(2,'2016-11-02 09:30:45'),
(2,'2016-11-01 09:30:45'),
(2,'2016-10-31 09:30:45'),
(2,'2016-10-30 09:30:45'),
(2,'2016-10-29 09:30:45'); 1. 一天中可能有多次打卡记录
create table user_login_date(select distinct user_id, date(login_time) login_date from user_login
);
2. 按照用户分区 login_date 从小到大 排序
select user_id,login_date,row_number() over(partition by user_id order by login_date) rk
from user_login_date;
3. 计算辅助列 login_date-rk
select user_id,login_date,rk,date_sub(login_date,interval rk day) t_date
from(select user_id,login_date,row_number() over(partition by user_id order by login_date) rkfrom user_login_date
)a
4. 按照用户和辅助列分组 计算个数
select user_id,min(login_date) start_date,max(login_date) end_date,count(t_date) c
from(select user_id,login_date,rk,date_sub(login_date,interval rk day) t_datefrom(select user_id,login_date,row_number() over(partition by user_id order by login_date) rkfrom user_login_date)a
)b
group by user_id,t_date;
+
| user_id | login_date | rk | t_date |
+
| 1 | 2016-10-31 | 1 | 2016-10-30 |
| 1 | 2016-11-01 | 2 | 2016-10-30 |
| 1 | 2016-11-09 | 3 | 2016-11-06 |
| 1 | 2016-11-10 | 4 | 2016-11-06 |
| 1 | 2016-11-23 | 5 | 2016-11-18 |
| 1 | 2016-11-24 | 6 | 2016-11-18 |
| 1 | 2016-11-25 | 7 | 2016-11-18 |
| 2 | 2016-10-29 | 1 | 2016-10-28 |
| 2 | 2016-10-30 | 2 | 2016-10-28 |
| 2 | 2016-10-31 | 3 | 2016-10-28 |
| 2 | 2016-11-01 | 4 | 2016-10-28 |
| 2 | 2016-11-02 | 5 | 2016-10-28 |
| 2 | 2016-11-19 | 6 | 2016-11-13 |
| 2 | 2016-11-20 | 7 | 2016-11-13 |
| 2 | 2016-11-21 | 8 | 2016-11-13 |
| 2 | 2016-11-22 | 9 | 2016-11-13 |
| 2 | 2016-11-23 | 10 | 2016-11-13 |
| 2 | 2016-11-24 | 11 | 2016-11-13 |
| 2 | 2016-11-25 | 12 | 2016-11-13 |
+
结果
+
| user_id | start_date | end_date | c |
+
| 1 | 2016-10-31 | 2016-11-01 | 2 |
| 1 | 2016-11-09 | 2016-11-10 | 2 |
| 1 | 2016-11-23 | 2016-11-25 | 3 |
| 2 | 2016-10-29 | 2016-11-02 | 5 |
| 2 | 2016-11-19 | 2016-11-25 | 7 |
+
5. 查询连续登陆5天的用户
1.采用lead函数 找到用户第五次登陆的日期
select *,lead(login_date,5) over(partition by user_id order by login_date) 5th_day
from user_login_date;+
| user_id | login_date | 5th_day |
+
| 1 | 2016-10-31 | 2016-11-24 |
| 1 | 2016-11-01 | 2016-11-25 |
| 1 | 2016-11-09 | NULL |
| 1 | 2016-11-10 | NULL |
| 1 | 2016-11-23 | NULL |
| 1 | 2016-11-24 | NULL |
| 1 | 2016-11-25 | NULL |
| 2 | 2016-10-29 | 2016-11-19 |
| 2 | 2016-10-30 | 2016-11-20 |
| 2 | 2016-10-31 | 2016-11-21 |
| 2 | 2016-11-01 | 2016-11-22 |
| 2 | 2016-11-02 | 2016-11-23 |
| 2 | 2016-11-19 | 2016-11-24 |
| 2 | 2016-11-20 | 2016-11-25 |
| 2 | 2016-11-21 | NULL |
| 2 | 2016-11-22 | NULL |
| 2 | 2016-11-23 | NULL |
| 2 | 2016-11-24 | NULL |
| 2 | 2016-11-25 | NULL |
+2. 计算第五次登陆时间和本次登陆时间相差5天的用户
select distinct user_id
from(select *,timestampdiff(day,login_date,5th_date) diff_datefrom (select *,lead(login_date,5) over(partition by user_id order by login_date) 5th_datefrom user_login_date)a
)b
where diff_date=5;
结果
+
| user_id |
+
| 2 |
+
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!