flink1.12.0学习笔记第5篇-业务案例实践

flink1.12.0学习笔记第 5 篇-业务案例实践

flink1.12.0学习笔记第1篇-部署与入门
flink1.12.0学习笔记第2篇-流批一体API
flink1.12.0学习笔记第3篇-高级API
flink1.12.0学习笔记第4篇-Table与SQL
flink1.12.0学习笔记第5篇-业务案例实践
flink1.12.0学习笔记第6篇-高级特性与新特性
flink1.12.0学习笔记第7篇-监控与优化

案例1.Flink模拟双十一实时大屏统计

1.需求

在大数据的实时处理中,实时的大屏展示已经成了一个很重要的展示项,比如最有名的双十一大屏实时销售总价展示。除了这个,还有一些其他场景的应用,比如我们在我们的后台系统实时的展示我们网站当前的pv、uv等等,其实做法都是类似的。

需求如下:

    1. 实时计算出当天零点截止到当前时间的销售总额
    2. 计算出各个分类的销售top3
    3. 每秒钟更新一次统计结果

2.数据

首先我们通过自定义source 模拟订单的生成,生成了一个Tuple2,第一个元素是分类,第二个元素表示这个分类下产生的订单金额,金额我们通过随机生成.

/*** 自定义数据源实时产生订单数据Tuple2<分类, 金额>*/
public static class MySource implements SourceFunction<Tuple2<String, Double>>{private boolean flag = true;private String[] categorys = {"女装", "男装","图书", "家电","洗护", "美妆","运动", "游戏","户外", "家具","乐器", "办公"};private Random random = new Random();@Overridepublic void run(SourceContext<Tuple2<String, Double>> ctx) throws Exception {while (flag){//随机生成分类和金额int index = random.nextInt(categorys.length);//[0~length) ==> [0~length-1]String category = categorys[index];//获取的随机分类double price = random.nextDouble() * 100;//注意nextDouble生成的是[0~1)之间的随机数,*100之后表示[0~100)ctx.collect(Tuple2.of(category,price));Thread.sleep(20);}}@Overridepublic void cancel() {flag = false;}
}

3.实现步骤

  1. env

  2. source

  3. transformation

  • 3.1定义大小为一天的窗口,第二个参数是窗口偏移量:中国使用的UTC+08:00时区比UTC时间早8小时,因此偏移量设为Time.hours(-8),使窗口从北京时间零点开始

    .keyBy(0)

    .window(TumblingProcessingTimeWindows.of(Time.days(1), Time.hours(-8)))

  • 3.2定义一个1s的触发器

    .trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(1)))

  • 3.3聚合结果.aggregate(new PriceAggregate(), new WindowResult());

  • 3.4看一下聚合的结果

    CategoryPojo(category=男装, totalPrice=17225.26, dateTime=2020-10-20 08:04:12)

  4. 使用上面聚合的结果,实现业务需求:

    result.keyBy("dateTime")

    //每秒钟更新一次统计结果

    .window(TumblingProcessingTimeWindows.of(Time.seconds(1)))

    //在ProcessWindowFunction中实现该复杂业务逻辑

    .process(new FinalResultWindowProcess());

  • 4.1. 实时计算出当天零点截止到当前时间的销售总额

  • 4.2.计算出各个分类的销售top3

  • 4.3.每秒钟更新一次统计结果

  5. execute

4.代码示例

package cn.wangting.action;import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.triggers.ContinuousProcessingTimeTrigger;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Random;
import java.util.stream.Collectors;public class DoubleElevenBigScreem {public static void main(String[] args) throws Exception {//TODO 1.envStreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);env.setParallelism(1);//TODO 2.sourceDataStream<Tuple2<String, Double>> orderDS = env.addSource(new MySource());//TODO 3.transformationDataStream<CategoryPojo> tempAggResult = orderDS.keyBy(t -> t.f0).window(TumblingProcessingTimeWindows.of(Time.days(1), Time.hours(-8))).trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(1))).aggregate(new PriceAggregate(), new WindowResult());tempAggResult.print("初步聚合的各个分类的销售总额");//TODO 4.sinktempAggResult.keyBy(CategoryPojo::getDateTime).window(TumblingProcessingTimeWindows.of(Time.seconds(1))).process(new FinalResultWindowProcess());//TODO 5.executeenv.execute();}// 自定义数据源实时产生订单数据Tuple2<分类, 金额>public static class MySource implements SourceFunction<Tuple2<String, Double>> {private boolean flag = true;private String[] categorys = {"女装", "男装", "图书", "家电", "洗护", "美妆", "运动", "游戏", "户外", "家具", "乐器", "办公"};private Random random = new Random();@Overridepublic void run(SourceContext<Tuple2<String, Double>> ctx) throws Exception {while (flag) {int index = random.nextInt(categorys.length);String category = categorys[index];double price = random.nextDouble() * 100;ctx.collect(Tuple2.of(category, price));Thread.sleep(20);}}@Overridepublic void cancel() {flag = false;}}// 自定义聚合函数,指定聚合规则private static class PriceAggregate implements AggregateFunction<Tuple2<String, Double>, Double, Double> {@Overridepublic Double createAccumulator() {return 0D;}@Overridepublic Double add(Tuple2<String, Double> value, Double accumulator) {return value.f1 + accumulator;}@Overridepublic Double getResult(Double accumulator) {return accumulator;}@Overridepublic Double merge(Double a, Double b) {return a + b;}}// 自定义窗口函数,指定窗口数据收集规则private static class WindowResult implements 
WindowFunction<Double, CategoryPojo, String, TimeWindow> {private FastDateFormat df = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss");@Overridepublic void apply(String category, TimeWindow window, Iterable<Double> input, Collector<CategoryPojo> out) throws Exception {long currentTimeMillis = System.currentTimeMillis();String dateTime = df.format(currentTimeMillis);Double totalPrice = input.iterator().next();out.collect(new CategoryPojo(category,totalPrice,dateTime));}}// 用于存储聚合的结果@Data@AllArgsConstructor@NoArgsConstructorpublic static class CategoryPojo {private String category;private double totalPrice;private String dateTime;}// 自定义窗口完成销售总额统计和分类销售额top3统计并输出private static class FinalResultWindowProcess extends ProcessWindowFunction<CategoryPojo, Object, String, TimeWindow> {@Overridepublic void process(String dateTime, Context context, Iterable<CategoryPojo> elements, Collector<Object> out) throws Exception {double total = 0D;Queue<CategoryPojo> queue = new PriorityQueue<>(3,(c1, c2) -> c1.getTotalPrice() >= c2.getTotalPrice() ? 1 : -1);for (CategoryPojo element : elements) {double price = element.getTotalPrice();total += price;if(queue.size()< 3){queue.add(element);}else{if(price >= queue.peek().getTotalPrice()){queue.poll();queue.add(element);}}}List<String> top3List = queue.stream().sorted((c1, c2) -> c1.getTotalPrice() >= c2.getTotalPrice() ? -1 : 1).map(c -> "分类:" + c.getCategory() + " 金额:" + c.getTotalPrice()).collect(Collectors.toList());double roundResult = new BigDecimal(total).setScale(2, RoundingMode.HALF_UP).doubleValue();System.out.println("时间: "+dateTime +" 总金额 :" + roundResult);System.out.println("top3: \n" + StringUtils.join(top3List,"\n"));}}
}

5.运行效果

时间: 2022-09-29 15:01:50 总金额 :7342.27
top3: 
分类:家具 金额:1104.831122065498
分类:运动 金额:915.3967483816951
分类:家电 金额:841.1627522295081
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=家电, totalPrice=915.5064000160678, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=办公, totalPrice=1041.8726323584178, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=美妆, totalPrice=503.02560448369053, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=运动, totalPrice=1019.2793212446227, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=游戏, totalPrice=556.7830812774787, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=户外, totalPrice=696.3542078051894, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=乐器, totalPrice=483.0946634839456, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=洗护, totalPrice=725.1418698615925, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=男装, totalPrice=615.631037605471, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=女装, totalPrice=660.7646487962378, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=家具, totalPrice=1383.8312618867728, dateTime=2022-09-29 15:01:51)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=女装, totalPrice=906.2431893294321, dateTime=2022-09-29 15:01:52)
时间: 2022-09-29 15:01:51 总金额 :9095.84
top3: 
分类:家具 金额:1383.8312618867728
分类:办公 金额:1041.8726323584178
分类:运动 金额:1019.2793212446227
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=家具, totalPrice=1462.202891610573, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=运动, totalPrice=1109.9560846950264, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=美妆, totalPrice=515.9936934014466, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=家电, totalPrice=999.9819796981208, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=图书, totalPrice=708.0261692936298, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=办公, totalPrice=1178.0235983868834, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=男装, totalPrice=839.431520006262, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=户外, totalPrice=841.6338202091758, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=游戏, totalPrice=556.7830812774787, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=洗护, totalPrice=725.1418698615925, dateTime=2022-09-29 15:01:52)
初步聚合的各个分类的销售总额> DoubleElevenBigScreem.CategoryPojo(category=乐器, totalPrice=834.0851355693773, dateTime=2022-09-29 15:01:52)

案例2.Flink实现订单自动好评

1.需求

在电商领域会有这么一个场景,如果用户买了商品,在订单完成之后,一定时间之内没有做出评价,系统自动给与五星好评,用Flink的定时器来简单实现这一功能

2.数据

自定义source模拟生成一些订单数据,三元组Tuple3,包含用户id,订单id和订单完成时间三个字段.

/*** 自定义source实时产生订单数据Tuple3<用户id,订单id, 订单生成时间>*/
public static class MySource implements SourceFunction<Tuple3<String, String, Long>> {private boolean flag = true;@Overridepublic void run(SourceContext<Tuple3<String, String, Long>> ctx) throws Exception {Random random = new Random();while (flag) {String userId = random.nextInt(5) + "";String orderId = UUID.randomUUID().toString();long currentTimeMillis = System.currentTimeMillis();ctx.collect(Tuple3.of(userId, orderId, currentTimeMillis));Thread.sleep(500);}}@Overridepublic void cancel() {flag = false;}
}

3.实现步骤

  1. env

  2. source

  3. transformation

    设置经过interval毫秒用户未对订单做出评价,自动给与好评.为了演示方便,设置5s的时间

    long interval = 5000L;

    分组后使用自定义KeyedProcessFunction完成定时判断超时订单并自动好评

    dataStream.keyBy(0).process(new TimerProcessFunction(interval));

  • 3.1定义MapState类型的状态,key是订单号,value是订单完成时间

  • 3.2创建MapState

    MapStateDescriptor<String, Long> mapStateDesc =

        new MapStateDescriptor<>("mapStateDesc", String.class, Long.class);

    mapState = getRuntimeContext().getMapState(mapStateDesc);

  • 3.3注册定时器

    mapState.put(value.f1, value.f2);

    ctx.timerService().registerProcessingTimeTimer(value.f2 + interval);

    3.4定时器被触发时执行并输出结果

  4. sink

  5. execute

4.代码示例

package cn.wangting.action;import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.util.Collector;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.UUID;public class OrderAutomaticFavorableComments {public static void main(String[] args) throws Exception {//TODO 1.envStreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);env.setParallelism(1);//TODO 2.sourceDataStream<Tuple3<String, String, Long>> orderDS = env.addSource(new MySource());//TODO 3.transformationlong interval = 5000L;//5sorderDS.keyBy(t -> t.f0).process(new TimerProcessFunction(interval));//TODO 4.sink//TODO 5.executeenv.execute();}// 自定义source实时产生订单数据Tuple3<用户id,订单id, 订单生成时间>public static class MySource implements SourceFunction<Tuple3<String, String, Long>> {private boolean flag = true;@Overridepublic void run(SourceContext<Tuple3<String, String, Long>> ctx) throws Exception {Random random = new Random();while (flag) {String userId = random.nextInt(5) + "";String orderId = UUID.randomUUID().toString();long currentTimeMillis = System.currentTimeMillis();ctx.collect(Tuple3.of(userId, orderId, currentTimeMillis));Thread.sleep(500);}}@Overridepublic void cancel() {flag = false;}}/*** 自定义ProcessFunction完成订单自动好评* 进来一条数据应该在interval时间后进行判断该订单是否超时是否需要自动好评* abstract class KeyedProcessFunction*/private static class TimerProcessFunction extends KeyedProcessFunction<String, Tuple3<String, String, Long>, Object> {private long interval;//订单超时时间 传进来的是5000ms/5spublic TimerProcessFunction(long interval) {this.interval = interval;}private MapState<String, Long> mapState = null;@Overridepublic void open(Configuration parameters) throws Exception {MapStateDescriptor<String, Long> mapStateDescriptor = new MapStateDescriptor<>("mapState", String.class, Long.class);mapState = getRuntimeContext().getMapState(mapStateDescriptor);}@Overridepublic void processElement(Tuple3<String, String, Long> value, Context ctx, Collector<Object> out) throws Exception {mapState.put(value.f1, value.f2);ctx.timerService().registerProcessingTimeTimer(value.f2 + interval);}@Overridepublic 
void onTimer(long timestamp, OnTimerContext ctx, Collector<Object> out) throws Exception {Iterator<Map.Entry<String, Long>> iterator = mapState.iterator();while (iterator.hasNext()) {Map.Entry<String, Long> map = iterator.next();String orderId = map.getKey();Long orderTime = map.getValue();if (!isFavorable(orderId)) {if (System.currentTimeMillis() - orderTime >= interval) {System.out.println("orderId:" + orderId + "该订单已经超时未评价,系统自动给与好评!....");iterator.remove();mapState.remove(orderId);}} else {System.out.println("orderId:" + orderId + "该订单已经评价....");iterator.remove();mapState.remove(orderId);}}}public boolean isFavorable(String orderId) {return orderId.hashCode() % 2 == 0;}}
}

5.运行效果

orderId:ece38a3b-444d-4f38-b491-35c0d77ad472该订单已经评价....
orderId:87ea561a-73ef-425b-8e1f-7a0387d0c7c9该订单已经评价....
orderId:4aafcce9-1fc6-4b52-863f-e5c7dacc5344该订单已经超时未评价,系统自动给与好评!....
orderId:98a19e40-dabf-43cc-a3ee-0c76d5402e86该订单已经超时未评价,系统自动给与好评!....
orderId:4f3f520a-160c-400d-8f58-fcb4b3e2ae8d该订单已经超时未评价,系统自动给与好评!....
orderId:d852a77f-7800-41af-801d-950baed8f65f该订单已经超时未评价,系统自动给与好评!....
orderId:7663584c-c90c-42d9-a0fb-53fcca7ffe43该订单已经超时未评价,系统自动给与好评!....
orderId:681c8d6c-079a-4ccd-8470-d5cb3c464fd1该订单已经评价....
orderId:31418829-eb7e-40d5-87ce-1702f64ffe3a该订单已经评价....
orderId:2ee6472c-1705-4372-b5b0-6f84b9763e25该订单已经超时未评价,系统自动给与好评!....
orderId:dfe6136b-ad6b-416f-ab04-47b0d42d3dbc该订单已经超时未评价,系统自动给与好评!....
orderId:638992d2-8983-4bef-928f-b8051d9f45b6该订单已经超时未评价,系统自动给与好评!....
orderId:cb57f913-152f-4535-b8fd-9d0498c46b1a该订单已经评价....
orderId:6efea3a6-4785-4e51-8fd6-e39057d9d1f9该订单已经评价....

flink1.12.0学习笔记第1篇-部署与入门
flink1.12.0学习笔记第2篇-流批一体API
flink1.12.0学习笔记第3篇-高级API
flink1.12.0学习笔记第4篇-Table与SQL
flink1.12.0学习笔记第5篇-业务案例实践
flink1.12.0学习笔记第6篇-高级特性与新特性
flink1.12.0学习笔记第7篇-监控与优化


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部