flink kafka消费pojo类型数据实战详解
1.pojo类数据接口
在实际生产环境中,我们一般会将数据封装成一个pojo类(或者其他rpc框架通过IDL生成一个java类),这样能方便我们后续的数据传输与解析。该pojo类就相当于标准数据接口,可以在任何地方被引用或者使用。下面我们来看看,怎么通过kafka的producer来生产这些pojo类数据,又怎么通过flink或者kafka的consumer来消费这些数据。
2.kafka producer生产数据
2.1 定义pojo类
首先,我们定义一个pojo类
/**
 * Simple POJO describing a user; it is the payload type that the producer,
 * consumer and Flink examples exchange as JSON.
 *
 * <p>The public no-argument constructor must stay: the JSON mapper used for
 * deserialization needs it to instantiate the class before populating it.
 * The fields remain public so existing code that accesses them directly
 * keeps working.
 */
public class User {

    /** Display name of the user. */
    public String name;

    /** Age of the user. */
    public int age;

    /** Creates an empty user; required for JSON deserialization. */
    public User() {
    }

    /**
     * Creates a fully initialised user.
     *
     * @param name the user's name
     * @param age  the user's age
     */
    public User(String name, int age) {
        this.name = name;
        this.age = age;
    }

    /** @return the user's name */
    public String getName() {
        return name;
    }

    /** @param name the new name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the user's age */
    public int getAge() {
        return age;
    }

    /** @param age the new age */
    public void setAge(int age) {
        this.age = age;
    }

    /**
     * Renders the user as {@code user{name='...', age=N}}.
     *
     * @return a human-readable representation of this user
     */
    @Override
    public String toString() {
        return "user{" + "name='" + name + '\'' + ", age=" + age + "}";
    }
}
我们定义了一个User类,该类有两个字段name与age。需要注意的是,该类必须包含默认的(无参)构造函数:后面使用ObjectMapper反序列化时,需要先通过无参构造函数创建对象,再对字段赋值,否则会抛出JsonMappingException。具体可以参考如下链接
https://stackoverflow.com/questions/7625783/jsonmappingexception-no-suitable-constructor-found-for-type-simple-type-class
2.2 定义序列化类
import org.apache.kafka.common.serialization.Serializer;
import org.codehaus.jackson.map.ObjectMapper;import java.io.IOException;
import java.util.Map;/*** author: wanglei* create: 2022-09-26*/
public class UserSerializer implements Serializer {@Overridepublic void configure(Map configs, boolean isKey) {}@Overridepublic byte[] serialize(String topic, User user) {if (user == null) return null;ObjectMapper objectMapper = new ObjectMapper();try {return objectMapper.writeValueAsString(user).getBytes();} catch (IOException e) {e.printStackTrace();}return null;}@Overridepublic void close() {}
}
上面这个类的作用是将User对象序列化为字节数组,方便后面的数据传输。上面使用了ObjectMapper类进行序列化,需要引入如下依赖
<dependency>
    <groupId>org.codehaus.jackson</groupId>
    <artifactId>jackson-mapper-asl</artifactId>
    <version>1.9.13</version>
</dependency>
2.3 实现producer
import edu.bit.leilei.serialize.User;
import edu.bit.leilei.serialize.UserSerializer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;import java.util.Properties;/*** author: wanglei* create: 2022-09-26*/
/**
 * Demo producer that publishes five {@link User} records to the
 * {@code pojotest} topic, using {@link UserSerializer} for the values.
 */
public class PojoProducer {

    /**
     * Sends users {@code "my name-0"} .. {@code "my name-4"} keyed by
     * {@code "key-0"} .. {@code "key-4"} and then closes the producer.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, UserSerializer.class.getName());

        String topic = "pojotest";

        // try-with-resources guarantees the producer is closed even if a
        // send() call throws.
        try (KafkaProducer<String, User> producer = new KafkaProducer<String, User>(properties)) {
            for (int i = 0; i < 5; i++) {
                User user = new User("my name-" + i, i);
                ProducerRecord<String, User> record =
                        new ProducerRecord<String, User>(topic, "key-" + i, user);
                producer.send(record);
            }
        }
    }
}
上面的代码,生成了5个User对象。将代码先运行2次,总共生成了10个user对象。再将里面的一行稍作修改
User user = new User("my name-" + i, i*10);
再运行一次,这样topic里面总共就有了15个对象。
3.kafka consumer消费数据
3.1 编写反序列化类
consumer要消费数据,首先需要做的是对数据进行反序列化。我们先来编写反序列化类代码
import org.apache.kafka.common.serialization.Deserializer;
import org.codehaus.jackson.map.ObjectMapper;import java.io.IOException;
import java.util.Map;/*** author: wanglei* create: 2022-09-26*/
public class UserDeserializer implements Deserializer {@Overridepublic void configure(Map configs, boolean isKey) {}@Overridepublic User deserialize(String topic, byte[] data) {ObjectMapper objectMapper = new ObjectMapper();try {return objectMapper.readValue(data, User.class);} catch (IOException e) {e.printStackTrace();}return null;}@Overridepublic void close() {}
}
上面同样用到的是ObjectMapper对byte[]数据进行反序列化。
3.2 kafka consumer代码
import edu.bit.leilei.serialize.User;
import edu.bit.leilei.serialize.UserDeserializer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;import java.util.Arrays;
import java.util.Properties;/*** author: wanglei* create: 2022-09-26*/
public class PojoConsumer {public static void main(String[] args) {String topic = "pojotest";String groupId = "group_leilei";Properties props = new Properties();props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, UserDeserializer.class.getName());props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);props.put("auto.commit.interval.ms", "1000");props.put("session.timeout.ms", "30000");props.put("enable.auto.commit", "true");//设置为自动提交props.put("auto.offset.reset", "earliest");KafkaConsumer consumer = new KafkaConsumer(props);consumer.subscribe(Arrays.asList(topic));while(true) {ConsumerRecords records = consumer.poll(1L);for(ConsumerRecord record : records) {System.out.printf("patition = %d , offset = %d, key = %s, value = %s%n",record.partition(), record.offset(), record.key(), record.value());}}}
}
上面的代码中,指定了反序列化类为UserDeserializer,并且从topic的最早位置开始进行消费。
最后输出的结果为
patition = 0 , offset = 0, key = key-0, value = user{name='my name-0', age=0}
patition = 0 , offset = 1, key = key-1, value = user{name='my name-1', age=1}
patition = 0 , offset = 2, key = key-2, value = user{name='my name-2', age=2}
patition = 0 , offset = 3, key = key-3, value = user{name='my name-3', age=3}
patition = 0 , offset = 4, key = key-4, value = user{name='my name-4', age=4}
patition = 0 , offset = 5, key = key-0, value = user{name='my name-0', age=0}
patition = 0 , offset = 6, key = key-1, value = user{name='my name-1', age=1}
patition = 0 , offset = 7, key = key-2, value = user{name='my name-2', age=2}
patition = 0 , offset = 8, key = key-3, value = user{name='my name-3', age=3}
patition = 0 , offset = 9, key = key-4, value = user{name='my name-4', age=4}
patition = 0 , offset = 10, key = key-0, value = user{name='my name-0', age=0}
patition = 0 , offset = 11, key = key-1, value = user{name='my name-1', age=10}
patition = 0 , offset = 12, key = key-2, value = user{name='my name-2', age=20}
patition = 0 , offset = 13, key = key-3, value = user{name='my name-3', age=30}
patition = 0 , offset = 14, key = key-4, value = user{name='my name-4', age=40}
4.flink消费pojo类型数据
我们尝试使用flink来消费pojo类的数据。
4.1 反序列化类
同样的,首先也需要编写flink中反序列化的schema类。
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.codehaus.jackson.map.ObjectMapper;import java.io.IOException;/*** author: wanglei* create: 2022-09-27*/
public class UserFlinkDeserializer implements DeserializationSchema {@Overridepublic User deserialize(byte[] message) throws IOException {ObjectMapper objectMapper = new ObjectMapper();try {return objectMapper.readValue(message, User.class);} catch (IOException e) {e.printStackTrace();}return null;}@Overridepublic boolean isEndOfStream(User nextElement) {return false;}@Overridepublic TypeInformation getProducedType() {return TypeInformation.of(User.class);}
}
4.2 编写flink相关代码
import edu.bit.leilei.serialize.User;
import edu.bit.leilei.serialize.UserFlinkDeserializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09;import java.util.Properties;/*** author: wanglei* create: 2022-09-27*/
/**
 * Flink streaming job that reads {@link User} records from the
 * {@code pojotest} Kafka topic, starting at the earliest offset, and prints
 * them.
 */
public class StreamPojo {

    /**
     * Builds and runs the job with parallelism 1.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String topic = "pojotest";

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        // The topic name doubles as the consumer group id, as in the original.
        properties.setProperty("group.id", topic);

        FlinkKafkaConsumer09<User> myConsumer =
                new FlinkKafkaConsumer09<User>(topic, new UserFlinkDeserializer(), properties);
        myConsumer.setStartFromEarliest();

        DataStream<User> stream = env.addSource(myConsumer);
        stream.print();

        env.execute();
    }
}
代码运行以后得到的输出结果为
user{name='my name-0', age=0}
user{name='my name-1', age=1}
user{name='my name-2', age=2}
user{name='my name-3', age=3}
user{name='my name-4', age=4}
user{name='my name-0', age=0}
user{name='my name-1', age=1}
user{name='my name-2', age=2}
user{name='my name-3', age=3}
user{name='my name-4', age=4}
user{name='my name-0', age=0}
user{name='my name-1', age=10}
user{name='my name-2', age=20}
user{name='my name-3', age=30}
user{name='my name-4', age=40}
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
