| author | qidaijie <[email protected]> | 2021-11-11 09:14:09 +0300 |
|---|---|---|
| committer | qidaijie <[email protected]> | 2021-11-11 09:14:09 +0300 |
| commit | 60e4bcfca08d3d30c4df66a76e7af604e955bac8 (patch) | |
| tree | bfb9d11e4f91800f91ee8aa42693e4286126cfbc | |
| parent | 8bf733385fa8a0daac2dd3bd9c8428f91ee0624a (diff) | |
Replace toJSONString with the fastjson utility class
| -rw-r--r-- | pom.xml | 2 |
| -rw-r--r-- | properties/default_config.properties | 57 |
| -rw-r--r-- | properties/service_flow_config.properties | 8 |
| -rw-r--r-- | src/main/java/com/zdjizhi/common/FlowWriteConfig.java | 1 |
| -rw-r--r-- | src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java | 11 |
| -rw-r--r-- | src/main/java/com/zdjizhi/utils/general/TransFormMap.java | 4 |
| -rw-r--r-- | src/main/java/com/zdjizhi/utils/general/TransFormObject.java | 4 |
| -rw-r--r-- | src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java | 16 |
| -rw-r--r-- | src/main/java/com/zdjizhi/utils/kafka/Consumer.java | 2 |
| -rw-r--r-- | src/main/java/com/zdjizhi/utils/kafka/Producer.java | 1 |
10 files changed, 64 insertions, 42 deletions
diff --git a/pom.xml b/pom.xml
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
   <groupId>com.zdjizhi</groupId>
   <artifactId>log-completion-schema</artifactId>
-  <version>211105-flattenSpec</version>
+  <version>211109-Jackson</version>
 
   <name>log-completion-schema</name>
   <url>http://www.example.com</url>
diff --git a/properties/default_config.properties b/properties/default_config.properties
index fb9015e..01bb5ca 100644
--- a/properties/default_config.properties
+++ b/properties/default_config.properties
@@ -1,49 +1,54 @@
-#Number of retries for the producer
+#====================Kafka Consumer====================#
+#kafka source connection timeout
+session.timeout.ms=60000
+
+#kafka source poll
+max.poll.records=3000
+
+#kafka source poll bytes
+max.partition.fetch.bytes=31457280
+#====================Kafka Producer====================#
+#Number of retries for the producer
 retries=0
-#How long a batch may wait after creation before it must be sent, whether or not it is full
+#How long a batch may wait after creation before it must be sent, whether or not it is full
 linger.ms=10
-#If no response arrives before the timeout, the client resends the request when necessary
+#If no response arrives before the timeout, the client resends the request when necessary
 request.timeout.ms=30000
-#The producer sends records in batches; batch size, default: 16384
+#The producer sends records in batches; batch size, default: 16384
 batch.size=262144
-#Size of the buffer the producer uses to cache messages
+#Size of the buffer the producer uses to cache messages
 #128M
 buffer.memory=134217728
-#Maximum size of a single request sent to the Kafka server, default: 1048576
+#Maximum size of a single request sent to the Kafka server, default: 1048576
 #10M
 max.request.size=10485760
+#====================kafka default====================#
+#kafka source protocol; SSL or SASL
+kafka.source.protocol=SASL
-#kafka source connection timeout
-session.timeout.ms=60000
+#kafka sink protocol; SSL or SASL
+kafka.sink.protocol=SSL
-#kafka source poll
-max.poll.records=3000
+#Username for Kafka SASL authentication
+kafka.user=admin
-#kafka source poll bytes
-max.partition.fetch.bytes=31457280
+#Password for Kafka SASL and SSL authentication
+kafka.pin=galaxy2019
+#====================Topology Default====================#
 #hbase table name
 hbase.table.name=subscriber_info
-#Default mail charset
+#Default mail charset
 mail.default.charset=UTF-8
-#0: no validation, 1: strict type validation, 2: weak type validation
-log.transform.type=0
-
-#kafka source protocol; SSL or SASL
-kafka.source.protocol=SASL
-
-#kafka sink protocol; SSL or SASL
-kafka.sink.protocol=SSL
-
-#Username for Kafka SASL authentication
-kafka.user=admin
+#0: no validation, 1: strict type validation, 2: weak type validation
+log.transform.type=2
-#Password for Kafka SASL and SSL authentication
-kafka.pin=galaxy2019
\ No newline at end of file
+#Maximum time between two outputs (in milliseconds)
+buffer.timeout=100000
\ No newline at end of file
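For context, the producer block above holds the settings that `Producer.java` presumably copies into the `Properties` object handed to its `FlinkKafkaProducer`, mirroring the `properties.put(...)` pattern visible in the `Consumer.java` hunk further down. A minimal sketch with the values from this file hard-coded; the helper class and method names are hypothetical, not part of this commit:

```java
import java.util.Properties;

// Hypothetical helper mirroring the consumer-side wiring; the values come
// from the [Kafka Producer] section of default_config.properties above.
final class ProducerPropertiesSketch {
    static Properties buildProducerProperties() {
        Properties properties = new Properties();
        properties.put("retries", "0");                 // producer retry count
        properties.put("linger.ms", "10");              // max wait before an unfilled batch is sent
        properties.put("request.timeout.ms", "30000");  // resend if no broker response in time
        properties.put("batch.size", "262144");         // bytes per batch (Kafka default: 16384)
        properties.put("buffer.memory", "134217728");   // 128M producer-side record buffer
        properties.put("max.request.size", "10485760"); // 10M cap on a single request
        return properties;
    }
}
```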
diff --git a/properties/service_flow_config.properties b/properties/service_flow_config.properties
index 51ecb4d..1a38ca4 100644
--- a/properties/service_flow_config.properties
+++ b/properties/service_flow_config.properties
@@ -1,7 +1,7 @@
 #--------------------------------Address configuration------------------------------#
 #Source Kafka servers
-source.kafka.servers=10.224.11.14:9094,10.224.11.15:9094,10.224.11.16:9094,10.224.11.17:9094,10.224.11.18:9094,10.224.11.19:9094,10.224.11.20:9094,10.224.11.21:9094,10.224.11.22:9094,10.224.11.23:9094
+source.kafka.servers=10.231.12.4:9094
 
 #Sink (output) Kafka servers
 sink.kafka.servers=10.224.11.14:9094,10.224.11.15:9094,10.224.11.16:9094,10.224.11.17:9094,10.224.11.18:9094,10.224.11.19:9094,10.224.11.20:9094,10.224.11.21:9094,10.224.11.22:9094,10.224.11.23:9094
@@ -10,7 +10,7 @@ sink.kafka.servers=10.224.11.14:9094,10.224.11.15:9094,10.224.11.16:9094,10.224.
 zookeeper.servers=10.224.11.11:2181,10.224.11.12:2181,10.224.11.13:2181
 
 #HBase ZooKeeper servers, used to connect to HBase
-hbase.zookeeper.servers=10.224.11.11:2181,10.224.11.12:2181,10.224.11.13:2181
+hbase.zookeeper.servers=10.231.12.4:2181
 
 #--------------------------------HTTP / location library------------------------------#
 #Location library address
@@ -25,13 +25,13 @@ app.id.http=http://10.224.11.244:9999/open-api/appDicList
 #--------------------------------Kafka consumer group------------------------------#
 #Kafka topic to receive data from
-source.kafka.topic=test
+source.kafka.topic=SESSION-RECORD
 
 #Output topic for completed data
 sink.kafka.topic=test-result
 
 #The consumer offset for the source topic is stored under this id (it can be named after the topology); the stored offset determines where the next read resumes without re-reading data
-group.id=flink-test
+group.id=flink-test-1
 
 #Producer compression mode: none or snappy
 producer.kafka.compression.type=none
diff --git a/src/main/java/com/zdjizhi/common/FlowWriteConfig.java b/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
index e8c569c..e2d430a 100644
--- a/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
+++ b/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
@@ -29,6 +29,7 @@ public class FlowWriteConfig {
     public static final String MAIL_DEFAULT_CHARSET = FlowWriteConfigurations.getStringProperty(0, "mail.default.charset");
     public static final String HBASE_TABLE_NAME = FlowWriteConfigurations.getStringProperty(1, "hbase.table.name");
     public static final Integer LOG_TRANSFORM_TYPE = FlowWriteConfigurations.getIntProperty(1, "log.transform.type");
+    public static final Integer BUFFER_TIMEOUT = FlowWriteConfigurations.getIntProperty(1, "buffer.timeout");
 
     /**
      * kafka source config
diff --git a/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java b/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java
index b23492b..07e0407 100644
--- a/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java
+++ b/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java
@@ -28,8 +28,8 @@ public class LogFlowWriteTopology {
         //Enable checkpointing; interval sets how often a checkpoint is triggered (in milliseconds)
//        environment.enableCheckpointing(5000);
-        //
-        environment.setBufferTimeout(5000);
+        //Maximum time between two outputs (in milliseconds)
+        environment.setBufferTimeout(FlowWriteConfig.BUFFER_TIMEOUT);
 
         DataStreamSource<String> streamSource = environment.addSource(Consumer.getKafkaConsumer())
                 .setParallelism(FlowWriteConfig.SOURCE_PARALLELISM);
@@ -41,26 +41,31 @@
             case 0:
                 //Process, complete, and transform the raw log; do not validate log field types.
                 cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
                         .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+                break;
             case 1:
                 //Process, complete, and transform the raw log; require field types to match the schema exactly.
                 cleaningLog = streamSource.map(new ObjectCompletedFunction()).name("ObjectCompletedFunction")
                         .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+                break;
             case 2:
                 //Process, complete, and transform the raw log; validate field types weakly, casting by the schema where possible.
                 cleaningLog = streamSource.map(new TypeMapCompletedFunction()).name("TypeMapCompletedFunction")
                         .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+                break;
             default:
                 //Process, complete, and transform the raw log; do not validate log field types.
                 cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
                         .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+
         }
 
-        //Filter out null data so it is not sent to Kafka
+//        //Filter out null data so it is not sent to Kafka
         DataStream<String> result = cleaningLog.filter(new FilterNullFunction()).name("FilterAbnormalData")
                 .setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
 
+        //Send data to Kafka
         result.addSink(Producer.getKafkaProducer()).name("LogSinkKafka")
                 .setParallelism(FlowWriteConfig.SINK_PARALLELISM);
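The `break` statements added above fix a plain `switch` fall-through bug: without them, a matching case also executes every case below it, so each later `streamSource.map(...)` call registered another operator and reassigned `cleaningLog`, and the `default` branch always won regardless of `log.transform.type`. A standalone illustration of the old behavior (demo code, not from the project):

```java
// Shows why the added `break` statements matter: without them, control
// falls through every case below the matching one, and the last
// assignment wins.
public class FallThroughDemo {
    public static void main(String[] args) {
        String chosen = "unset";
        switch (0) { // analogous to log.transform.type=0
            case 0:
                chosen = "MapCompletedFunction";
                // no break: execution continues into case 1
            case 1:
                chosen = "ObjectCompletedFunction";
            case 2:
                chosen = "TypeMapCompletedFunction";
            default:
                chosen = "MapCompletedFunction (default)";
        }
        System.out.println(chosen); // prints "MapCompletedFunction (default)"
    }
}
```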
diff --git a/src/main/java/com/zdjizhi/utils/general/TransFormMap.java b/src/main/java/com/zdjizhi/utils/general/TransFormMap.java
index 239d8db..5ae9859 100644
--- a/src/main/java/com/zdjizhi/utils/general/TransFormMap.java
+++ b/src/main/java/com/zdjizhi/utils/general/TransFormMap.java
@@ -54,11 +54,11 @@
             }
             return JsonMapper.toJsonString(jsonMap);
         } else {
-            return "";
+            return null;
         }
     } catch (RuntimeException e) {
         logger.error("Exception while parsing/completing the log; error: " + e + "\n" + message);
-        return "";
+        return null;
     }
 }
diff --git a/src/main/java/com/zdjizhi/utils/general/TransFormObject.java b/src/main/java/com/zdjizhi/utils/general/TransFormObject.java
index 9b776a9..54629db 100644
--- a/src/main/java/com/zdjizhi/utils/general/TransFormObject.java
+++ b/src/main/java/com/zdjizhi/utils/general/TransFormObject.java
@@ -62,11 +62,11 @@
             }
             return JsonMapper.toJsonString(object);
         } else {
-            return "";
+            return null;
         }
     } catch (RuntimeException e) {
         logger.error("Exception while parsing/completing the log; error: " + e + "\n" + message);
-        return "";
+        return null;
     }
 }
diff --git a/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java b/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java
index 4423f51..5f2100b 100644
--- a/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java
+++ b/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java
@@ -3,6 +3,8 @@ package com.zdjizhi.utils.general;
 
 import cn.hutool.log.Log;
 import cn.hutool.log.LogFactory;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.serializer.SerializerFeature;
 import com.zdjizhi.common.FlowWriteConfig;
 import com.zdjizhi.utils.JsonMapper;
 import com.zdjizhi.utils.StringUtil;
@@ -12,6 +14,8 @@ import com.zdjizhi.utils.json.JsonTypeUtils;
 import java.util.ArrayList;
 import java.util.Map;
 
+import static com.alibaba.fastjson.serializer.SerializerFeature.WriteMapNullValue;
+
 /**
  * Description: utility class for transforming and completing logs
@@ -53,13 +57,19 @@
                 String param = strings[3];
                 functionSet(function, jsonMap, appendToKeyName, appendToKeyValue, logValue, param);
             }
-            return JsonMapper.toJsonString(jsonMap);
+//            return JsonMapper.toJsonString(jsonMap);
+
+            //fastjson test
+            return JSONObject.toJSONString(jsonMap,
+                    SerializerFeature.DisableCircularReferenceDetect
+                    , SerializerFeature.WriteNullStringAsEmpty
+                    , SerializerFeature.WriteNullNumberAsZero);
         } else {
-            return "";
+            return null;
         }
     } catch (RuntimeException e) {
         logger.error("Exception while parsing/completing the log; error: " + e + "\n" + message);
-        return "";
+        return null;
     }
 }
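This fastjson call is the replacement the commit message refers to. `DisableCircularReferenceDetect` keeps fastjson from emitting `$ref` placeholders when the same object appears twice in `jsonMap`, while the two `WriteNull*` features substitute `""` and `0` for null values; note they apply to typed bean fields, whereas null entries of an untyped `Map` need `WriteMapNullValue` (statically imported above but not yet used) to be serialized at all. A small sketch of the behavior, assuming fastjson 1.x; the `Record` bean is illustrative only:

```java
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.serializer.SerializerFeature;

import java.util.HashMap;
import java.util.Map;

public class FastjsonFeatureDemo {
    // Illustrative bean; the declared field types let the WriteNull* features apply.
    public static class Record {
        public String name;   // left null
        public Integer count; // left null
    }

    public static void main(String[] args) {
        // Null bean fields get defaults; expected output: {"count":0,"name":""}
        System.out.println(JSON.toJSONString(new Record(),
                SerializerFeature.WriteNullStringAsEmpty,
                SerializerFeature.WriteNullNumberAsZero));

        Map<String, Object> map = new HashMap<>();
        map.put("k", null);
        // Map values are untyped, so null entries are dropped by default: {}
        System.out.println(JSON.toJSONString(map));
        // WriteMapNullValue keeps them: {"k":null}
        System.out.println(JSON.toJSONString(map, SerializerFeature.WriteMapNullValue));
    }
}
```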
diff --git a/src/main/java/com/zdjizhi/utils/kafka/Consumer.java b/src/main/java/com/zdjizhi/utils/kafka/Consumer.java
index 6c495f7..339b7e3 100644
--- a/src/main/java/com/zdjizhi/utils/kafka/Consumer.java
+++ b/src/main/java/com/zdjizhi/utils/kafka/Consumer.java
@@ -5,6 +5,7 @@ import com.zdjizhi.common.FlowWriteConfig;
 import org.apache.flink.api.common.serialization.SimpleStringSchema;
 import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
 import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
 import org.apache.kafka.common.config.SslConfigs;
@@ -26,7 +27,6 @@
         properties.put("max.partition.fetch.bytes", FlowWriteConfig.MAX_PARTITION_FETCH_BYTES);
         properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
         properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
-
         CertUtils.chooseCert(FlowWriteConfig.KAFKA_SOURCE_PROTOCOL,properties);
 
         return properties;
diff --git a/src/main/java/com/zdjizhi/utils/kafka/Producer.java b/src/main/java/com/zdjizhi/utils/kafka/Producer.java
index f9bee25..1671643 100644
--- a/src/main/java/com/zdjizhi/utils/kafka/Producer.java
+++ b/src/main/java/com/zdjizhi/utils/kafka/Producer.java
@@ -43,6 +43,7 @@
 
         kafkaProducer.setLogFailuresOnly(false);
 
+//        kafkaProducer.setWriteTimestampToKafka(true);
 
         return kafkaProducer;
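Finally, the `return ""` → `return null` changes in the three `TransForm*` classes line up with the `FilterNullFunction` stage in the topology: a record that fails parsing now surfaces as `null` and is dropped before the sink instead of being sent to Kafka as an empty string. The project's `FilterNullFunction` is not part of this diff; a minimal sketch of what it presumably does:

```java
import org.apache.flink.api.common.functions.FilterFunction;

// Keeps only non-null records so transform failures (which now return null
// instead of "") never reach the Kafka sink.
public class FilterNullFunction implements FilterFunction<String> {
    @Override
    public boolean filter(String value) {
        return value != null;
    }
}
```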
