author    qidaijie <[email protected]>  2021-11-11 09:14:09 +0300
committer qidaijie <[email protected]>  2021-11-11 09:14:09 +0300
commit    60e4bcfca08d3d30c4df66a76e7af604e955bac8 (patch)
tree      bfb9d11e4f91800f91ee8aa42693e4286126cfbc
parent    8bf733385fa8a0daac2dd3bd9c8428f91ee0624a (diff)
Replace toJSONString with the fastjson utility class
-rw-r--r--  pom.xml                                                         |  2
-rw-r--r--  properties/default_config.properties                            | 57
-rw-r--r--  properties/service_flow_config.properties                       |  8
-rw-r--r--  src/main/java/com/zdjizhi/common/FlowWriteConfig.java           |  1
-rw-r--r--  src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java    | 11
-rw-r--r--  src/main/java/com/zdjizhi/utils/general/TransFormMap.java       |  4
-rw-r--r--  src/main/java/com/zdjizhi/utils/general/TransFormObject.java    |  4
-rw-r--r--  src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java   | 16
-rw-r--r--  src/main/java/com/zdjizhi/utils/kafka/Consumer.java             |  2
-rw-r--r--  src/main/java/com/zdjizhi/utils/kafka/Producer.java             |  1
10 files changed, 64 insertions, 42 deletions
diff --git a/pom.xml b/pom.xml
index 42189ea..feb29c6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
<groupId>com.zdjizhi</groupId>
<artifactId>log-completion-schema</artifactId>
- <version>211105-flattenSpec</version>
+ <version>211109-Jackson</version>
<name>log-completion-schema</name>
<url>http://www.example.com</url>
diff --git a/properties/default_config.properties b/properties/default_config.properties
index fb9015e..01bb5ca 100644
--- a/properties/default_config.properties
+++ b/properties/default_config.properties
@@ -1,49 +1,54 @@
-#Number of retry attempts for the producer
+#====================Kafka Consumer====================#
+#kafka source session timeout (consumer group liveness)
+session.timeout.ms=60000
+
+#kafka source: max records returned per poll
+max.poll.records=3000
+
+#kafka source: max bytes fetched per partition
+max.partition.fetch.bytes=31457280
+#====================Kafka Producer====================#
+#Number of retry attempts for the producer
retries=0
-#Maximum time after a batch is created before it must be sent, whether or not it is full
+#Maximum time after a batch is created before it must be sent, whether or not it is full
linger.ms=10
-#If no response is received before the timeout, the client resends the request when necessary
+#If no response is received before the timeout, the client resends the request when necessary
request.timeout.ms=30000
-#The producer sends records in batches; batch size, default: 16384
+#The producer sends records in batches; batch size, default: 16384
batch.size=262144
-#Size of the buffer the producer uses to cache messages
+#Size of the buffer the producer uses to cache messages
#128M
buffer.memory=134217728
-#Maximum size of a single request sent to the Kafka server, default: 1048576
+#Maximum size of a single request sent to the Kafka server, default: 1048576
#10M
max.request.size=10485760
+#====================Kafka Default====================#
+#kafka source protocol; SSL or SASL
+kafka.source.protocol=SASL
-#kafka source connection timeout
-session.timeout.ms=60000
+#kafka sink protocol; SSL or SASL
+kafka.sink.protocol=SSL
-#kafka source poll
-max.poll.records=3000
+#kafka SASL username
+kafka.user=admin
-#kafka source poll bytes
-max.partition.fetch.bytes=31457280
+#kafka SASL and SSL password
+kafka.pin=galaxy2019
+#====================Topology Default====================#
#hbase table name
hbase.table.name=subscriber_info
-#Default mail charset
+#Default mail charset
mail.default.charset=UTF-8
-#0 = no validation, 1 = strict type validation, 2 = weak type validation
-log.transform.type=0
-
-#kafka source protocol; SSL or SASL
-kafka.source.protocol=SASL
-
-#kafka sink protocol; SSL or SASL
-kafka.sink.protocol=SSL
-
-#kafka SASL username
-kafka.user=admin
+#0 = no validation, 1 = strict type validation, 2 = weak type validation
+log.transform.type=2
-#kafka SASL and SSL password
-kafka.pin=galaxy2019 \ No newline at end of file
+#Maximum time between two output flushes (in milliseconds)
+buffer.timeout=100000 \ No newline at end of file
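
The producer settings above map one-to-one onto standard Kafka client configuration keys. A minimal sketch, not code from this repository, of how they could be assembled into a client Properties object:

// Sketch only: wiring the properties above into Kafka client configuration.
// Key names are standard Kafka producer keys; values mirror default_config.properties.
import java.util.Properties;

public class ProducerPropsSketch {
    static Properties producerProps() {
        Properties props = new Properties();
        props.put("retries", "0");                 // no automatic retries on send failure
        props.put("linger.ms", "10");              // max wait before flushing a non-full batch
        props.put("request.timeout.ms", "30000");  // resend if no broker response in time
        props.put("batch.size", "262144");         // 256 KB batches (Kafka default: 16384)
        props.put("buffer.memory", "134217728");   // 128 MB client-side buffer
        props.put("max.request.size", "10485760"); // 10 MB per request (default: 1048576)
        return props;
    }
}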
diff --git a/properties/service_flow_config.properties b/properties/service_flow_config.properties
index 51ecb4d..1a38ca4 100644
--- a/properties/service_flow_config.properties
+++ b/properties/service_flow_config.properties
@@ -1,7 +1,7 @@
#--------------------------------Address configuration------------------------------#
#Source Kafka servers
-source.kafka.servers=10.224.11.14:9094,10.224.11.15:9094,10.224.11.16:9094,10.224.11.17:9094,10.224.11.18:9094,10.224.11.19:9094,10.224.11.20:9094,10.224.11.21:9094,10.224.11.22:9094,10.224.11.23:9094
+source.kafka.servers=10.231.12.4:9094
#Sink (output) Kafka servers
sink.kafka.servers=10.224.11.14:9094,10.224.11.15:9094,10.224.11.16:9094,10.224.11.17:9094,10.224.11.18:9094,10.224.11.19:9094,10.224.11.20:9094,10.224.11.21:9094,10.224.11.22:9094,10.224.11.23:9094
@@ -10,7 +10,7 @@ sink.kafka.servers=10.224.11.14:9094,10.224.11.15:9094,10.224.11.16:9094,10.224.
zookeeper.servers=10.224.11.11:2181,10.224.11.12:2181,10.224.11.13:2181
#HBase ZooKeeper servers, used to connect to HBase
-hbase.zookeeper.servers=10.224.11.11:2181,10.224.11.12:2181,10.224.11.13:2181
+hbase.zookeeper.servers=10.231.12.4:2181
#--------------------------------HTTP / geolocation library------------------------------#
#Geolocation library address
@@ -25,13 +25,13 @@ app.id.http=http://10.224.11.244:9999/open-api/appDicList
#--------------------------------Kafka consumer group info------------------------------#
#Kafka topic to consume data from
-source.kafka.topic=test
+source.kafka.topic=SESSION-RECORD
#Output topic for completed data
sink.kafka.topic=test-result
#Consumer group used when reading the topic; it stores the consumer offsets for this spout id and can be named after the topology. The stored offset position determines where the next read resumes without duplicating data.
-group.id=flink-test
+group.id=flink-test-1
#Producer compression mode: none or snappy
producer.kafka.compression.type=none
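
For orientation, a hedged sketch of how the topic and group settings above typically reach a Flink Kafka source; the constructor is the flink-connector-kafka API this project already imports elsewhere:

// Sketch only: topic/group wiring for the Flink Kafka source.
import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class SourceSketch {
    static FlinkKafkaConsumer<String> source() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.231.12.4:9094"); // source.kafka.servers
        props.put("group.id", "flink-test-1");              // offsets are stored per group
        return new FlinkKafkaConsumer<>("SESSION-RECORD", new SimpleStringSchema(), props);
    }
}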
diff --git a/src/main/java/com/zdjizhi/common/FlowWriteConfig.java b/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
index e8c569c..e2d430a 100644
--- a/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
+++ b/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
@@ -29,6 +29,7 @@ public class FlowWriteConfig {
public static final String MAIL_DEFAULT_CHARSET = FlowWriteConfigurations.getStringProperty(0, "mail.default.charset");
public static final String HBASE_TABLE_NAME = FlowWriteConfigurations.getStringProperty(1, "hbase.table.name");
public static final Integer LOG_TRANSFORM_TYPE = FlowWriteConfigurations.getIntProperty(1, "log.transform.type");
+ public static final Integer BUFFER_TIMEOUT = FlowWriteConfigurations.getIntProperty(1, "buffer.timeout");
/**
* kafka source config
diff --git a/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java b/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java
index b23492b..07e0407 100644
--- a/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java
+++ b/src/main/java/com/zdjizhi/topology/LogFlowWriteTopology.java
@@ -28,8 +28,8 @@ public class LogFlowWriteTopology {
//Enable checkpointing; interval specifies how often a checkpoint is triggered (in milliseconds)
// environment.enableCheckpointing(5000);
- //
- environment.setBufferTimeout(5000);
+ //Maximum time between two output flushes (in milliseconds)
+ environment.setBufferTimeout(FlowWriteConfig.BUFFER_TIMEOUT);
DataStreamSource<String> streamSource = environment.addSource(Consumer.getKafkaConsumer())
.setParallelism(FlowWriteConfig.SOURCE_PARALLELISM);
@@ -41,26 +41,31 @@ public class LogFlowWriteTopology {
//Process and complete the raw log without validating log field types.
cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+
break;
case 1:
//Process and complete the raw log, requiring log field types to match the schema exactly.
cleaningLog = streamSource.map(new ObjectCompletedFunction()).name("ObjectCompletedFunction")
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+
break;
case 2:
//Process and complete the raw log, weakly validating log field types and coercing them to the schema where possible.
cleaningLog = streamSource.map(new TypeMapCompletedFunction()).name("TypeMapCompletedFunction")
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+
break;
default:
//Process and complete the raw log without validating log field types.
cleaningLog = streamSource.map(new MapCompletedFunction()).name("MapCompletedFunction")
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+
}
- //Filter out null records so they are not sent to Kafka
+// //Filter out null records so they are not sent to Kafka
DataStream<String> result = cleaningLog.filter(new FilterNullFunction()).name("FilterAbnormalData")
.setParallelism(FlowWriteConfig.TRANSFORM_PARALLELISM);
+
//Send the data to Kafka
result.addSink(Producer.getKafkaProducer()).name("LogSinkKafka")
.setParallelism(FlowWriteConfig.SINK_PARALLELISM);
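
The switch above picks one of three MapFunction implementations by log.transform.type, and setBufferTimeout trades latency for throughput. A self-contained sketch of the same dispatch pattern, with identity lambdas standing in for the *CompletedFunction classes (whose bodies are not in this diff):

// Sketch only: dispatch on log.transform.type, as in LogFlowWriteTopology.
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class TopologySketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setBufferTimeout(100000); // buffer.timeout: max ms between output flushes

        DataStream<String> source = env.fromElements("{\"raw\":\"log\"}");
        int transformType = 2; // 0 = none, 1 = strict, 2 = weak validation
        MapFunction<String, String> completion;
        switch (transformType) {
            case 1:  completion = log -> log; break; // stand-in for ObjectCompletedFunction
            case 2:  completion = log -> log; break; // stand-in for TypeMapCompletedFunction
            default: completion = log -> log; break; // stand-in for MapCompletedFunction
        }
        source.map(completion).print();
        env.execute("dispatch-sketch");
    }
}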
diff --git a/src/main/java/com/zdjizhi/utils/general/TransFormMap.java b/src/main/java/com/zdjizhi/utils/general/TransFormMap.java
index 239d8db..5ae9859 100644
--- a/src/main/java/com/zdjizhi/utils/general/TransFormMap.java
+++ b/src/main/java/com/zdjizhi/utils/general/TransFormMap.java
@@ -54,11 +54,11 @@ public class TransFormMap {
}
return JsonMapper.toJsonString(jsonMap);
} else {
- return "";
+ return null;
}
} catch (RuntimeException e) {
logger.error("Exception while parsing and completing the log; details: " + e + "\n" + message);
- return "";
+ return null;
}
}
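
Returning null instead of "" makes failed records unambiguous downstream: an empty string could be a legitimate payload, while null can only mean the transform gave up. A sketch of what a filter like FilterNullFunction (named in the topology, body not shown in this diff) would then need to do:

// Sketch only: drop records the transforms marked as failed by returning null.
import org.apache.flink.api.common.functions.FilterFunction;

public class FilterNullSketch implements FilterFunction<String> {
    @Override
    public boolean filter(String value) {
        return value != null && !value.isEmpty(); // keep only real payloads
    }
}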
diff --git a/src/main/java/com/zdjizhi/utils/general/TransFormObject.java b/src/main/java/com/zdjizhi/utils/general/TransFormObject.java
index 9b776a9..54629db 100644
--- a/src/main/java/com/zdjizhi/utils/general/TransFormObject.java
+++ b/src/main/java/com/zdjizhi/utils/general/TransFormObject.java
@@ -62,11 +62,11 @@ public class TransFormObject {
}
return JsonMapper.toJsonString(object);
} else {
- return "";
+ return null;
}
} catch (RuntimeException e) {
logger.error("Exception while parsing and completing the log; details: " + e + "\n" + message);
- return "";
+ return null;
}
}
diff --git a/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java b/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java
index 4423f51..5f2100b 100644
--- a/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java
+++ b/src/main/java/com/zdjizhi/utils/general/TransFormTypeMap.java
@@ -3,6 +3,8 @@ package com.zdjizhi.utils.general;
import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
+import com.alibaba.fastjson.JSONObject;
+import com.alibaba.fastjson.serializer.SerializerFeature;
import com.zdjizhi.common.FlowWriteConfig;
import com.zdjizhi.utils.JsonMapper;
import com.zdjizhi.utils.StringUtil;
@@ -12,6 +14,8 @@ import com.zdjizhi.utils.json.JsonTypeUtils;
import java.util.ArrayList;
import java.util.Map;
+import static com.alibaba.fastjson.serializer.SerializerFeature.WriteMapNullValue;
+
/**
* Description: utility class for transforming and completing logs
@@ -53,13 +57,19 @@ public class TransFormTypeMap {
String param = strings[3];
functionSet(function, jsonMap, appendToKeyName, appendToKeyValue, logValue, param);
}
- return JsonMapper.toJsonString(jsonMap);
+// return JsonMapper.toJsonString(jsonMap);
+
+ //fastjson test
+ return JSONObject.toJSONString(jsonMap,
+ SerializerFeature.DisableCircularReferenceDetect
+ ,SerializerFeature.WriteNullStringAsEmpty
+ ,SerializerFeature.WriteNullNumberAsZero);
} else {
- return "";
+ return null;
}
} catch (RuntimeException e) {
logger.error("Exception while parsing and completing the log; details: " + e + "\n" + message);
- return "";
+ return null;
}
}
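
A standalone illustration of the fastjson call introduced above, with one caveat worth verifying: WriteNullStringAsEmpty and WriteNullNumberAsZero act on typed bean fields, so for a raw Map the statically imported WriteMapNullValue (unused in the call above) is what keeps null entries in the output at all.

// Sketch only: fastjson serialization with the features chosen in the diff.
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.SerializerFeature;
import java.util.HashMap;
import java.util.Map;

public class FastjsonSketch {
    public static void main(String[] args) {
        Map<String, Object> jsonMap = new HashMap<>();
        jsonMap.put("host", "a.example");
        jsonMap.put("note", null);
        // Without WriteMapNullValue, the null "note" entry is dropped entirely.
        System.out.println(JSONObject.toJSONString(jsonMap,
                SerializerFeature.DisableCircularReferenceDetect,
                SerializerFeature.WriteNullStringAsEmpty,
                SerializerFeature.WriteNullNumberAsZero));
        // prints: {"host":"a.example"}
    }
}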
diff --git a/src/main/java/com/zdjizhi/utils/kafka/Consumer.java b/src/main/java/com/zdjizhi/utils/kafka/Consumer.java
index 6c495f7..339b7e3 100644
--- a/src/main/java/com/zdjizhi/utils/kafka/Consumer.java
+++ b/src/main/java/com/zdjizhi/utils/kafka/Consumer.java
@@ -5,6 +5,7 @@ import com.zdjizhi.common.FlowWriteConfig;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.config.SslConfigs;
@@ -26,7 +27,6 @@ public class Consumer {
properties.put("max.partition.fetch.bytes", FlowWriteConfig.MAX_PARTITION_FETCH_BYTES);
properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
-
CertUtils.chooseCert(FlowWriteConfig.KAFKA_SOURCE_PROTOCOL,properties);
return properties;
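
CertUtils.chooseCert is project code not shown in this diff. Assuming it applies the kafka.source.protocol / kafka.user / kafka.pin settings, a plausible sketch using only standard Kafka client keys; the method body here is a guess, not the repository's implementation:

// Hypothetical sketch of a chooseCert-style helper; standard Kafka client keys only.
import java.util.Properties;

public class CertUtilsSketch {
    static void chooseCert(String protocol, Properties props) {
        if ("SASL".equals(protocol)) {
            props.put("security.protocol", "SASL_PLAINTEXT");
            props.put("sasl.mechanism", "PLAIN");
            props.put("sasl.jaas.config",
                    "org.apache.kafka.common.security.plain.PlainLoginModule required "
                            + "username=\"admin\" password=\"galaxy2019\";"); // kafka.user / kafka.pin
        } else { // "SSL"
            props.put("security.protocol", "SSL");
            props.put("ssl.truststore.location", "/path/to/truststore.jks"); // hypothetical path
            props.put("ssl.truststore.password", "galaxy2019");              // kafka.pin
        }
    }
}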
diff --git a/src/main/java/com/zdjizhi/utils/kafka/Producer.java b/src/main/java/com/zdjizhi/utils/kafka/Producer.java
index f9bee25..1671643 100644
--- a/src/main/java/com/zdjizhi/utils/kafka/Producer.java
+++ b/src/main/java/com/zdjizhi/utils/kafka/Producer.java
@@ -43,6 +43,7 @@ public class Producer {
kafkaProducer.setLogFailuresOnly(false);
+
// kafkaProducer.setWriteTimestampToKafka(true);
return kafkaProducer;
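
For context on setLogFailuresOnly(false) above: with false, a failed send surfaces as an exception and fails the job instead of only being logged and dropped. A minimal sketch of the producer construction this file appears to perform, using the sink topic from service_flow_config.properties:

// Sketch only: Flink Kafka sink with send failures surfaced as job failures.
import java.util.Properties;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class SinkSketch {
    static FlinkKafkaProducer<String> sink(Properties props) {
        FlinkKafkaProducer<String> producer =
                new FlinkKafkaProducer<>("test-result", new SimpleStringSchema(), props);
        producer.setLogFailuresOnly(false); // throw on failure instead of just logging
        return producer;
    }
}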