summaryrefslogtreecommitdiff
path: root/bin/conf/sifter
diff options
context:
space:
mode:
authorlishu <[email protected]>2018-09-29 14:57:32 +0800
committerlishu <[email protected]>2018-09-29 14:57:32 +0800
commit19cfcaf353ae4488927fc250361f8baa48f9ffb9 (patch)
tree1cf82bd8c17044090777b067ed16c95b4269466b /bin/conf/sifter
20180929 first commit
Diffstat (limited to 'bin/conf/sifter')
-rw-r--r--bin/conf/sifter/app_data.sft11
-rw-r--r--bin/conf/sifter/dash_data.sft12
-rw-r--r--bin/conf/sifter/dash_index.sft13
-rw-r--r--bin/conf/sifter/frag_reassembly.conf57
-rw-r--r--bin/conf/sifter/hls_data.sft12
-rw-r--r--bin/conf/sifter/hls_index.sft13
-rw-r--r--bin/conf/sifter/iqiyi_data.sft12
-rw-r--r--bin/conf/sifter/main.conf147
-rw-r--r--bin/conf/sifter/mid_trace.conf1
-rw-r--r--bin/conf/sifter/osmf_data.sft12
-rw-r--r--bin/conf/sifter/sifter.json276
-rw-r--r--bin/conf/sifter/table_info.conf11
-rw-r--r--bin/conf/sifter/tudou_data.sft12
-rw-r--r--bin/conf/sifter/tudou_index.sft13
-rw-r--r--bin/conf/sifter/urloffset_data.sft12
15 files changed, 614 insertions, 0 deletions
diff --git a/bin/conf/sifter/app_data.sft b/bin/conf/sifter/app_data.sft
new file mode 100644
index 0000000..6c0479c
--- /dev/null
+++ b/bin/conf/sifter/app_data.sft
@@ -0,0 +1,11 @@
+[INFOR]
+sft_id = 3122
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = {61}{C2S_URL}, {0xD1}{S2C_CONTENT-DISPOSITION},{0xE1}{C2S_CONTENT},{0xF1}{S2C_CONTENT}
+{1}{MEDIA_ID}={61}{C2S_URL}$RawText$\x0$\x0$\x0$ \ No newline at end of file
diff --git a/bin/conf/sifter/dash_data.sft b/bin/conf/sifter/dash_data.sft
new file mode 100644
index 0000000..14f70a6
--- /dev/null
+++ b/bin/conf/sifter/dash_data.sft
@@ -0,0 +1,12 @@
+[INFOR]
+sft_id = 5222
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = {61}{C2S_URL}, {0xD1}{S2C_CONTENT-DISPOSITION},{0xE1}{C2S_CONTENT},{0xF1}{S2C_CONTENT}
+{1}{MEDIA_ID}={61}{C2S_URL}$Tag$/vmind$/$?cdncode=$
+{2}{FRAG_UNIT_ID}={61}{C2S_URL}$Tag$/vmind$/$?cdncode=$
diff --git a/bin/conf/sifter/dash_index.sft b/bin/conf/sifter/dash_index.sft
new file mode 100644
index 0000000..8ef625c
--- /dev/null
+++ b/bin/conf/sifter/dash_index.sft
@@ -0,0 +1,13 @@
+[INFOR]
+sft_id = 5221
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = [61][C2S_URL], [0xD1][S2C_CONTENT-DISPOSITION],[0xE1][C2S_CONTENT],[0xF1][S2C_CONTENT]
+{1}{MEDIA_ID}={61}{C2S_URL}$Tag$&vid=$\x0$&ad_type=$
+{2}{FRAG_UNIT_ID}={73}{S2C_CONTENT}$Tag$<image><index>0</index><url><![CDATA[http://$qq.com/$?vkey=$
+
diff --git a/bin/conf/sifter/frag_reassembly.conf b/bin/conf/sifter/frag_reassembly.conf
new file mode 100644
index 0000000..33df3b6
--- /dev/null
+++ b/bin/conf/sifter/frag_reassembly.conf
@@ -0,0 +1,57 @@
+[NETWORK]
+RedisBrokers=10.0.6.80:9001,10.0.6.80:9002,10.0.6.80:9003,10.0.6.80:9004,10.0.6.80:9005
+RedisTimeout=5
+
+[SYSTEM]
+ConvergeHashSize=65536
+ConvergeHashElemNum=1048576
+ConvergeHashExpireTime=120
+
+#same as the protocol values in frag_monitor
+ProtocolNetdisk=17
+ProtocolWebmail=18
+
+#webmail param================================start
+WebmailHashSize=65536
+WebmailHashElemNum=1048576
+WebmailHashExpireTime=120
+
+StoreidAttachHashSize=65536
+StoreidAttachHashElemNum=1048576
+StoreidAttachHashExpireTime=120
+
+StoreidMidHashSize=65536
+StoreidMidHashElemNum=1048576
+StoreidMidHashExpireTime=120
+
+#0: do not open the iterate thread for output
+WebmailIterateTimeout=60
+WebmailAttachIterateTimeout=60
+
+WebmailService=123,163,sina,sohu,qq
+WebmailAction=send,recv
+#webmail param================================end
+
+[LOG]
+#frag reassembly log ����ƴ�ӵ�������־
+FragReassemblyLogLevel=30
+FragReassemblyLogName=./frag_reassembly.log
+
+#media log
+MediaLogLevel=30
+MediaLogName=./frag_reassembly_media.log
+
+#stat log
+StatInterval=5
+StatFile=./frag_reassembly_stat.log
+SysinfoInterval=5
+SysinfoFile=./frag_reassembly_sysinfo.log
+
+#webmail log
+WebmailLogLevel=10
+WebmailLogName=./webmail.log
+
+#Maat
+MaatStatSwitch=0
+MaatPerfSwitch=0
+#MaatStatFile=./maat_stat.log
diff --git a/bin/conf/sifter/hls_data.sft b/bin/conf/sifter/hls_data.sft
new file mode 100644
index 0000000..aff8a83
--- /dev/null
+++ b/bin/conf/sifter/hls_data.sft
@@ -0,0 +1,12 @@
+[INFOR]
+sft_id = 2122
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = {61}{C2S_URL}, {0xD1}{S2C_CONTENT-DISPOSITION},{0xE1}{C2S_CONTENT},{0xF1}{S2C_CONTENT}
+{1}{MEDIA_ID}={61}{C2S_URL}$RawText$\x0$\x0$\x0$
+{2}{FRAG_UNIT_ID}={61}{C2S_URL}$RawText$\x0$\x0$\x0$
diff --git a/bin/conf/sifter/hls_index.sft b/bin/conf/sifter/hls_index.sft
new file mode 100644
index 0000000..df7b616
--- /dev/null
+++ b/bin/conf/sifter/hls_index.sft
@@ -0,0 +1,13 @@
+[INFOR]
+sft_id = 2121
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = [61][C2S_URL], [0xD1][S2C_CONTENT-DISPOSITION],[0xE1][C2S_CONTENT],[0xF1][S2C_CONTENT]
+{1}{MEDIA_ID}={61}{C2S_URL}$RawText$\x0$\x0$\x0$
+{2}{FRAG_UNIT_ID}={73}{S2C_CONTENT}$Tag$#EXTINF:$,\x0a$\x0a$
+{4}{FRAG_UNIT_REOFFSET}={73}{S2C_CONTENT}$Tag$#EXT-X-MEDIA-SEQUENCE:$\x0$\x0a$
diff --git a/bin/conf/sifter/iqiyi_data.sft b/bin/conf/sifter/iqiyi_data.sft
new file mode 100644
index 0000000..fff3ec4
--- /dev/null
+++ b/bin/conf/sifter/iqiyi_data.sft
@@ -0,0 +1,12 @@
+[INFOR]
+sft_id = 3122
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = {61}{C2S_URL}, {0xD1}{S2C_CONTENT-DISPOSITION},{0xE1}{C2S_CONTENT},{0xF1}{S2C_CONTENT}
+{1}{MEDIA_ID}={61}{C2S_URL}$End$\x0$\x0$f4v$
+{4}{FRAG_UNIT_ABOFFSET}={61}{C2S_URL}$Tag$range=$\x0$-$
diff --git a/bin/conf/sifter/main.conf b/bin/conf/sifter/main.conf
new file mode 100644
index 0000000..8d3b761
--- /dev/null
+++ b/bin/conf/sifter/main.conf
@@ -0,0 +1,147 @@
+[NETWORK]
+LocalIP=em1
+
+#recv bizman param, recv data from sapp
+BizmanAckSmoothTime = 10
+BizmanAckAccumulateTime=10
+BizmanAckAccumulateNum=5
+BizmanPort=22082
+
+#sapp recv resp msg port
+BizmanAckPort=22084
+
+#msg port
+MsgPort=22080
+
+#unix-domain socket file path, send frag
+UnixSocketSendSrcAddr=/home/lishu/frag/frag_rssb/maskey/bin/un_send
+UnixSocketSendDestAddr=/home/lishu/frag/frag_rssb/maskey/bin/data_recv
+
+#udp socket recv port, recv response msg
+UnixSocketRecvAddr=/home/lishu/frag/frag_rssb/maskey/bin/un_recv
+
+#usm
+USM_ON_FLAG=1
+USM_SHM_KEY=1635151988
+#USM_SHM_KEY=12346
+USM_SHM_SIZE=100000
+USM_READER_CNT=3
+USM_SMOOTH_TIME=1000000
+USM_Q_SIZE=1048576
+USM_READER_PATH=/home/mcger/un_recv;/home/audiorecognition/aufilter/un_recv;/home/mesasoft/un_test/un_recv
+USM_LOG_LEVEL=40
+USM_LOG_PATH=./log/usm_writer0;./log/usm_writer1;./log/usm_writer2
+
+#special media type, send windows system , sep: ";" : IP1+PORT1/PORT2 or IP2+PORT1/PORT2
+SpecialMediaWindowsIP=10.0.6.5;10.0.6.1;
+SpecialMediaWindowsIPNum=2
+SpecialMediaWindowsPort=44096;44098
+
+#white list IP and PORT : IP1+PORT and IP2+PORT
+WhiteListIP = 10.52.178.15;
+WhiteListIPNum=1
+WhiteListPort=22085
+
+#av_dedup service ����ҵ�������������Ϣ
+ThriftIP=127.0.0.1;
+ThriftIPNum=1
+ThriftPort=9090
+DedupBalanceNum=1000
+DedupQueueNum=100000
+DedupStatInterval=60
+DedupThriftNum=5
+DedupHtableArg=0
+
+#JSON to kafka ��ý����Ϣ�ش�ҵ�����������
+MediaJSONKafkaBrokers=
+
+
+[SYSTEM]
+ThreadNum=4
+BizmanQueueMode=0
+
+#media renew time
+RenewTimeMax=7200
+RenewTimeMin=600
+RenewTimeStep=6
+
+#��ĿHASH��
+MediaHashSize=65536
+MediaHashElemNum=1048576
+MediaHashExpireTime=2
+
+#���ٻ�дhash�ļ�����DumpfileHashElemNum�������IJ����ļ���Ŀ
+DumpfileHashSize=1024
+DumpfileHashElemNum=512
+DumpfileHahsExpireTime=6
+
+#monitor service
+MonitorHashSize=65536
+MonitorHashElemNum=1048576
+MonitorHahsExpireTime=120
+
+MonitorFileSwitch=1
+MonitorFileRootDir=/home/yspdata/qd_monitor/
+MonitorFileDay=1
+
+#special media type, send windows system
+ForwardSpecialMediaSwitch=1
+SpecialMediaType=0x68
+
+#AV record switch
+AVRecordFileSwitch=0
+AVRecordFileRootDir=./AVrecord/
+AVRecordFileMaxNum=100000
+
+#����ҵ�񿪹�
+AVDedupSwitch=0
+#td=(URL+ServerIP...)+data(<=65536)
+DedupTdDataSize=0
+
+#media info send to kafka: switch for sending media JSON info back to Kafka
+MediaJSONSwitch=0
+
+#�ֻ�app����
+AppSwitch=0
+
+[LOG]
+#field_stat
+StatCycle=2
+StatFile=./log/rssb_stat.log
+
+SysinfoCycle=2
+SysinfoFile=./log/rssb_sysinfo.log
+
+#log in general
+LogLevel= 30
+LogPath= ./log/runtime.log
+
+#frag msg log
+FragLogLevel= 20
+FragLogPath= ./log/frag.log
+
+#av_dedup log
+DedupLogLevel= 10
+DedupLogPath= ./log/dedup.log
+
+#resp msg log
+RespLogPath= ./log/survey.log
+
+#media create log
+MediaCreateLogPath= ./log/media_create.log
+
+#media expire log
+MediaExpireLogPath= ./log/media_expire.log
+
+[DEBUG]
+/*����mid_trace.conf�Ľ�Ŀ���б��ش洢*/
+FileStoreSwitch=0
+FileStorePath=./log/file/
+
+/*���н�Ŀ�����б��ظ��ٻ�д*/
+AllHitMonitorSwitch=0
+#1: (reoffset-1)*content-length 0: accoffset
+HlsAboffsetInMode=1
+
+/*trace log path*/
+TracePath=./log/
diff --git a/bin/conf/sifter/mid_trace.conf b/bin/conf/sifter/mid_trace.conf
new file mode 100644
index 0000000..1235696
--- /dev/null
+++ b/bin/conf/sifter/mid_trace.conf
@@ -0,0 +1 @@
+#mid or pid
diff --git a/bin/conf/sifter/osmf_data.sft b/bin/conf/sifter/osmf_data.sft
new file mode 100644
index 0000000..f06e44a
--- /dev/null
+++ b/bin/conf/sifter/osmf_data.sft
@@ -0,0 +1,12 @@
+[INFOR]
+sft_id = 1122
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = {61}{C2S_URL}, {0xD1}{S2C_CONTENT-DISPOSITION},{0xE1}{C2S_CONTENT},{0xF1}{S2C_CONTENT}
+{1}{MEDIA_ID}={61}{C2S_URL}$End$\x0$\x0$Seg$
+{4}{FRAG_UNIT_REOFFSET}={61}{C2S_URL}$Tag$Frag$\x0$\x0$
diff --git a/bin/conf/sifter/sifter.json b/bin/conf/sifter/sifter.json
new file mode 100644
index 0000000..c78b10b
--- /dev/null
+++ b/bin/conf/sifter/sifter.json
@@ -0,0 +1,276 @@
+{
+ "compile_table": "SIFTER_CONFIG_FULL",
+ "group_table":"SIFTER_GROUP",
+ "rules":
+ [
+{
+ "compile_id": 1,
+ "service": 2221,
+ "action": 2,
+ "do_blacklist": 1,
+ "do_log": 1,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_ID",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "0_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "http://",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 2,
+ "service": 2221,
+ "action": 4,
+ "do_blacklist": 1,
+ "do_log": 2,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_REOFFSET",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "1_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "&ts_seg_no=",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 3,
+ "service": 5221,
+ "action": 1,
+ "do_blacklist": 2,
+ "do_log": 0,
+ "effective_rage": 0,
+ "user_region": "MEDIA_ID",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "2_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "&vid=",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 4,
+ "service": 5221,
+ "action": 2,
+ "do_blacklist": 2,
+ "do_log": 1,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_ID",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "3_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "<image><index>0</index><url><![CDATA[http://",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 5,
+ "service": 3122,
+ "action": 4,
+ "do_blacklist": 4,
+ "do_log": 1,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_ABOFFSET",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "4_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "range=",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 6,
+ "service": 1122,
+ "action": 4,
+ "do_blacklist": 5,
+ "do_log": 1,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_REOFFSET",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "5_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "Frag",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 7,
+ "service": 2121,
+ "action": 2,
+ "do_blacklist": 7,
+ "do_log": 1,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_ID",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "6_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "#EXTINF:",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 8,
+ "service": 2121,
+ "action": 4,
+ "do_blacklist": 7,
+ "do_log": 2,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_REOFFSET",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "7_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "#EXT-X-MEDIA-SEQUENCE:",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 9,
+ "service": 5222,
+ "action": 1,
+ "do_blacklist": 8,
+ "do_log": 0,
+ "effective_rage": 0,
+ "user_region": "MEDIA_ID",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "8_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "/vmind",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ },
+{
+ "compile_id": 10,
+ "service": 5222,
+ "action": 2,
+ "do_blacklist": 8,
+ "do_log": 1,
+ "effective_rage": 0,
+ "user_region": "FRAG_UNIT_ID",
+ "is_valid": "yes",
+ "groups": [
+ {
+ "group_name": "9_start",
+ "regions": [
+ {
+ "table_name": "SIFTER_START",
+ "table_type": "string",
+ "table_content": {
+ "keywords": "/vmind",
+ "expr_type": "none",
+ "match_method": "sub",
+ "format": "uncase plain"
+ }
+ }
+ ]
+ }
+ ]
+ } ]
+} \ No newline at end of file
diff --git a/bin/conf/sifter/table_info.conf b/bin/conf/sifter/table_info.conf
new file mode 100644
index 0000000..ae0c330
--- /dev/null
+++ b/bin/conf/sifter/table_info.conf
@@ -0,0 +1,11 @@
+#each column is separated by '\t'
+#id (0~65535)
+#name string
+#type one of ip,expr,compile or plugin
+#src_charset one of GBK,BIG5,UNICODE,UTF8
+#dst_charset combined by GBK,BIG5,UNICODE,UTF8, separated by '/'
+#do_merge yes or no
+#id name type src_charset dst_charset do_merge
+1 SIFTER_CONFIG_FULL compile GBK GBK no
+2 SIFTER_GROUP group GBK GBK no
+3 SIFTER_START expr GBK GBK yes
diff --git a/bin/conf/sifter/tudou_data.sft b/bin/conf/sifter/tudou_data.sft
new file mode 100644
index 0000000..a1769e2
--- /dev/null
+++ b/bin/conf/sifter/tudou_data.sft
@@ -0,0 +1,12 @@
+[INFOR]
+sft_id = 2222
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = {61}{C2S_URL}, {0xD1}{S2C_CONTENT-DISPOSITION},{0xE1}{C2S_CONTENT},{0xF1}{S2C_CONTENT}
+{1}{MEDIA_ID}={61}{C2S_URL}$RawText$\x0$\x0$\x0$
+{2}{FRAG_UNIT_ID}={61}{C2S_URL}$RawText$\x0$\x0$\x0$
diff --git a/bin/conf/sifter/tudou_index.sft b/bin/conf/sifter/tudou_index.sft
new file mode 100644
index 0000000..93aceb4
--- /dev/null
+++ b/bin/conf/sifter/tudou_index.sft
@@ -0,0 +1,13 @@
+[INFOR]
+sft_id = 2221
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = [61][C2S_URL], [0xD1][S2C_CONTENT-DISPOSITION],[0xE1][C2S_CONTENT],[0xF1][S2C_CONTENT]
+{1}{MEDIA_ID}={61}{C2S_URL}$RawText$\x0$\x0$\x0$
+{2}{FRAG_UNIT_ID}={73}{S2C_CONTENT}$Tag$http://$\x0$\x0d\x0a$
+{4}{FRAG_UNIT_REOFFSET}={73}{S2C_CONTENT}$Tag$&ts_seg_no=$\x0$&$
diff --git a/bin/conf/sifter/urloffset_data.sft b/bin/conf/sifter/urloffset_data.sft
new file mode 100644
index 0000000..f18b19b
--- /dev/null
+++ b/bin/conf/sifter/urloffset_data.sft
@@ -0,0 +1,12 @@
+[INFOR]
+sft_id = 3122
+
+[EXTRACT]
+#[expect_type][expect_name] = extract_location$extract_method$start$mid$end$
+#expect_type and expect_name must be one
+#sifter_method = Tag | RawText | Boundary | Reposition
+#special character : \r\n = 0x0d0x0a, \x30 = no end , \x0 = null
+#expect_name = {1}{MEDIA_ID}, {2}{FRAG_UNIT_ID}, {3}{FRAG_UNIT_ABOFFSET}, {4}{FRAG_UNIT_REOFFSET}, {5}{MEDIA_SIZE}, {6}{MEDIA_NAME}, {7}{MEDIA_CONTENT}
+#extract_location = {61}{C2S_URL}, {0xD1}{S2C_CONTENT-DISPOSITION},{0xE1}{C2S_CONTENT},{0xF1}{S2C_CONTENT}
+{1}{MEDIA_ID}={61}{C2S_URL}$End$\x0$\x0$f4v$
+{4}{FRAG_UNIT_REOFFSET}={61}{C2S_URL}$Tag$range=$\x0$-$