summary | refs | log | tree | commit | diff
path: root/src/get_td_mistake_lost/new_TD.py
diff options
context:
space:
mode:
author: 陈冠林 <[email protected]> 2019-06-18 10:44:20 +0800
committer: 陈冠林 <[email protected]> 2019-06-18 10:44:20 +0800
commit: b2a2f39d89b3bd154da10eb619f8a40c7c6b15d2 (patch)
tree: b7a7d489030cfcc3b2fa878520d8c5d42dc5fce6 /src/get_td_mistake_lost/new_TD.py
parent: b026525362d7f3b0ad58fb74362bf7f95ab515e8 (diff)
添加inc和src (HEAD, master)
Diffstat (limited to 'src/get_td_mistake_lost/new_TD.py')
-rw-r--r-- src/get_td_mistake_lost/new_TD.py 34
1 files changed, 34 insertions, 0 deletions
diff --git a/src/get_td_mistake_lost/new_TD.py b/src/get_td_mistake_lost/new_TD.py
new file mode 100644
index 0000000..5b7269f
--- /dev/null
+++ b/src/get_td_mistake_lost/new_TD.py
@@ -0,0 +1,34 @@
+#-*-coding:utf-8-*-
+import re
+import random
+import ConfigParser
+import bisect
+import commands
+import os
+import hashlib
+
+# Input and output paths come from the [new_td] section of file_digest.conf.
+config = ConfigParser.RawConfigParser()
+config.read("file_digest.conf")
+raw_file_address, ripe_files_address = (config.get("new_td", opt) for opt in ("raw_file_address", "ripe_files_address"))
+print("%s %s" % (raw_file_address, ripe_files_address))
+
+def get_md5_value(td_string):
+    """Return the 32-char hex MD5 of td_string (bytes, or text, which is UTF-8 encoded first)."""
+    if not isinstance(td_string, bytes):  # hashlib only accepts bytes; on Python 2 `bytes` is `str`
+        td_string = td_string.encode("utf-8")
+    return hashlib.md5(td_string).hexdigest()
+
+# For every ';'-separated input record: rebuild its TD string, MD5 it, and write "td_string;md5;field19".
+field_labels = re.compile(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:")  # compiled once, not per line
+with open(raw_file_address, 'r') as infile:
+    with open(ripe_files_address, 'w') as outfile:
+        for i, line in enumerate(infile, 1):
+            if i % 100000 == 0:
+                print(i)  # progress marker every 100000 input lines
+            data_line_val = line.split(';')
+            # Presumably field 4 lists the labels in pattern order, so index 2 (ServerIP) is skipped -- TODO confirm.
+            data_set = field_labels.split(data_line_val[4])
+            td_string = ("url" + data_set[1] + "MediaType:" + data_set[3] + "MediaLen:" + data_set[4]
+                         + "Etag:" + data_set[5] + "LastModify:" + data_set[6] + "td_data_md5_32k:" + data_line_val[16])
+            outfile.write(td_string + ";" + get_md5_value(td_string) + ";" + data_line_val[19] + "\n")
\ No newline at end of file