diff options
| author | 陈冠林 <[email protected]> | 2019-06-18 10:44:20 +0800 |
|---|---|---|
| committer | 陈冠林 <[email protected]> | 2019-06-18 10:44:20 +0800 |
| commit | b2a2f39d89b3bd154da10eb619f8a40c7c6b15d2 (patch) | |
| tree | b7a7d489030cfcc3b2fa878520d8c5d42dc5fce6 /src/get_td_mistake_lost/new_TD.py | |
| parent | b026525362d7f3b0ad58fb74362bf7f95ab515e8 (diff) | |
Diffstat (limited to 'src/get_td_mistake_lost/new_TD.py')
| -rw-r--r-- | src/get_td_mistake_lost/new_TD.py | 34 |
1 files changed, 34 insertions, 0 deletions
#-*-coding:utf-8-*-
# Recovered from the diff that adds src/get_td_mistake_lost/new_TD.py
# (new file, mode 100644, blob 5b7269f).
"""Rebuild a TD string and its MD5 digest for every record of a raw file.

The input and output paths are read from the ``new_td`` section of
``file_digest.conf`` (keys ``raw_file_address`` and ``ripe_files_address``).

Each input line is split on ``;``.  Column 4 is split again on the labels
``URL:``, ``ServerIP:``, ``MediaType:``, ``MediaLen:``, ``Etag:`` and
``LastModify:``; pieces 1, 3, 4, 5 and 6 of that split are combined with
column 16 into the TD string.  The output line is::

    <td_string>;<md5 hex digest of td_string>;<column 19>

A line that has fewer than 20 ``;``-separated columns, or whose column 4
yields fewer than 7 pieces, raises ``IndexError`` and aborts the run.
"""
import re
import random
import ConfigParser
import bisect
import commands
import os
import hashlib

# Splits column 4 on its sub-field labels.  Compiled once because it is
# applied to every input line.
# NOTE(review): the piece indices used below assume each label occurs once,
# in the order listed in the pattern -- confirm against the raw file format.
_TD_FIELD_SPLITTER = re.compile(
    r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:")

config = ConfigParser.RawConfigParser()
config.read("file_digest.conf")
raw_file_address = config.get("new_td", "raw_file_address")
ripe_files_address = config.get("new_td", "ripe_files_address")
print("%s %s" % (raw_file_address, ripe_files_address))


def get_md5_value(td_string):
    """Return the hexadecimal MD5 digest of *td_string*.

    Args:
        td_string: the byte string to hash.

    Returns:
        The digest as a string of hexadecimal digits.
    """
    return hashlib.md5(td_string).hexdigest()


# Number of input lines read so far; stays 0 when the input file is empty.
i = 0
with open(raw_file_address, 'r') as infile:
    with open(ripe_files_address, 'w') as outfile:
        for i, line in enumerate(infile, 1):
            # Progress report every 100000 lines.
            if i % 100000 == 0:
                print(i)
            data_line_val = line.split(';')
            data_set = _TD_FIELD_SPLITTER.split(data_line_val[4])
            # Piece 2 of the split (the text after "ServerIP:") is not
            # part of the TD string.
            td_string = "".join([
                "url", data_set[1],
                "MediaType:", data_set[3],
                "MediaLen:", data_set[4],
                "Etag:", data_set[5],
                "LastModify:", data_set[6],
                "td_data_md5_32k:", data_line_val[16],
            ])
            new_td = get_md5_value(td_string)
            outfile.write(
                td_string + ";" + new_td + ";" + data_line_val[19] + "\n")
\ No newline at end of file |
