author    陈冠林 <[email protected]>  2019-06-18 10:44:20 +0800
committer 陈冠林 <[email protected]>  2019-06-18 10:44:20 +0800
commit    b2a2f39d89b3bd154da10eb619f8a40c7c6b15d2 (patch)
tree      b7a7d489030cfcc3b2fa878520d8c5d42dc5fce6 /src
parent    b026525362d7f3b0ad58fb74362bf7f95ab515e8 (diff)

Add inc and src (HEAD, master)
Diffstat (limited to 'src')
-rw-r--r--  src/CMakeCache.txt | 278
-rw-r--r--  src/CMakeFiles/CMakeCCompiler.cmake | 36
-rw-r--r--  src/CMakeFiles/CMakeCXXCompiler.cmake | 36
-rw-r--r--  src/CMakeFiles/CMakeDetermineCompilerABI_C.bin | bin 0 -> 7045 bytes
-rw-r--r--  src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin | bin 0 -> 7331 bytes
-rw-r--r--  src/CMakeFiles/CMakeDirectoryInformation.cmake | 21
-rw-r--r--  src/CMakeFiles/CMakeOutput.log | 89
-rw-r--r--  src/CMakeFiles/CMakeSystem.cmake | 15
-rw-r--r--  src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c | 182
-rw-r--r--  src/CMakeFiles/CompilerIdC/a.out | bin 0 -> 6518 bytes
-rw-r--r--  src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp | 169
-rw-r--r--  src/CMakeFiles/CompilerIdCXX/a.out | bin 0 -> 6891 bytes
-rw-r--r--  src/CMakeFiles/Makefile.cmake | 52
-rw-r--r--  src/CMakeFiles/Makefile2 | 99
-rw-r--r--  src/CMakeFiles/Progress/1 | 1
-rw-r--r--  src/CMakeFiles/Progress/count.txt | 1
-rw-r--r--  src/CMakeFiles/cmake.check_cache | 1
-rw-r--r--  src/CMakeFiles/grain.dir/C.includecache | 24
-rw-r--r--  src/CMakeFiles/grain.dir/DependInfo.cmake | 13
-rw-r--r--  src/CMakeFiles/grain.dir/build.make | 103
-rw-r--r--  src/CMakeFiles/grain.dir/cmake_clean.cmake | 10
-rw-r--r--  src/CMakeFiles/grain.dir/depend.internal | 5
-rw-r--r--  src/CMakeFiles/grain.dir/depend.make | 5
-rw-r--r--  src/CMakeFiles/grain.dir/flags.make | 8
-rw-r--r--  src/CMakeFiles/grain.dir/link.txt | 1
-rw-r--r--  src/CMakeFiles/grain.dir/progress.make | 2
-rw-r--r--  src/CMakeFiles/progress.make | 1
-rw-r--r--  src/Makefile | 163
-rw-r--r--  src/TD代码说明.docx | bin 0 -> 13258 bytes
-rw-r--r--  src/cmake_install.cmake | 44
-rw-r--r--  src/dataset_build/CMakeLists.txt | 11
-rw-r--r--  src/dataset_build/based_sfh.conf | 3
-rw-r--r--  src/dataset_build/based_sfh.py | 44
-rw-r--r--  src/dataset_build/cal_information.conf | 5
-rw-r--r--  src/dataset_build/cal_information.py | 133
-rw-r--r--  src/dataset_build/dataset_build.conf | 8
-rw-r--r--  src/dataset_build/dataset_build.py | 144
-rw-r--r--  src/dataset_build/feature_statistics.conf | 8
-rw-r--r--  src/dataset_build/feature_statistics.py | 164
-rw-r--r--  src/dataset_build/file_digest.py | 96
-rw-r--r--  src/dataset_build/get_lost.c | 116
-rw-r--r--  src/dataset_build/grain.conf | 5
-rw-r--r--  src/dataset_build/td_classification.py | 5
-rw-r--r--  src/dataset_build/vedio_id_build.c | 171
-rw-r--r--  src/file_digest.conf | 3
-rw-r--r--  src/file_digest.py | 104
-rw-r--r--  src/get_td_mistake_lost/CMakeLists.txt | 11
-rw-r--r--  src/get_td_mistake_lost/file_digest.conf | 6
-rw-r--r--  src/get_td_mistake_lost/file_digest.py | 104
-rw-r--r--  src/get_td_mistake_lost/get_TD_SFH.c | 162
-rw-r--r--  src/get_td_mistake_lost/get_lost_rate.c | 210
-rw-r--r--  src/get_td_mistake_lost/get_mistake_level.c | 366
-rw-r--r--  src/get_td_mistake_lost/get_td_mistake_lost.sh | 5
-rw-r--r--  src/get_td_mistake_lost/gram_index_engine.c | 1354
-rw-r--r--  src/get_td_mistake_lost/new_TD.conf | 3
-rw-r--r--  src/get_td_mistake_lost/new_TD.py | 34
-rw-r--r--  src/rssb_statistics/all_len_st.py | 17
-rw-r--r--  src/rssb_statistics/delete_len_st.py | 48
-rw-r--r--  src/rssb_statistics/find_lost_td.conf | 14
-rw-r--r--  src/rssb_statistics/find_lost_td.py | 147
-rw-r--r--  src/rssb_statistics/harm_len_st.py | 29
-rw-r--r--  src/rssb_statistics/recv_survey_mid_st.py | 3
-rw-r--r--  src/rssb_statistics/service_st.py | 29
-rw-r--r--  src/rssb_statistics/un_recv_st.py | 36
-rw-r--r--  src/rssb_statistics/un_recv_survey_mid_st.py | 3
-rw-r--r--  src/sfh_integrate/SFH_function.c | 177
-rw-r--r--  src/test/digest_temp | 8
67 files changed, 5145 insertions, 0 deletions
diff --git a/src/CMakeCache.txt b/src/CMakeCache.txt
new file mode 100644
index 0000000..5aa46ec
--- /dev/null
+++ b/src/CMakeCache.txt
@@ -0,0 +1,278 @@
+# This is the CMakeCache file.
+# For build in directory: /home/chenguanlin/TD_evaluation/src
+# It was generated by CMake: /usr/bin/cmake
+# You can edit this file to change values found and used by cmake.
+# If you do not want to change any of the values, simply exit the editor.
+# If you do want to change a value, simply edit, save, and exit the editor.
+# The syntax for the file is as follows:
+# KEY:TYPE=VALUE
+# KEY is the name of a variable in the cache.
+# TYPE is a hint to GUI's for the type of VALUE, DO NOT EDIT TYPE!.
+# VALUE is the current value for the KEY.
+
+########################
+# EXTERNAL cache entries
+########################
+
+//Value Computed by CMake
+CALCULATE_BINARY_DIR:STATIC=/home/chenguanlin/TD_evaluation/src
+
+//Value Computed by CMake
+CALCULATE_SOURCE_DIR:STATIC=/home/chenguanlin/TD_evaluation/src
+
+//Path to a program.
+CMAKE_AR:FILEPATH=/usr/bin/ar
+
+//Choose the type of build, options are: None(CMAKE_CXX_FLAGS or
+// CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel.
+CMAKE_BUILD_TYPE:STRING=
+
+//Enable/Disable color output during build.
+CMAKE_COLOR_MAKEFILE:BOOL=ON
+
+//CXX compiler.
+CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++
+
+//Flags used by the compiler during all build types.
+CMAKE_CXX_FLAGS:STRING=
+
+//Flags used by the compiler during debug builds.
+CMAKE_CXX_FLAGS_DEBUG:STRING=-g
+
+//Flags used by the compiler during release minsize builds.
+CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+
+//Flags used by the compiler during release builds (/MD /Ob1 /Oi
+// /Ot /Oy /Gs will produce slightly less optimized but smaller
+// files).
+CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+
+//Flags used by the compiler during Release with Debug Info builds.
+CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g
+
+//C compiler.
+CMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc
+
+//Flags used by the compiler during all build types.
+CMAKE_C_FLAGS:STRING=
+
+//Flags used by the compiler during debug builds.
+CMAKE_C_FLAGS_DEBUG:STRING=-g
+
+//Flags used by the compiler during release minsize builds.
+CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG
+
+//Flags used by the compiler during release builds (/MD /Ob1 /Oi
+// /Ot /Oy /Gs will produce slightly less optimized but smaller
+// files).
+CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG
+
+//Flags used by the compiler during Release with Debug Info builds.
+CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g
+
+//Flags used by the linker.
+CMAKE_EXE_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during debug builds.
+CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during release minsize builds.
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during release builds.
+CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during Release with Debug Info builds.
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Install path prefix, prepended onto install directories.
+CMAKE_INSTALL_PREFIX:PATH=/usr/local
+
+//Path to a program.
+CMAKE_LINKER:FILEPATH=/usr/bin/ld
+
+//Path to a program.
+CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake
+
+//Flags used by the linker during the creation of modules.
+CMAKE_MODULE_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during debug builds.
+CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during release minsize builds.
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during release builds.
+CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during Release with Debug Info builds.
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//Path to a program.
+CMAKE_NM:FILEPATH=/usr/bin/nm
+
+//Path to a program.
+CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy
+
+//Path to a program.
+CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump
+
+//Path to a program.
+CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib
+
+//Flags used by the linker during the creation of dll's.
+CMAKE_SHARED_LINKER_FLAGS:STRING=
+
+//Flags used by the linker during debug builds.
+CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING=
+
+//Flags used by the linker during release minsize builds.
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING=
+
+//Flags used by the linker during release builds.
+CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING=
+
+//Flags used by the linker during Release with Debug Info builds.
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING=
+
+//If set, runtime paths are not added when using shared libraries.
+CMAKE_SKIP_RPATH:BOOL=NO
+
+//Path to a program.
+CMAKE_STRIP:FILEPATH=/usr/bin/strip
+
+//If true, cmake will use relative paths in makefiles and projects.
+CMAKE_USE_RELATIVE_PATHS:BOOL=OFF
+
+//If this value is on, makefiles will be generated without the
+// .SILENT directive, and all commands will be echoed to the console
+// during the make. This is useful for debugging only. With Visual
+// Studio IDE projects all commands are done without /nologo.
+CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE
+
+
+########################
+# INTERNAL cache entries
+########################
+
+//Advanced flag for variable: CMAKE_AR
+CMAKE_AR-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_BUILD_TOOL
+CMAKE_BUILD_TOOL-ADVANCED:INTERNAL=1
+//What is the target build tool cmake is generating for.
+CMAKE_BUILD_TOOL:INTERNAL=/usr/bin/gmake
+//This is the directory where this CMakeCahe.txt was created
+CMAKE_CACHEFILE_DIR:INTERNAL=/home/chenguanlin/TD_evaluation/src
+//Major version of cmake used to create the current loaded cache
+CMAKE_CACHE_MAJOR_VERSION:INTERNAL=2
+//Minor version of cmake used to create the current loaded cache
+CMAKE_CACHE_MINOR_VERSION:INTERNAL=6
+//Major version of cmake used to create the current loaded cache
+CMAKE_CACHE_RELEASE_VERSION:INTERNAL=patch 4
+//Advanced flag for variable: CMAKE_COLOR_MAKEFILE
+CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1
+//Path to CMake executable.
+CMAKE_COMMAND:INTERNAL=/usr/bin/cmake
+//Path to cpack program executable.
+CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack
+//Path to ctest program executable.
+CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest
+//Advanced flag for variable: CMAKE_CXX_COMPILER
+CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1
+CMAKE_CXX_COMPILER_WORKS:INTERNAL=1
+//Advanced flag for variable: CMAKE_CXX_FLAGS
+CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_CXX_FLAGS_DEBUG
+CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_CXX_FLAGS_MINSIZEREL
+CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_CXX_FLAGS_RELEASE
+CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO
+CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_C_COMPILER
+CMAKE_C_COMPILER-ADVANCED:INTERNAL=1
+CMAKE_C_COMPILER_WORKS:INTERNAL=1
+//Advanced flag for variable: CMAKE_C_FLAGS
+CMAKE_C_FLAGS-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_C_FLAGS_DEBUG
+CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_C_FLAGS_MINSIZEREL
+CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_C_FLAGS_RELEASE
+CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_C_FLAGS_RELWITHDEBINFO
+CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//Result of TRY_COMPILE
+CMAKE_DETERMINE_CXX_ABI_COMPILED:INTERNAL=TRUE
+//Result of TRY_COMPILE
+CMAKE_DETERMINE_C_ABI_COMPILED:INTERNAL=TRUE
+//Path to cache edit program executable.
+CMAKE_EDIT_COMMAND:INTERNAL=/usr/bin/ccmake
+//Executable file format
+CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF
+//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS
+CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG
+CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL
+CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE
+CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//Name of generator.
+CMAKE_GENERATOR:INTERNAL=Unix Makefiles
+//Start directory with the top level CMakeLists.txt file for this
+// project
+CMAKE_HOME_DIRECTORY:INTERNAL=/home/chenguanlin/TD_evaluation/src
+//Install .so files without execute permission.
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=0
+//Advanced flag for variable: CMAKE_LINKER
+CMAKE_LINKER-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_MAKE_PROGRAM
+CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS
+CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG
+CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE
+CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_NM
+CMAKE_NM-ADVANCED:INTERNAL=1
+//number of local generators
+CMAKE_NUMBER_OF_LOCAL_GENERATORS:INTERNAL=1
+//Advanced flag for variable: CMAKE_OBJCOPY
+CMAKE_OBJCOPY-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_OBJDUMP
+CMAKE_OBJDUMP-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_RANLIB
+CMAKE_RANLIB-ADVANCED:INTERNAL=1
+//Path to CMake installation.
+CMAKE_ROOT:INTERNAL=/usr/share/cmake
+//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS
+CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG
+CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL
+CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE
+CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
+CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_SKIP_RPATH
+CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_STRIP
+CMAKE_STRIP-ADVANCED:INTERNAL=1
+//uname command
+CMAKE_UNAME:INTERNAL=/bin/uname
+//Advanced flag for variable: CMAKE_USE_RELATIVE_PATHS
+CMAKE_USE_RELATIVE_PATHS-ADVANCED:INTERNAL=1
+//Advanced flag for variable: CMAKE_VERBOSE_MAKEFILE
+CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1
+
diff --git a/src/CMakeFiles/CMakeCCompiler.cmake b/src/CMakeFiles/CMakeCCompiler.cmake
new file mode 100644
index 0000000..1d1bbea
--- /dev/null
+++ b/src/CMakeFiles/CMakeCCompiler.cmake
@@ -0,0 +1,36 @@
+SET(CMAKE_C_COMPILER "/usr/bin/gcc")
+SET(CMAKE_C_COMPILER_ARG1 "")
+SET(CMAKE_C_COMPILER_ID "GNU")
+SET(CMAKE_C_PLATFORM_ID "Linux")
+SET(CMAKE_AR "/usr/bin/ar")
+SET(CMAKE_RANLIB "/usr/bin/ranlib")
+SET(CMAKE_COMPILER_IS_GNUCC 1)
+SET(CMAKE_C_COMPILER_LOADED 1)
+SET(CMAKE_COMPILER_IS_MINGW )
+SET(CMAKE_COMPILER_IS_CYGWIN )
+IF(CMAKE_COMPILER_IS_CYGWIN)
+ SET(CYGWIN 1)
+ SET(UNIX 1)
+ENDIF(CMAKE_COMPILER_IS_CYGWIN)
+
+SET(CMAKE_C_COMPILER_ENV_VAR "CC")
+
+IF(CMAKE_COMPILER_IS_MINGW)
+ SET(MINGW 1)
+ENDIF(CMAKE_COMPILER_IS_MINGW)
+SET(CMAKE_C_COMPILER_ID_RUN 1)
+SET(CMAKE_C_SOURCE_FILE_EXTENSIONS c)
+SET(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC)
+SET(CMAKE_C_LINKER_PREFERENCE 10)
+
+# Save compiler ABI information.
+SET(CMAKE_C_SIZEOF_DATA_PTR "8")
+SET(CMAKE_C_COMPILER_ABI "ELF")
+
+IF(CMAKE_C_SIZEOF_DATA_PTR)
+ SET(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}")
+ENDIF(CMAKE_C_SIZEOF_DATA_PTR)
+
+IF(CMAKE_C_COMPILER_ABI)
+ SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}")
+ENDIF(CMAKE_C_COMPILER_ABI)
diff --git a/src/CMakeFiles/CMakeCXXCompiler.cmake b/src/CMakeFiles/CMakeCXXCompiler.cmake
new file mode 100644
index 0000000..64dad77
--- /dev/null
+++ b/src/CMakeFiles/CMakeCXXCompiler.cmake
@@ -0,0 +1,36 @@
+SET(CMAKE_CXX_COMPILER "/usr/bin/c++")
+SET(CMAKE_CXX_COMPILER_ARG1 "")
+SET(CMAKE_CXX_COMPILER_ID "GNU")
+SET(CMAKE_CXX_PLATFORM_ID "Linux")
+SET(CMAKE_AR "/usr/bin/ar")
+SET(CMAKE_RANLIB "/usr/bin/ranlib")
+SET(CMAKE_COMPILER_IS_GNUCXX 1)
+SET(CMAKE_CXX_COMPILER_LOADED 1)
+SET(CMAKE_COMPILER_IS_MINGW )
+SET(CMAKE_COMPILER_IS_CYGWIN )
+IF(CMAKE_COMPILER_IS_CYGWIN)
+ SET(CYGWIN 1)
+ SET(UNIX 1)
+ENDIF(CMAKE_COMPILER_IS_CYGWIN)
+
+SET(CMAKE_CXX_COMPILER_ENV_VAR "CXX")
+
+IF(CMAKE_COMPILER_IS_MINGW)
+ SET(MINGW 1)
+ENDIF(CMAKE_COMPILER_IS_MINGW)
+SET(CMAKE_CXX_COMPILER_ID_RUN 1)
+SET(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;H;o;O;obj;OBJ;def;DEF;rc;RC)
+SET(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm)
+SET(CMAKE_CXX_LINKER_PREFERENCE 30)
+
+# Save compiler ABI information.
+SET(CMAKE_CXX_SIZEOF_DATA_PTR "8")
+SET(CMAKE_CXX_COMPILER_ABI "ELF")
+
+IF(CMAKE_CXX_SIZEOF_DATA_PTR)
+ SET(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}")
+ENDIF(CMAKE_CXX_SIZEOF_DATA_PTR)
+
+IF(CMAKE_CXX_COMPILER_ABI)
+ SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}")
+ENDIF(CMAKE_CXX_COMPILER_ABI)
diff --git a/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin b/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin
new file mode 100644
index 0000000..9a91ed2
--- /dev/null
+++ b/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin
Binary files differ
diff --git a/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin b/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin
new file mode 100644
index 0000000..be1e6e2
--- /dev/null
+++ b/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin
Binary files differ
diff --git a/src/CMakeFiles/CMakeDirectoryInformation.cmake b/src/CMakeFiles/CMakeDirectoryInformation.cmake
new file mode 100644
index 0000000..66066d5
--- /dev/null
+++ b/src/CMakeFiles/CMakeDirectoryInformation.cmake
@@ -0,0 +1,21 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+# Relative path conversion top directories.
+SET(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/chenguanlin/TD_evaluation/src")
+SET(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/chenguanlin/TD_evaluation/src")
+
+# Force unix paths in dependencies.
+SET(CMAKE_FORCE_UNIX_PATHS 1)
+
+# The C and CXX include file search paths:
+SET(CMAKE_C_INCLUDE_PATH
+ )
+SET(CMAKE_CXX_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH})
+SET(CMAKE_Fortran_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH})
+
+# The C and CXX include file regular expressions for this directory.
+SET(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$")
+SET(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$")
+SET(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN})
+SET(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN})
diff --git a/src/CMakeFiles/CMakeOutput.log b/src/CMakeFiles/CMakeOutput.log
new file mode 100644
index 0000000..3475153
--- /dev/null
+++ b/src/CMakeFiles/CMakeOutput.log
@@ -0,0 +1,89 @@
+The system is: Linux - 2.6.32-358.el6.x86_64 - x86_64
+Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded.
+Compiler: /usr/bin/gcc
+Build flags:
+Id flags:
+
+The output was:
+0
+
+
+Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out"
+
+The C compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdC/a.out"
+
+Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded.
+Compiler: /usr/bin/c++
+Build flags:
+Id flags:
+
+The output was:
+0
+CMakeCXXCompilerId.cpp:67: warning: deprecated conversion from string constant to ‘char*’
+CMakeCXXCompilerId.cpp:157: warning: deprecated conversion from string constant to ‘char*’
+
+
+Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out"
+
+The CXX compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdCXX/a.out"
+
+Determining if the C compiler works passed with the following output:
+Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp
+
+Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast"
+/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build
+gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1
+Building C object CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o
+/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -c /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCCompiler.c
+Linking C executable cmTryCompileExec
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1
+/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -o cmTryCompileExec -rdynamic
+gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+
+
+Detecting C compiler ABI info compiled with the following output:
+Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp
+
+Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast"
+/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build
+gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1
+Building C object CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o
+/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake/Modules/CMakeCCompilerABI.c
+Linking C executable cmTryCompileExec
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1
+/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -o cmTryCompileExec -rdynamic
+gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+
+
+Determining if the CXX compiler works passed with the following output:
+Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp
+
+Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast"
+/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build
+gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1
+Building CXX object CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o
+/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -c /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCXXCompiler.cxx
+Linking CXX executable cmTryCompileExec
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1
+/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -o cmTryCompileExec -rdynamic
+gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+
+
+Detecting CXX compiler ABI info compiled with the following output:
+Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp
+
+Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast"
+/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build
+gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1
+Building CXX object CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o
+/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp
+Linking CXX executable cmTryCompileExec
+/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1
+/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -o cmTryCompileExec -rdynamic
+gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp'
+
+
diff --git a/src/CMakeFiles/CMakeSystem.cmake b/src/CMakeFiles/CMakeSystem.cmake
new file mode 100644
index 0000000..3b3e967
--- /dev/null
+++ b/src/CMakeFiles/CMakeSystem.cmake
@@ -0,0 +1,15 @@
+
+
+SET(CMAKE_SYSTEM "Linux-2.6.32-358.el6.x86_64")
+SET(CMAKE_SYSTEM_NAME "Linux")
+SET(CMAKE_SYSTEM_VERSION "2.6.32-358.el6.x86_64")
+SET(CMAKE_SYSTEM_PROCESSOR "x86_64")
+
+SET(CMAKE_HOST_SYSTEM "Linux-2.6.32-358.el6.x86_64")
+SET(CMAKE_HOST_SYSTEM_NAME "Linux")
+SET(CMAKE_HOST_SYSTEM_VERSION "2.6.32-358.el6.x86_64")
+SET(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64")
+
+SET(CMAKE_CROSSCOMPILING "FALSE")
+
+SET(CMAKE_SYSTEM_LOADED 1)
diff --git a/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c b/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c
new file mode 100644
index 0000000..7fd0088
--- /dev/null
+++ b/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c
@@ -0,0 +1,182 @@
+#ifdef __cplusplus
+# error "A C++ compiler has been selected for C."
+#endif
+
+#if defined(__18CXX)
+# define ID_VOID_MAIN
+#endif
+
+#if defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+
+#elif defined(__WATCOMC__)
+# define COMPILER_ID "Watcom"
+
+#elif defined(__SUNPRO_C)
+# define COMPILER_ID "SunPro"
+
+#elif defined(__HP_cc)
+# define COMPILER_ID "HP"
+
+#elif defined(__DECC)
+# define COMPILER_ID "Compaq"
+
+#elif defined(__IBMC__)
+# define COMPILER_ID "VisualAge"
+
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+
+#elif defined(__GNUC__)
+# define COMPILER_ID "GNU"
+
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+
+#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
+/* Analog Devices C++ compiler for Blackfin, TigerSHARC and
+ SHARC (21000) DSPs */
+# define COMPILER_ID "ADSP"
+
+/* IAR Systems compiler for embedded systems.
+ http://www.iar.com
+ Not supported yet by CMake
+#elif defined(__IAR_SYSTEMS_ICC__)
+# define COMPILER_ID "IAR" */
+
+/* sdcc, the small devices C compiler for embedded systems,
+ http://sdcc.sourceforge.net */
+#elif defined(SDCC)
+# define COMPILER_ID "SDCC"
+
+#elif defined(_COMPILER_VERSION)
+# define COMPILER_ID "MIPSpro"
+
+/* This compiler is either not known or is too old to define an
+ identification macro. Try to identify the platform and guess that
+ it is the native compiler. */
+#elif defined(__sgi)
+# define COMPILER_ID "MIPSpro"
+
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+
+#else /* unknown compiler */
+# define COMPILER_ID ""
+
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+ getting matched. Store it in a pointer rather than an array
+ because some compilers will just produce instructions to fill the
+ array rather than assigning a pointer to a static array. */
+char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
+
+/* Identify known platforms by name. */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+
+#elif defined(__sgi) || defined(__sgi__) || defined(_SGI)
+# define PLATFORM_ID "IRIX"
+
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+
+#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU)
+# define PLATFORM_ID "Haiku"
+/* Haiku also defines __BEOS__ so we must
+ put it prior to the check for __BEOS__
+*/
+
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+
+#else /* unknown platform */
+# define PLATFORM_ID ""
+
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+ getting matched. Store it in a pointer rather than an array
+ because some compilers will just produce instructions to fill the
+ array rather than assigning a pointer to a static array. */
+char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
+
+
+/*--------------------------------------------------------------------------*/
+
+#ifdef ID_VOID_MAIN
+void main() {}
+#else
+int main(int argc, char* argv[])
+{
+ int require = 0;
+ require += info_compiler[argc];
+ require += info_platform[argc];
+ (void)argv;
+ return require;
+}
+#endif
diff --git a/src/CMakeFiles/CompilerIdC/a.out b/src/CMakeFiles/CompilerIdC/a.out
new file mode 100644
index 0000000..c389161
--- /dev/null
+++ b/src/CMakeFiles/CompilerIdC/a.out
Binary files differ
diff --git a/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp b/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp
new file mode 100644
index 0000000..f8c041f
--- /dev/null
+++ b/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp
@@ -0,0 +1,169 @@
+/* This source file must have a .cpp extension so that all C++ compilers
+ recognize the extension without flags. Borland does not know .cxx for
+ example. */
+#ifndef __cplusplus
+# error "A C compiler has been selected for C++."
+#endif
+
+#if defined(__COMO__)
+# define COMPILER_ID "Comeau"
+
+#elif defined(__INTEL_COMPILER) || defined(__ICC)
+# define COMPILER_ID "Intel"
+
+#elif defined(__BORLANDC__)
+# define COMPILER_ID "Borland"
+
+#elif defined(__WATCOMC__)
+# define COMPILER_ID "Watcom"
+
+#elif defined(__SUNPRO_CC)
+# define COMPILER_ID "SunPro"
+
+#elif defined(__HP_aCC)
+# define COMPILER_ID "HP"
+
+#elif defined(__DECCXX)
+# define COMPILER_ID "Compaq"
+
+#elif defined(__IBMCPP__)
+# define COMPILER_ID "VisualAge"
+
+#elif defined(__PGI)
+# define COMPILER_ID "PGI"
+
+#elif defined(__GNUC__)
+# define COMPILER_ID "GNU"
+
+#elif defined(_MSC_VER)
+# define COMPILER_ID "MSVC"
+
+#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__)
+/* Analog Devices C++ compiler for Blackfin, TigerSHARC and
+ SHARC (21000) DSPs */
+# define COMPILER_ID "ADSP"
+
+#elif defined(_COMPILER_VERSION)
+# define COMPILER_ID "MIPSpro"
+
+/* This compiler is either not known or is too old to define an
+ identification macro. Try to identify the platform and guess that
+ it is the native compiler. */
+#elif defined(__sgi)
+# define COMPILER_ID "MIPSpro"
+
+#elif defined(__hpux) || defined(__hpua)
+# define COMPILER_ID "HP"
+
+#else /* unknown compiler */
+# define COMPILER_ID ""
+
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+ getting matched. Store it in a pointer rather than an array
+ because some compilers will just produce instructions to fill the
+ array rather than assigning a pointer to a static array. */
+char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]";
+
+/* Identify known platforms by name. */
+#if defined(__linux) || defined(__linux__) || defined(linux)
+# define PLATFORM_ID "Linux"
+
+#elif defined(__CYGWIN__)
+# define PLATFORM_ID "Cygwin"
+
+#elif defined(__MINGW32__)
+# define PLATFORM_ID "MinGW"
+
+#elif defined(__APPLE__)
+# define PLATFORM_ID "Darwin"
+
+#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32)
+# define PLATFORM_ID "Windows"
+
+#elif defined(__FreeBSD__) || defined(__FreeBSD)
+# define PLATFORM_ID "FreeBSD"
+
+#elif defined(__NetBSD__) || defined(__NetBSD)
+# define PLATFORM_ID "NetBSD"
+
+#elif defined(__OpenBSD__) || defined(__OPENBSD)
+# define PLATFORM_ID "OpenBSD"
+
+#elif defined(__sun) || defined(sun)
+# define PLATFORM_ID "SunOS"
+
+#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__)
+# define PLATFORM_ID "AIX"
+
+#elif defined(__sgi) || defined(__sgi__) || defined(_SGI)
+# define PLATFORM_ID "IRIX"
+
+#elif defined(__hpux) || defined(__hpux__)
+# define PLATFORM_ID "HP-UX"
+
+#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU)
+# define PLATFORM_ID "Haiku"
+/* Haiku also defines __BEOS__ so we must
+ put it prior to the check for __BEOS__
+*/
+
+#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS)
+# define PLATFORM_ID "BeOS"
+
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define PLATFORM_ID "QNX"
+
+#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__)
+# define PLATFORM_ID "Tru64"
+
+#elif defined(__riscos) || defined(__riscos__)
+# define PLATFORM_ID "RISCos"
+
+#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__)
+# define PLATFORM_ID "SINIX"
+
+#elif defined(__UNIX_SV__)
+# define PLATFORM_ID "UNIX_SV"
+
+#elif defined(__bsdos__)
+# define PLATFORM_ID "BSDOS"
+
+#elif defined(_MPRAS) || defined(MPRAS)
+# define PLATFORM_ID "MP-RAS"
+
+#elif defined(__osf) || defined(__osf__)
+# define PLATFORM_ID "OSF1"
+
+#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv)
+# define PLATFORM_ID "SCO_SV"
+
+#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX)
+# define PLATFORM_ID "ULTRIX"
+
+#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX)
+# define PLATFORM_ID "Xenix"
+
+#else /* unknown platform */
+# define PLATFORM_ID ""
+
+#endif
+
+/* Construct the string literal in pieces to prevent the source from
+ getting matched. Store it in a pointer rather than an array
+ because some compilers will just produce instructions to fill the
+ array rather than assigning a pointer to a static array. */
+char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]";
+
+
+/*--------------------------------------------------------------------------*/
+
+int main(int argc, char* argv[])
+{
+ int require = 0;
+ require += info_compiler[argc];
+ require += info_platform[argc];
+ (void)argv;
+ return require;
+}
diff --git a/src/CMakeFiles/CompilerIdCXX/a.out b/src/CMakeFiles/CompilerIdCXX/a.out
new file mode 100644
index 0000000..65597e7
--- /dev/null
+++ b/src/CMakeFiles/CompilerIdCXX/a.out
Binary files differ
diff --git a/src/CMakeFiles/Makefile.cmake b/src/CMakeFiles/Makefile.cmake
new file mode 100644
index 0000000..8466809
--- /dev/null
+++ b/src/CMakeFiles/Makefile.cmake
@@ -0,0 +1,52 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+# The generator used is:
+SET(CMAKE_DEPENDS_GENERATOR "Unix Makefiles")
+
+# The top level Makefile was generated from the following files:
+SET(CMAKE_MAKEFILE_DEPENDS
+ "CMakeCache.txt"
+ "CMakeFiles/CMakeCCompiler.cmake"
+ "CMakeFiles/CMakeCXXCompiler.cmake"
+ "CMakeFiles/CMakeSystem.cmake"
+ "CMakeLists.txt"
+ "/usr/share/cmake/Modules/CMakeCCompiler.cmake.in"
+ "/usr/share/cmake/Modules/CMakeCCompilerABI.c"
+ "/usr/share/cmake/Modules/CMakeCInformation.cmake"
+ "/usr/share/cmake/Modules/CMakeCXXCompiler.cmake.in"
+ "/usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp"
+ "/usr/share/cmake/Modules/CMakeCXXInformation.cmake"
+ "/usr/share/cmake/Modules/CMakeCommonLanguageInclude.cmake"
+ "/usr/share/cmake/Modules/CMakeDetermineCCompiler.cmake"
+ "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake"
+ "/usr/share/cmake/Modules/CMakeDetermineCompilerABI.cmake"
+ "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake"
+ "/usr/share/cmake/Modules/CMakeDetermineSystem.cmake"
+ "/usr/share/cmake/Modules/CMakeFindBinUtils.cmake"
+ "/usr/share/cmake/Modules/CMakeGenericSystem.cmake"
+ "/usr/share/cmake/Modules/CMakeSystem.cmake.in"
+ "/usr/share/cmake/Modules/CMakeSystemSpecificInformation.cmake"
+ "/usr/share/cmake/Modules/CMakeTestCCompiler.cmake"
+ "/usr/share/cmake/Modules/CMakeTestCXXCompiler.cmake"
+ "/usr/share/cmake/Modules/CMakeUnixFindMake.cmake"
+ "/usr/share/cmake/Modules/Platform/Linux.cmake"
+ "/usr/share/cmake/Modules/Platform/UnixPaths.cmake"
+ "/usr/share/cmake/Modules/Platform/gcc.cmake"
+ )
+
+# The corresponding makefile is:
+SET(CMAKE_MAKEFILE_OUTPUTS
+ "Makefile"
+ "CMakeFiles/cmake.check_cache"
+ "CMakeFiles/CMakeDirectoryInformation.cmake"
+ )
+
+# Byproducts of CMake generate step:
+SET(CMAKE_MAKEFILE_PRODUCTS
+ )
+
+# Dependency information for all targets:
+SET(CMAKE_DEPEND_INFO_FILES
+ "CMakeFiles/grain.dir/DependInfo.cmake"
+ )
diff --git a/src/CMakeFiles/Makefile2 b/src/CMakeFiles/Makefile2
new file mode 100644
index 0000000..405383d
--- /dev/null
+++ b/src/CMakeFiles/Makefile2
@@ -0,0 +1,99 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+# Default target executed when no arguments are given to make.
+default_target: all
+.PHONY : default_target
+
+# The main recursive all target
+all:
+.PHONY : all
+
+# The main recursive preinstall target
+preinstall:
+.PHONY : preinstall
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canoncical targets will work.
+.SUFFIXES:
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+# A target that is always out of date.
+cmake_force:
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# The program to use to edit the cache.
+CMAKE_EDIT_COMMAND = /usr/bin/ccmake
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src
+
+#=============================================================================
+# Target rules for target CMakeFiles/grain.dir
+
+# All Build rule for target.
+CMakeFiles/grain.dir/all:
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/depend
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build
+ $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles 1
+ @echo "Built target grain"
+.PHONY : CMakeFiles/grain.dir/all
+
+# Include target in all.
+all: CMakeFiles/grain.dir/all
+.PHONY : all
+
+# Build rule for subdir invocation for target.
+CMakeFiles/grain.dir/rule: cmake_check_build_system
+ $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 1
+ $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/grain.dir/all
+ $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0
+.PHONY : CMakeFiles/grain.dir/rule
+
+# Convenience name for target.
+grain: CMakeFiles/grain.dir/rule
+.PHONY : grain
+
+# clean rule for target.
+CMakeFiles/grain.dir/clean:
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/clean
+.PHONY : CMakeFiles/grain.dir/clean
+
+# clean rule for target.
+clean: CMakeFiles/grain.dir/clean
+.PHONY : clean
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+cmake_check_build_system:
+ $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/src/CMakeFiles/Progress/1 b/src/CMakeFiles/Progress/1
new file mode 100644
index 0000000..7b4d68d
--- /dev/null
+++ b/src/CMakeFiles/Progress/1
@@ -0,0 +1 @@
+empty
\ No newline at end of file
diff --git a/src/CMakeFiles/Progress/count.txt b/src/CMakeFiles/Progress/count.txt
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/src/CMakeFiles/Progress/count.txt
@@ -0,0 +1 @@
+1
diff --git a/src/CMakeFiles/cmake.check_cache b/src/CMakeFiles/cmake.check_cache
new file mode 100644
index 0000000..3dccd73
--- /dev/null
+++ b/src/CMakeFiles/cmake.check_cache
@@ -0,0 +1 @@
+# This file is generated by cmake for dependency checking of the CMakeCache.txt file
diff --git a/src/CMakeFiles/grain.dir/C.includecache b/src/CMakeFiles/grain.dir/C.includecache
new file mode 100644
index 0000000..d4110de
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/C.includecache
@@ -0,0 +1,24 @@
+#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">])
+
+#IncludeRegexScan: ^.*$
+
+#IncludeRegexComplain: ^$
+
+#IncludeRegexTransform:
+
+/home/chenguanlin/TD_evaluation/src/grain.c
+stdio.h
+-
+string.h
+-
+stdlib.h
+-
+gram_index_engine.h
+/home/chenguanlin/TD_evaluation/src/gram_index_engine.h
+MESA/MESA_htable.h
+-
+assert.h
+-
+ctype.h
+-
+
diff --git a/src/CMakeFiles/grain.dir/DependInfo.cmake b/src/CMakeFiles/grain.dir/DependInfo.cmake
new file mode 100644
index 0000000..0504394
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/DependInfo.cmake
@@ -0,0 +1,13 @@
+# The set of languages for which implicit dependencies are needed:
+SET(CMAKE_DEPENDS_LANGUAGES
+ "C"
+ )
+# The set of files for implicit dependencies of each language:
+SET(CMAKE_DEPENDS_CHECK_C
+ "/home/chenguanlin/TD_evaluation/src/grain.c" "/home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/grain.c.o"
+ )
+SET(CMAKE_C_COMPILER_ID "GNU")
+
+# Targets to which this target links.
+SET(CMAKE_TARGET_LINKED_INFO_FILES
+ )
diff --git a/src/CMakeFiles/grain.dir/build.make b/src/CMakeFiles/grain.dir/build.make
new file mode 100644
index 0000000..18e458a
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/build.make
@@ -0,0 +1,103 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canoncical targets will work.
+.SUFFIXES:
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+# A target that is always out of date.
+cmake_force:
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# The program to use to edit the cache.
+CMAKE_EDIT_COMMAND = /usr/bin/ccmake
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src
+
+# Include any dependencies generated for this target.
+include CMakeFiles/grain.dir/depend.make
+
+# Include the progress variables for this target.
+include CMakeFiles/grain.dir/progress.make
+
+# Include the compile flags for this target's objects.
+include CMakeFiles/grain.dir/flags.make
+
+CMakeFiles/grain.dir/grain.c.o: CMakeFiles/grain.dir/flags.make
+CMakeFiles/grain.dir/grain.c.o: grain.c
+ $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles $(CMAKE_PROGRESS_1)
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Building C object CMakeFiles/grain.dir/grain.c.o"
+ /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -o CMakeFiles/grain.dir/grain.c.o -c /home/chenguanlin/TD_evaluation/src/grain.c
+
+CMakeFiles/grain.dir/grain.c.i: cmake_force
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing C source to CMakeFiles/grain.dir/grain.c.i"
+ /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -E /home/chenguanlin/TD_evaluation/src/grain.c > CMakeFiles/grain.dir/grain.c.i
+
+CMakeFiles/grain.dir/grain.c.s: cmake_force
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling C source to assembly CMakeFiles/grain.dir/grain.c.s"
+ /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -S /home/chenguanlin/TD_evaluation/src/grain.c -o CMakeFiles/grain.dir/grain.c.s
+
+CMakeFiles/grain.dir/grain.c.o.requires:
+.PHONY : CMakeFiles/grain.dir/grain.c.o.requires
+
+CMakeFiles/grain.dir/grain.c.o.provides: CMakeFiles/grain.dir/grain.c.o.requires
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o.provides.build
+.PHONY : CMakeFiles/grain.dir/grain.c.o.provides
+
+CMakeFiles/grain.dir/grain.c.o.provides.build: CMakeFiles/grain.dir/grain.c.o
+.PHONY : CMakeFiles/grain.dir/grain.c.o.provides.build
+
+# Object files for target grain
+grain_OBJECTS = \
+"CMakeFiles/grain.dir/grain.c.o"
+
+# External object files for target grain
+grain_EXTERNAL_OBJECTS =
+
+grain: CMakeFiles/grain.dir/grain.c.o
+grain: CMakeFiles/grain.dir/build.make
+grain: CMakeFiles/grain.dir/link.txt
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --red --bold "Linking C executable grain"
+ $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/grain.dir/link.txt --verbose=$(VERBOSE)
+
+# Rule to build all files generated by this target.
+CMakeFiles/grain.dir/build: grain
+.PHONY : CMakeFiles/grain.dir/build
+
+CMakeFiles/grain.dir/requires: CMakeFiles/grain.dir/grain.c.o.requires
+.PHONY : CMakeFiles/grain.dir/requires
+
+CMakeFiles/grain.dir/clean:
+ $(CMAKE_COMMAND) -P CMakeFiles/grain.dir/cmake_clean.cmake
+.PHONY : CMakeFiles/grain.dir/clean
+
+CMakeFiles/grain.dir/depend:
+ cd /home/chenguanlin/TD_evaluation/src && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/DependInfo.cmake --color=$(COLOR)
+.PHONY : CMakeFiles/grain.dir/depend
+
diff --git a/src/CMakeFiles/grain.dir/cmake_clean.cmake b/src/CMakeFiles/grain.dir/cmake_clean.cmake
new file mode 100644
index 0000000..54d1698
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/cmake_clean.cmake
@@ -0,0 +1,10 @@
+FILE(REMOVE_RECURSE
+ "CMakeFiles/grain.dir/grain.c.o"
+ "grain.pdb"
+ "grain"
+)
+
+# Per-language clean rules from dependency scanning.
+FOREACH(lang C)
+ INCLUDE(CMakeFiles/grain.dir/cmake_clean_${lang}.cmake OPTIONAL)
+ENDFOREACH(lang)
diff --git a/src/CMakeFiles/grain.dir/depend.internal b/src/CMakeFiles/grain.dir/depend.internal
new file mode 100644
index 0000000..f1b3d06
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/depend.internal
@@ -0,0 +1,5 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+CMakeFiles/grain.dir/grain.c.o
+ /home/chenguanlin/TD_evaluation/src/grain.c
diff --git a/src/CMakeFiles/grain.dir/depend.make b/src/CMakeFiles/grain.dir/depend.make
new file mode 100644
index 0000000..85fc728
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/depend.make
@@ -0,0 +1,5 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+CMakeFiles/grain.dir/grain.c.o: grain.c
+
diff --git a/src/CMakeFiles/grain.dir/flags.make b/src/CMakeFiles/grain.dir/flags.make
new file mode 100644
index 0000000..72791e9
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/flags.make
@@ -0,0 +1,8 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+# compile C with /usr/bin/gcc
+C_FLAGS = -g
+
+C_DEFINES =
+
diff --git a/src/CMakeFiles/grain.dir/link.txt b/src/CMakeFiles/grain.dir/link.txt
new file mode 100644
index 0000000..0f3e72b
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/link.txt
@@ -0,0 +1 @@
+/usr/bin/gcc -g -fPIC CMakeFiles/grain.dir/grain.c.o -o grain -rdynamic -lmaatframe -lMESA_htable -lpthread -lm
diff --git a/src/CMakeFiles/grain.dir/progress.make b/src/CMakeFiles/grain.dir/progress.make
new file mode 100644
index 0000000..781c7de
--- /dev/null
+++ b/src/CMakeFiles/grain.dir/progress.make
@@ -0,0 +1,2 @@
+CMAKE_PROGRESS_1 = 1
+
diff --git a/src/CMakeFiles/progress.make b/src/CMakeFiles/progress.make
new file mode 100644
index 0000000..d00491f
--- /dev/null
+++ b/src/CMakeFiles/progress.make
@@ -0,0 +1 @@
+1
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..a3fd6fa
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,163 @@
+# CMAKE generated file: DO NOT EDIT!
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6
+
+# Default target executed when no arguments are given to make.
+default_target: all
+.PHONY : default_target
+
+#=============================================================================
+# Special targets provided by cmake.
+
+# Disable implicit rules so canoncical targets will work.
+.SUFFIXES:
+
+# Remove some rules from gmake that .SUFFIXES does not remove.
+SUFFIXES =
+
+.SUFFIXES: .hpux_make_needs_suffix_list
+
+# Suppress display of executed commands.
+$(VERBOSE).SILENT:
+
+# A target that is always out of date.
+cmake_force:
+.PHONY : cmake_force
+
+#=============================================================================
+# Set environment variables for the build.
+
+# The shell in which to execute make rules.
+SHELL = /bin/sh
+
+# The CMake executable.
+CMAKE_COMMAND = /usr/bin/cmake
+
+# The command to remove a file.
+RM = /usr/bin/cmake -E remove -f
+
+# The program to use to edit the cache.
+CMAKE_EDIT_COMMAND = /usr/bin/ccmake
+
+# The top-level source directory on which CMake was run.
+CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src
+
+# The top-level build directory on which CMake was run.
+CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src
+
+#=============================================================================
+# Targets provided globally by CMake.
+
+# Special rule for the target edit_cache
+edit_cache:
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..."
+ /usr/bin/ccmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : edit_cache
+
+# Special rule for the target edit_cache
+edit_cache/fast: edit_cache
+.PHONY : edit_cache/fast
+
+# Special rule for the target rebuild_cache
+rebuild_cache:
+ @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..."
+ /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR)
+.PHONY : rebuild_cache
+
+# Special rule for the target rebuild_cache
+rebuild_cache/fast: rebuild_cache
+.PHONY : rebuild_cache/fast
+
+# The main all target
+all: cmake_check_build_system
+ $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles /home/chenguanlin/TD_evaluation/src/CMakeFiles/progress.make
+ $(MAKE) -f CMakeFiles/Makefile2 all
+ $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0
+.PHONY : all
+
+# The main clean target
+clean:
+ $(MAKE) -f CMakeFiles/Makefile2 clean
+.PHONY : clean
+
+# The main clean target
+clean/fast: clean
+.PHONY : clean/fast
+
+# Prepare targets for installation.
+preinstall: all
+ $(MAKE) -f CMakeFiles/Makefile2 preinstall
+.PHONY : preinstall
+
+# Prepare targets for installation.
+preinstall/fast:
+ $(MAKE) -f CMakeFiles/Makefile2 preinstall
+.PHONY : preinstall/fast
+
+# clear depends
+depend:
+ $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1
+.PHONY : depend
+
+#=============================================================================
+# Target rules for targets named grain
+
+# Build rule for target.
+grain: cmake_check_build_system
+ $(MAKE) -f CMakeFiles/Makefile2 grain
+.PHONY : grain
+
+# fast build rule for target.
+grain/fast:
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build
+.PHONY : grain/fast
+
+grain.o: grain.c.o
+.PHONY : grain.o
+
+# target to build an object file
+grain.c.o:
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o
+.PHONY : grain.c.o
+
+grain.i: grain.c.i
+.PHONY : grain.i
+
+# target to preprocess a source file
+grain.c.i:
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.i
+.PHONY : grain.c.i
+
+grain.s: grain.c.s
+.PHONY : grain.s
+
+# target to generate assembly for a file
+grain.c.s:
+ $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.s
+.PHONY : grain.c.s
+
+# Help Target
+help:
+ @echo "The following are some of the valid targets for this Makefile:"
+ @echo "... all (the default if no target is provided)"
+ @echo "... clean"
+ @echo "... depend"
+ @echo "... edit_cache"
+ @echo "... grain"
+ @echo "... rebuild_cache"
+ @echo "... grain.o"
+ @echo "... grain.i"
+ @echo "... grain.s"
+.PHONY : help
+
+
+
+#=============================================================================
+# Special targets to cleanup operation of make.
+
+# Special rule to run CMake to check the build system integrity.
+# No rule that depends on this can have commands that come from listfiles
+# because they might be regenerated.
+cmake_check_build_system:
+ $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0
+.PHONY : cmake_check_build_system
+
diff --git a/src/TD代码说明.docx b/src/TD代码说明.docx
new file mode 100644
index 0000000..4d37049
--- /dev/null
+++ b/src/TD代码说明.docx
Binary files differ
diff --git a/src/cmake_install.cmake b/src/cmake_install.cmake
new file mode 100644
index 0000000..75c1e3c
--- /dev/null
+++ b/src/cmake_install.cmake
@@ -0,0 +1,44 @@
+# Install script for directory: /home/chenguanlin/TD_evaluation/src
+
+# Set the install prefix
+IF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+ SET(CMAKE_INSTALL_PREFIX "/usr/local")
+ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+
+# Set the install configuration name.
+IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+ IF(BUILD_TYPE)
+ STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
+ CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
+ ELSE(BUILD_TYPE)
+ SET(CMAKE_INSTALL_CONFIG_NAME "Debug")
+ ENDIF(BUILD_TYPE)
+ MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
+ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+
+# Set the component getting installed.
+IF(NOT CMAKE_INSTALL_COMPONENT)
+ IF(COMPONENT)
+ MESSAGE(STATUS "Install component: \"${COMPONENT}\"")
+ SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
+ ELSE(COMPONENT)
+ SET(CMAKE_INSTALL_COMPONENT)
+ ENDIF(COMPONENT)
+ENDIF(NOT CMAKE_INSTALL_COMPONENT)
+
+# Install shared libraries without execute permission?
+IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+ SET(CMAKE_INSTALL_SO_NO_EXE "0")
+ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+
+IF(CMAKE_INSTALL_COMPONENT)
+ SET(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt")
+ELSE(CMAKE_INSTALL_COMPONENT)
+ SET(CMAKE_INSTALL_MANIFEST "install_manifest.txt")
+ENDIF(CMAKE_INSTALL_COMPONENT)
+
+FILE(WRITE "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "")
+FOREACH(file ${CMAKE_INSTALL_MANIFEST_FILES})
+ FILE(APPEND "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "${file}\n")
+ENDFOREACH(file)
diff --git a/src/dataset_build/CMakeLists.txt b/src/dataset_build/CMakeLists.txt
new file mode 100644
index 0000000..8840a74
--- /dev/null
+++ b/src/dataset_build/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT (CALCULATE)
+SET (SRC_LIST get_lost.c)
+SET(CMAKE_BUILD_TYPE "Debug")
+SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb")
+SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
+MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR})
+MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR})
+#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/)
+#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/)
+ADD_EXECUTABLE(get_lost ${SRC_LIST})
+TARGET_LINK_LIBRARIES(get_lost maatframe libMESA_htable.so pthread m)
diff --git a/src/dataset_build/based_sfh.conf b/src/dataset_build/based_sfh.conf
new file mode 100644
index 0000000..cdcf4cf
--- /dev/null
+++ b/src/dataset_build/based_sfh.conf
@@ -0,0 +1,3 @@
+[file]
+raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest_nots
+ripe_file_address = ../../data/td_data_set/td_data_20171207/base_sfh_set \ No newline at end of file
diff --git a/src/dataset_build/based_sfh.py b/src/dataset_build/based_sfh.py
new file mode 100644
index 0000000..b3281ce
--- /dev/null
+++ b/src/dataset_build/based_sfh.py
@@ -0,0 +1,44 @@
+import re
+import ConfigParser
+import bisect
+import random
+
+term = {'not_null':(lambda x : len(x)!=0)}
+
+config = ConfigParser.RawConfigParser()
+config.read("based_sfh.conf")
+raw_file_address = config.get("file","raw_file_address")
+ripe_file_address = config.get("file","ripe_file_address")
+
+class sfh_fingerprint(object):
+
+ def __init__(self,sfh):
+ self.sfh = sfh
+
+ @staticmethod
+ def get_hashed_len(sfh):
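+ # Pull every "[start:end]" span out of the SFH string and return the
+ # average span length (integer division), or -1 when no spans are found.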
+ p = r"\[+\d+?:+\d+?\]"
+ pattern = re.compile(p)
+ hashed_len_set = pattern.findall(sfh)
+ if (term['not_null'](hashed_len_set)):
+ hashed_len = 0
+ for x in xrange(0,len(hashed_len_set)):
+ hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x])
+ hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1])
+ return hashed_len/len(hashed_len_set)
+ else :
+ return -1
+
+i=0
+sfh_set = list()
+with open(raw_file_address,'r') as infile:
+ with open(ripe_file_address,'w') as outfile:
+ for line in infile:
+ i+=1
+ if(i%100000==0):
+ print i
+ result = re.split(r';',line)
+ if(term['not_null'](result[3]) and term['not_null'](result[19])):
+ hashed_len = sfh_fingerprint.get_hashed_len(result[19])
+ if(hashed_len/float(result[3])>0.8):
+ outfile.write(result[19]+'\n') \ No newline at end of file
diff --git a/src/dataset_build/cal_information.conf b/src/dataset_build/cal_information.conf
new file mode 100644
index 0000000..1571b8b
--- /dev/null
+++ b/src/dataset_build/cal_information.conf
@@ -0,0 +1,5 @@
+[file]
+raw_file_address = ../../data/ripe_data/td_data_20171207/video_id.txt
+ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic
+[feature]
+feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file
diff --git a/src/dataset_build/cal_information.py b/src/dataset_build/cal_information.py
new file mode 100644
index 0000000..19cd95c
--- /dev/null
+++ b/src/dataset_build/cal_information.py
@@ -0,0 +1,133 @@
+import re
+import numpy
+import ConfigParser
+import binascii
+term = {'td_len':(lambda x : len(x)==32),
+ 'data_num':(lambda x : len(x)==4),
+ 'url':(lambda x : x.find('NUll')),
+ 'sfh_len':(lambda x : len(x)>20),
+ 'not_null':(lambda x : len(x)!=0)}
+
+class calculation(object):
+ """docstring for calculation"""
+ def __init__(self, arg):
+ super(calculation, self).__init__()
+ self.arg = arg
+
+ @staticmethod
+ def cal_ent(x):
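+ # Shannon entropy (base 2) of the values in x, with zero entries left out
+ # of both the value set and the probability denominator.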
+ x_value_list = set([x[i] for i in range(x.shape[0])])
+ ent = 0.0
+ num_0 = x[x == 0].shape[0]
+ for x_value in x_value_list:
+ if(x_value==0):
+ continue
+ p = float(x[x == x_value].shape[0])/(x.shape[0]- num_0)
+ logp = numpy.log2(p)
+ ent -=p*logp
+ return ent
+
+class data_value(object):
+ """docstring for data_value"""
+ def __init__(self, arg):
+ super(data_value, self).__init__()
+ self.arg = arg
+
+ @staticmethod
+ def get_data_values(data):
+ data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data)
+ #data_set[0]=null,data_set[1]=url
+ data_value_dic = [long(0)]*6
+ for x in xrange(1,len(feature_list)+1):
+ if(x==1):
+ if(term['not_null'](data_set[x])==True):
+ data_value_dic[0] = binascii.crc32(data_set[x])
+ else:
+ data_value_dic[0] = 0
+ elif(x==2):
+ if(term['not_null'](data_set[x])==True):
+ data_value_dic[1] = binascii.crc32(data_set[x])
+ else:
+ data_value_dic[1] = 0
+ elif(x==3):
+ data_value_dic[2] = long(data_set[x])
+ elif(x==4):
+ data_value_dic[3] = long(data_set[x])
+ elif(x==5):
+ if(term['not_null'](data_set[x])==True):
+ data_value_dic[4] = binascii.crc32(data_set[x])
+ else:
+ data_value_dic[4] = 0
+ elif(x==6):
+ if(term['not_null'](data_set[x])==True):
+ data_value_dic[5] = binascii.crc32(data_set[x])
+ else:
+ data_value_dic[5] = 0
+ return data_value_dic
+
+config = ConfigParser.RawConfigParser()
+config.read("cal_information.conf")
+
+raw_file_address = config.get("file","raw_file_address")
+ripe_file_address = config.get("file","ripe_file_address")
+feature_list =[i for i in config.get("feature","feature_name").split(",")]
+
+i=0
+with open(raw_file_address,'r') as infile:
+ for line in infile:
+ i+=1
+ if(i%10000==0):
+ print i
+ if(i==50000):
+ break
+ line_split = re.split(";",line)
+ data_value_temp = data_value.get_data_values(line_split[5])
+ data_value_temp.extend([binascii.crc32(line_split[j]) for j in range(6,19)])
+ data_value_temp.append(binascii.crc32(line_split[0]))
+ if(i==1):
+ a=numpy.array(data_value_temp)
+ else:
+ a=numpy.row_stack((a,numpy.array(data_value_temp)))
+
+# per-column entropy, labelled by feature name
+column_names = ["URL","ServerIP","MediaType","MediaLen","Etag","LastModify",
+ "td_0k","td_data_md5_1k","td_1k","td_data_md5_2k","td_2k",
+ "td_data_md5_4k","td_4k","td_data_md5_8k","td_8k",
+ "td_data_md5_16k","td_16k","td_data_md5_32k","td_32k","id"]
+for i in range(20):
+ print column_names[i]+":"+str(calculation.cal_ent(a[:,i]))
+
diff --git a/src/dataset_build/dataset_build.conf b/src/dataset_build/dataset_build.conf
new file mode 100644
index 0000000..400e160
--- /dev/null
+++ b/src/dataset_build/dataset_build.conf
@@ -0,0 +1,8 @@
+[file]
+raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level
+ripe_file_address = ../../data/td_data_set/td_data_20171207/td_dataset
+base_sfh_sets = ../../data/td_data_set/td_data_20171207/base_sfh_set
+[output]
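+# size breakpoints in bytes: 1 MiB, 1.25 MiB, 1.5 MiB, 1.75 MiB, 2 MiB, 3 MiB, 4 MiB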
+breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304
+[feature]
+feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file
diff --git a/src/dataset_build/dataset_build.py b/src/dataset_build/dataset_build.py
new file mode 100644
index 0000000..a832072
--- /dev/null
+++ b/src/dataset_build/dataset_build.py
@@ -0,0 +1,144 @@
+import re
+import ConfigParser
+import bisect
+import random
+import ctypes
+import hashlib
+import zlib
+import binascii
+import json
+import datetime
+import time
+
+term = {'td_len':(lambda x : len(x)==32),
+ 'data_num':(lambda x : len(x)==21),
+ 'url':(lambda x : x.find('NUll')),
+ 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)),
+ 'not_null':(lambda x : len(x)!=0),
+ 'ysp_len':(lambda x : int(x)!=0),
+ 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))}
+
+config = ConfigParser.RawConfigParser()
+config.read("dataset_build.conf")
+raw_file_address = config.get("file","raw_file_address")
+ripe_file_address = config.get("file","ripe_file_address")
+base_sfh_sets = config.get("file","base_sfh_sets")
+breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")]
+feature_list =[i for i in config.get("feature","feature_name").split(",")]
+ll=ctypes.cdll.LoadLibrary
+lib = ll("libmaatframe.so")
+lost = dict()
+
+
+class data_value(object):
+
+ @staticmethod
+ def get_feature(data):
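+ # Flatten one record into numeric features: age of LastModify against the
+ # field-1 timestamp (sec/min/hour/day, -1 when unparsable), field 4 as a
+ # long, URL length, the four ServerIP octets, MediaType, CRC32 of Etag and
+ # LastModify, and CRC32 of the digests in fields 7,9,...,17.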
+ return_data=list()
+ data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data[5])
+ for x in xrange(1,21):
+ if(x==1):
+ if(term['not_null'](data_set[6])):
+ try:
+ time1=datetime.datetime.strptime(data[1],'%Y-%m-%d %H:%M:%S')+datetime.timedelta(hours=int(8))
+ data_set[6]=data_set[6][0:25]
+ time2=datetime.datetime.strptime(data_set[6],'%a, %d %b %Y %H:%M:%S')
+ except Exception, e:
+ return_data.append(-1)
+ return_data.append(-1)
+ return_data.append(-1)
+ return_data.append(-1)
+ else:
+ return_data.append(str((time1-time2).seconds))
+ return_data.append(((time1-time2).seconds)/60)
+ return_data.append(((time1-time2).seconds)/3600)
+ return_data.append((time1-time2).days)
+ else:
+ return_data.append(-1)
+ return_data.append(-1)
+ return_data.append(-1)
+ return_data.append(-1)
+ elif(x==2):
+ continue
+ elif(x==3):
+ continue
+ elif(x==4):
+ return_data.append(long(data[4]))
+ elif(x==5):
+ if(term['not_null'](data_set[1])):
+ return_data.append(len(data_set[1]))
+ else:
+ return_data.append(-1)
+ if(term['not_null'](data_set[2])):
+ ip_set=re.split(r'\.',data_set[2])
+ return_data.append(ip_set[0])
+ return_data.append(ip_set[1])
+ return_data.append(ip_set[2])
+ return_data.append(ip_set[3])
+ else:
+ return_data.append(-1)
+ return_data.append(-1)
+ return_data.append(-1)
+ return_data.append(-1)
+ if(term['not_null'](data_set[3])):
+ return_data.append(int(data_set[3]))
+ else:
+ return_data.append(-1)
+ if(term['not_null'](data_set[5])):
+ return_data.append(binascii.crc32(data_set[5]))
+ else:
+ return_data.append(-1)
+ if(term['not_null'](data_set[6])):
+ return_data.append(binascii.crc32(data_set[6]))
+ else:
+ return_data.append(-1)
+ elif(x==7):
+ return_data.append(binascii.crc32(data[7]))
+ elif(x==9):
+ return_data.append(binascii.crc32(data[9]))
+ elif(x==11):
+ return_data.append(binascii.crc32(data[11]))
+ elif(x==13):
+ return_data.append(binascii.crc32(data[13]))
+ elif(x==15):
+ return_data.append(binascii.crc32(data[15]))
+ elif(x==17):
+ return_data.append(binascii.crc32(data[17]))
+ return return_data
+ # data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data)
+ # #data_set[0]=null,data_set[1]=url
+ # data_value_dic = dict()
+ # for x in xrange(1,len(feature_list)+1):
+ # if(x==1):
+ # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x])
+ # elif(x==2):
+ # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x])
+ # elif(x==3):
+ # data_value_dic[feature_list[x-1]] = data_set[x]
+ # elif(x==4):
+ # data_value_dic[feature_list[x-1]] = data_set[x]
+ # elif(x==5):
+ # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x])
+ # elif(x==6):
+ # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x])
+ # return data_value_dic
+
+
+i=0
+sfh_set = list()
+with open(raw_file_address,'r') as infile:
+ with open(ripe_file_address,'w') as outfile:
+ for line in infile:
+ i+=1
+ if(i%10000==0):
+ print i
+ line_return = re.split(r';',line)
+ # if(int(line_return[0])==0):
+ # print 'td is right'
+ outfile.write(str(line_return[0])+',')
+ return_data=data_value.get_feature(line_return)
+ for x in range(19):
+ if(x==18):
+ outfile.write(str(return_data[18])+'\n')
+ else:
+ outfile.write(str(return_data[x])+',')
diff --git a/src/dataset_build/feature_statistics.conf b/src/dataset_build/feature_statistics.conf
new file mode 100644
index 0000000..12cf089
--- /dev/null
+++ b/src/dataset_build/feature_statistics.conf
@@ -0,0 +1,8 @@
+[file]
+raw_file_address = ../../data/td_data_set/td_data_20171207/td.txt
+ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic
+[output]
+breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304
+[feature]
+type = data_value_statistics
+feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file
diff --git a/src/dataset_build/feature_statistics.py b/src/dataset_build/feature_statistics.py
new file mode 100644
index 0000000..52ae8e0
--- /dev/null
+++ b/src/dataset_build/feature_statistics.py
@@ -0,0 +1,164 @@
+import re
+import ConfigParser
+import bisect
+import random
+import ctypes
+import hashlib
+import zlib
+import binascii
+
+term = {'td_len':(lambda x : len(x)==32),
+ 'data_num':(lambda x : len(x)==4),
+ 'url':(lambda x : x.find('NUll')),
+ 'sfh_len':(lambda x : len(x)>20),
+ 'not_null':(lambda x : len(x)!=0)}
+
+class data_line(object):
+ """docstring for ClassName"""
+ def __init__(self):
+ super(ClassName, self).__init__()
+
+ @staticmethod
+ def if_error(data_line_str):
+ data_line_val = re.split(r';',data_line_str)
+ hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19])
+ if(term['data_num'](data_line_val) and term['sfh_len'](data_line_val[19]) and term['td_len'](data_line_val[9])\
+ and term['td_len'](data_line_val[2]) and term['td_len'](data_line_val[13]) and term['td_len'](data_line_val[15])\
+ and term['td_len'](data_line_val[17]) and term['not_null'](data_line_val[18]) and term['not_null'](data_line_val[19])\
+ and hashed_len/float(data_line_val[3])>0.8):
+ return data_line_val
+ else:
+ return -1
+
+
+class feature_statistics(object):
+ """YSP feature_statistics"""
+ def __init__(self):
+ super(feature_statistics, self).__init__()
+ self.meida_len_statistics_set = [0,0,0,0,0,0,0]
+ self.lost_dict = dict()
+
+ def meida_len_statistics(self,meida_len):
+ j = bisect.bisect(breakpoints,meida_len)
+ self.meida_len_statistics_set[j-1]+=1
+
+ def data_value_statistics(self,data_value_dic,data_value):
+ data_value_str = str()
+ for x in xrange(0,len(feature_list)):
+ data_value_str = data_value_str+str(data_value_dic[feature_list[x]])+','
+
+ if(self.lost_dict.has_key(data_value_str)==False):
+ self.lost_dict[data_value_str]=[0,1,0.]
+ else:
+ if (int(result[3])==1):
+ self.lost_dict[data_value_str][0] += 1
+ self.lost_dict[data_value_str][1] += 1
+ else:
+ self.lost_dict[data_value_str][1] += 1
+
+
+class sfh_fingerprint(object):
+
+ def __init__(self,sfh):
+ self.sfh = sfh
+
+ @staticmethod
+ def get_hashed_len(sfh):
+ p = r"\[+\d+?:+\d+?\]"
+ pattern = re.compile(p)
+ hashed_len_set = pattern.findall(sfh)
+ if (term['not_null'](hashed_len_set)):
+ hashed_len = 0
+ for x in xrange(0,len(hashed_len_set)):
+ hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x])
+ hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1])
+ return hashed_len/len(hashed_len_set)
+ else :
+ return -1
+
+ @staticmethod
+ def get_base_sfh(data_set):
+ base_sfh = list()
+ for x in xrange(0,10):
+ base_sfh.append(data_set[x])
+ return base_sfh
+
+
+
+
+class data_value(object):
+
+ @staticmethod
+ def get_data_values(data):
+ data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data)
+ #data_set[0]=null,data_set[1]=url
+ data_value_dic = dict()
+ for x in xrange(1,len(feature_list)+1):
+ if(x==1):
+ data_value_dic[feature_list[x-1]] = 0 if(term['not_null'](data_set[x])==False) else 1
+ elif(x==2):
+ data_value_dic[feature_list[x-1]] = 0 if(term['not_null'](data_set[x])==False) else 1
+ elif(x==3):
+ data_value_dic[feature_list[x-1]] = data_set[x]
+ elif(x==4):
+ data_value_dic[feature_list[x-1]] = bisect.bisect(breakpoints,int(data_set[x]))
+ elif(x==5):
+ data_value_dic[feature_list[x-1]] = 0 if(term['not_null'](data_set[x])==False) else 1
+ elif(x==6):
+ data_value_dic[feature_list[x-1]] = 0 if(term['not_null'](data_set[x])==False) else 1
+ return data_value_dic
+
+config = ConfigParser.RawConfigParser()
+config.read("feature_statistics.conf")
+
+feature_statistics_type = config.get("feature","type")
+raw_file_address = config.get("file","raw_file_address")
+ripe_file_address = config.get("file","ripe_file_address")
+
+if(feature_statistics_type=="meida_len_statistics"):
+ breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")]
+elif(feature_statistics_type=="data_value_statistics"):
+ feature_list =[i for i in config.get("feature","feature_name").split(",")]
+# ll=ctypes.cdll.LoadLibrary
+# lib = ll("libmaatframe.so")
+
+i=0
+sfh_set = list()
+statistic = feature_statistics()
+with open(raw_file_address,'r') as infile:
+ for line in infile:
+ i+=1
+
+
+
+
+ line_return = data_line.if_error(line)
+ if(line_return != -1):
+ if(feature_statistics_type=="meida_len_statistics"):
+ statistic.meida_len_statistics(line_return[3])
+ elif(feature_statistics_type=="data_value_statistics"):
+ lost_list = list()
+ statistic.meida_len_statistics(line_return)
+ for i in statistic.lost_dict:
+ (statistic.lost_dict[i])[2] = float((statistic.lost_dict[i])[0])/(statistic.lost_dict[i])[1]
+ tmp = (i,int((statistic.lost_dict[i])[0]),int((statistic.lost_dict[i])[1]),float((statistic.lost_dict[i])[2]))
+ lost_list.append(tmp)
+ print sorted(lost_list,cmp=lambda x,y:cmp(x[2],y[2]))
+ # if(x == len(feature_list)-1):
+ # outfile.write(data_value_dic[feature_list[x]]+'\n')
+ # else:
+ # print lost
+ # outfile.write(str(data_value_dic[feature_list[x]])+',')
+ # outfile.write(result[3])
+ # sfh_dot=list()
+ # for x in xrange(0,10):
+ # #transform sfh to dot
+ # sfh_dot.append(lib.GIE_sfh_similiarity(result[19],len(result[19]),sfh_set[x],len(sfh_set[x])))
+ # if(len(data_set)==7):
+ # outfile.write(str(data_set[0])+','+str(data_set[1])+','+str(data_set[2])\
+ # +','+str(data_set[3])+','+str(data_set[4])+','+str(data_set[5])+','+result[5]\
+ # +','+result[7]+','+result[9]+','+result[11]+','+result[13]+','+result[15]+result[17]\
+ # +','+result[19]+'\n')
+
+# with open(ripe_file_address,'w') as outfile:
+# outfile.write(str(lost))
diff --git a/src/dataset_build/file_digest.py b/src/dataset_build/file_digest.py
new file mode 100644
index 0000000..590e059
--- /dev/null
+++ b/src/dataset_build/file_digest.py
@@ -0,0 +1,96 @@
+#-*-coding:utf-8-*-
+import re
+import random
+import ConfigParser
+import bisect
+import commands
+import os
+import hashlib
+
+class data_line(object):
+ """docstring for ClassName"""
+ def __init__(self):
+ super(ClassName, self).__init__()
+
+ @staticmethod
+ def if_error(data_line_str):
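+ # Keep a line only when it splits into 21 ';' fields, six of the digest
+ # fields are 32-character hashes, the SFH/URL fields are non-empty and the
+ # get_hashed_len() estimate is at least 0.8 of field 3; otherwise return -1.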
+ data_line_val = re.split(r';',data_line_str)
+ hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19])
+ if(term['data_num'](data_line_val) and \
+ term['not_null'](data_line_val[0]) and \
+ term['ysp_len'](data_line_val[3]) and \
+ term['not_null'](data_line_val[4]) and \
+ term['td_len'](data_line_val[6]) and \
+ term['td_len'](data_line_val[8]) and \
+ term['td_len'](data_line_val[10]) and \
+ term['td_len'](data_line_val[12]) and \
+ term['td_len'](data_line_val[14]) and \
+ term['td_len'](data_line_val[16]) and \
+ term['not_null'](data_line_val[18]) and \
+ term['sfh_len'](data_line_val[19]) and \
+ term['not_null'](data_line_val[20]) and \
+ hashed_len/float(data_line_val[3])>=0.8):
+ return data_line_val
+ else:
+ return -1
+
+class TD_fingerprint(object):
+ def __init__(self,td,td_string):
+ self.td = td
+ self.td_string = td_string
+ @staticmethod
+ def td_generate(td_string):
+ return hashlib.md5(td_string.encode('utf-8')).hexdigest()
+
+class sfh_fingerprint(object):
+
+ def __init__(self,sfh):
+ self.sfh = sfh
+
+ @staticmethod
+ def get_hashed_len(sfh):
+ p = r"\[+\d+?:+\d+?\]"
+ pattern = re.compile(p)
+ hashed_len_set = pattern.findall(sfh)
+ if (term['not_null'](hashed_len_set)):
+ hashed_len = 0
+ for x in xrange(0,len(hashed_len_set)):
+ hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x])
+ hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1])
+ return hashed_len/len(hashed_len_set)
+ else :
+ return -1
+
+term = {'td_len':(lambda x : len(x)==32),
+ 'data_num':(lambda x : len(x)==21),
+ 'url':(lambda x : x.find('NUll')),
+ 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)),
+ 'not_null':(lambda x : len(x)!=0),
+ 'ysp_len':(lambda x : int(x)!=0),
+ 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))}
+
+grain="./get_lost"
+ripe_files=[]
+config = ConfigParser.RawConfigParser()
+config.read("grain.conf")
+raw_file_address=config.get("file","raw_file_address")
+ripe_files_address=config.get("file","ripe_files_address")
+print ("%s %s" %(raw_file_address,ripe_files_address))
+num = [0,0,0,0,0,0,0]
+breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")]
+# i=0
+# for i in xrange(0,ripe_file_num):
+# outfile=open(ripe_files_address+str(i)+'.txt','w')
+# ripe_files.append(outfile)
+
+i=0
+with open(raw_file_address,'r') as infile:
+# with open('./ripe_data/mistake_td_sfh1_sfh2_sim_rate_len_url_unequal','r')as infile:
+ with open(ripe_files_address,'w')as outfile:
+ for line in infile:
+ i+=1
+ if(i%10000==0):
+ print i
+ line_return = data_line.if_error(line)
+ if(line_return != -1):
+ outfile.write(str(line)) \ No newline at end of file
diff --git a/src/dataset_build/get_lost.c b/src/dataset_build/get_lost.c
new file mode 100644
index 0000000..0e6c452
--- /dev/null
+++ b/src/dataset_build/get_lost.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <MESA/MESA_htable.h>
+#include "gram_index_engine.h" /* for GIE_sfh_similiarity(); assumed reachable via the project include path, as in the sibling sources */
+#include <assert.h>
+#include <ctype.h>
+#define HTABLE_SIZE 8*64*1024*1024
+#define SFH_PASS_RATE 0.8
+#define SIMILIAR 80
+
+typedef struct td
+{
+ char * tdstr;
+ unsigned int lost;
+}td;
+
+typedef struct file_sfh_data
+{
+ long id;
+ char * sfh;
+ td * td_value;
+ char * td_ori;
+}file_sfh_data;
+
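+/*
+Reads "total_len;TD_ORI;TD;SFH" lines into memory, keys them by TD in a MESA
+hash table, and whenever a TD repeats compares the two SFHs with
+GIE_sfh_similiarity(); a similarity below SIMILIAR flags both records as lost.
+Output: one "TD;SFH;TD_ORI;lost" line per record.
+*/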
+int main(int argc,char *argv[])
+{
+ FILE *fpread;//read file handle
+ FILE *fpwrite;//write file handle
+ int array_size = 1024;
+ file_sfh_data **file_data=(file_sfh_data **)malloc(sizeof(file_sfh_data *)*array_size);
+ char* dirstr = "../../data/td_data_set/td_data_20171207/td_sfh_lost";
+ //char* dirstr = *++argv;
+ char* writestr = "../../data/td_data_set/td_data_20171207/td.txt";
+ int total_len = 0;
+ char TD_tmp[256], SFH_tmp[1024*300], TD_ORI[1024*10];
+ char buffer[1024*300+1];
+ int ret = 0;
+ int line = 0;
+ int thread_safe = 0;
+ int i;
+ int id;
+ int similiarity;
+ MESA_htable_handle htable = NULL;
+ fpread=fopen(dirstr,"rb");
+ fpwrite=fopen(writestr,"w");
+ printf("file str is %s\n",dirstr);
+ if(fpread==NULL)
+ {
+ printf("open file error\n");
+ return -1;
+ }
+ buffer[sizeof(buffer)-1]='\0';
+ while(feof(fpread)==0)
+ {
+ if(fgets(buffer,sizeof(buffer)-1,fpread)==NULL)
+ {
+ break;
+ }
+ ret=sscanf(buffer,"%d;%[^;];%[^;];%s",&total_len,TD_ORI,TD_tmp,SFH_tmp);
+ if(ret!=4)
+ {
+ continue;
+ }
+ file_data[line]=(file_sfh_data*)calloc(1,sizeof(file_sfh_data));
+ file_data[line]->id=line;
+ file_data[line]->sfh=strdup(SFH_tmp);
+ file_data[line]->td_value=(td*)calloc(1,sizeof(td));
+ file_data[line]->td_value->tdstr=strdup(TD_tmp);
+ file_data[line]->td_value->lost=0;
+ file_data[line]->td_ori=strdup(TD_ORI);
+ line++;
+ if(line==array_size)
+ {
+ array_size*=2;
+ file_data=realloc(file_data,sizeof(file_sfh_data *)*array_size);
+ }
+ }
+ printf("read file success!\n");
+ htable = NULL;
+ htable=MESA_htable_born();
+ thread_safe = 0;
+ MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int));
+ unsigned int slot_size=1024*1024*16;
+ MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(slot_size));
+ MESA_htable_mature(htable);
+ for(i=0;i<line;i++)
+ {
+ if(MESA_htable_add(htable,(char*)(file_data[i]->td_value->tdstr),32,(void *)file_data[i]->id)<0)
+ {
+ id=(long)MESA_htable_search(htable,(char*)file_data[i]->td_value->tdstr,32);
+ similiarity=GIE_sfh_similiarity(file_data[id]->sfh,(int)strlen(file_data[id]->sfh),file_data[i]->sfh,(int)strlen(file_data[i]->sfh));
+ if(similiarity<SIMILIAR)
+ {
+ file_data[id]->td_value->lost = 1;
+ file_data[i]->td_value->lost = 1;
+ }
+ }
+ }
+ for(i=0;i<line;i++)
+ {
+ fprintf(fpwrite,"%s;%s;%s;%d\n",file_data[i]->td_value->tdstr,file_data[i]->sfh,file_data[i]->td_ori,file_data[i]->td_value->lost);
+ }
+ for(i=0;i<line;i++)
+ {
+ free(file_data[i]->sfh);
+ file_data[i]->sfh=NULL;
+ free(file_data[i]->td_value->tdstr);
+ file_data[i]->td_value->tdstr=NULL;
+ free(file_data[i]->td_value);
+ file_data[i]->td_value=NULL;
+ free(file_data[i]->td_ori);
+ file_data[i]->td_ori=NULL;
+ free(file_data[i]);
+ file_data[i]=NULL;
+ }
+ fclose(fpread);
+ fclose(fpwrite);
+ return 0;
+} \ No newline at end of file
diff --git a/src/dataset_build/grain.conf b/src/dataset_build/grain.conf
new file mode 100644
index 0000000..944b337
--- /dev/null
+++ b/src/dataset_build/grain.conf
@@ -0,0 +1,5 @@
+[file]
+ripe_files_address = ../../data/td_data_set/td_data_20171207/get_lost_raw_data
+raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest
+[output]
+breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304 \ No newline at end of file
diff --git a/src/dataset_build/td_classification.py b/src/dataset_build/td_classification.py
new file mode 100644
index 0000000..8d4b97c
--- /dev/null
+++ b/src/dataset_build/td_classification.py
@@ -0,0 +1,5 @@
+from sklearn.datasets import load_iris
+from sklearn import tree
+
+with open() as infile: # TODO: input path and decision-tree training not filled in yet
+ pass \ No newline at end of file
diff --git a/src/dataset_build/vedio_id_build.c b/src/dataset_build/vedio_id_build.c
new file mode 100644
index 0000000..9faaa64
--- /dev/null
+++ b/src/dataset_build/vedio_id_build.c
@@ -0,0 +1,171 @@
+/*
+gcc -g vedio_id_build.c -o vedio_id_build -lmaatframe -I../../inc
+*/
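+/*
+Roughly: each SFH whose hashed fraction passes SFH_PASS_RATE is queried against
+a gram index; a miss gets a fresh video id (the input line number) and is
+batched for GIE_update(), a hit reuses the id stored in the matching tag, and
+every kept line is echoed to video_id.txt prefixed with its id. Note that
+total_len is never parsed from the input, so the pass-rate check is ineffective
+as written.
+*/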
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "gram_index_engine.h"
+#include <MESA/MESA_htable.h>
+#include <assert.h>
+#include <ctype.h>
+#define BUFFER_LEN (10*1024)
+#define SFH_PASS_RATE 0.9
+#define SFH_LEN (10*1024)
+#define URL_LEN (10*1024)
+
+typedef struct video_id
+{
+ long id;
+ char *sfh;
+}video_id;
+
+typedef struct cache
+{
+ GIE_digest_t ** GIE_cache;
+ long cache_size;
+ long len;
+}cache;
+
+long get_hashed_len(const char* sfh)
+{
+ char *data=(char*)malloc(strlen(sfh)+1);
+ memcpy(data,sfh, strlen(sfh));
+ data[strlen(sfh)]='\0';
+ char *token=NULL,*sub_token=NULL,*saveptr;
+ long left_offset=0,right_offset=0,hashed_length=0;
+ int ret=0,first=0;
+ for (token = data; ;token= NULL)
+ {
+ sub_token= strtok_r(token,"[", &saveptr);
+ if (sub_token == NULL)
+ {
+ break;
+ }
+ if(first==0)//jump over the first sub string.
+ {
+ first=1;
+ continue;
+ }
+ ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset);
+ if(ret!=2)
+ {
+ return 0;
+ }
+ assert(ret==2);
+ hashed_length+=right_offset-left_offset+1;
+ }
+ //printf("hashed length=%ld\n",hashed_length);
+ free(data);
+ return hashed_length/2;
+}
+
+int main(int argc,char *argv[])
+{
+ FILE *video_id_sets_file;
+ FILE *new_sfh_file;
+ const char *video_id_sets_file_dir="../../data/td_data_set/td_data_20171207/video_id_raw_data";
+ const char *new_sfh_file_dir="../../data/ripe_data/td_data_20171207/video_id.txt";
+ char *buffer=NULL;
+ int ret = 0,hashed_len = 0,total_len = 0,resultnum = 0,i = 0;
+ int update = 0,video_id = 0,j = 0;
+ int* temp_int = NULL;
+ float temp_sfh_pass = 0;
+ char *sfh_str,*url_str;
+ GIE_digest_t *sfh_video_id = NULL;
+ GIE_result_t *query_result = NULL;
+ cache *GIE_digest_cache = NULL;
+ video_id_sets_file = fopen(video_id_sets_file_dir,"r+");
+ new_sfh_file = fopen(new_sfh_file_dir,"w");
+ if(video_id_sets_file == NULL)
+ {
+ printf("open video_id_sets_file error\n");
+ return -1;
+ }
+ if(new_sfh_file == NULL)
+ {
+ printf("open new_sfh_file error\n");
+ return -1;
+ }
+ buffer = (char*)calloc(BUFFER_LEN,sizeof(char));
+ GIE_create_para_t *query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t));
+ query_result = (GIE_result_t*)calloc(1,sizeof(GIE_result_t));
+ GIE_handle_t *query_handle;
+ query_para->gram_value = 7;
+ query_para->position_accuracy = 5;
+ query_handle=GIE_create((const GIE_create_para_t *)query_para);
+ free(query_para);
+ if(query_handle==NULL)
+ {
+ printf("create GIE handle error\n");
+ return -1;
+ }
+ sfh_video_id = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t));
+ sfh_str = (char*)calloc(SFH_LEN,sizeof(char));
+ url_str = (char*)calloc(URL_LEN,sizeof(char));
+ i=0;
+ GIE_digest_cache =(cache*)calloc(1,sizeof(cache));
+ GIE_digest_cache->cache_size = 1000;
+ GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*));
+ GIE_digest_cache->len = 0;
+ while(feof(video_id_sets_file)==0)
+ {
+ i++;
+ if(i%10000==0)
+ {
+ printf("%d\n",i);
+ }
+ fgets(buffer,BUFFER_LEN-1,video_id_sets_file);
+ ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ %*[^;];%*[^;];%*[^;];%[^;];%[^;]",sfh_str,url_str);
+ if(ret!=2)
+ {
+ continue;
+ }
+ hashed_len = get_hashed_len((const char*)sfh_str);
+ temp_sfh_pass = (float)hashed_len/total_len;
+ if(temp_sfh_pass<SFH_PASS_RATE)
+ {
+ continue;
+ }
+ resultnum=GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,1);
+ if(resultnum == 0)
+ {
+ temp_int=(int*)calloc(1,sizeof(int));
+ *temp_int=i;
+ sfh_video_id = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t));
+ sfh_video_id->id=i;
+ sfh_video_id->sfh_length=strlen(sfh_str);
+ sfh_video_id->operation=GIE_INSERT_OPT;
+ sfh_video_id->cfds_lvl=5;
+ sfh_video_id->sfh=strdup(sfh_str);
+ sfh_video_id->tag=temp_int;
+ GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_video_id;
+ GIE_digest_cache->len++;
+ if(GIE_digest_cache->len==GIE_digest_cache->cache_size)
+ {
+ update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size);
+ GIE_digest_cache->len=0;
+ for(j=0;j<GIE_digest_cache->cache_size;j++)
+ {
+ free(GIE_digest_cache->GIE_cache[j]->sfh);
+ GIE_digest_cache->GIE_cache[j]->sfh=NULL;
+ free(GIE_digest_cache->GIE_cache[j]);
+ GIE_digest_cache->GIE_cache[j]=NULL;
+ }
+ }
+ fprintf(new_sfh_file,"%d,%s",i,buffer);
+ }
+ else
+ {
+ fprintf(new_sfh_file,"%d,%s",*((int*)query_result->tag),buffer);
+ }
+ }
+ free(buffer);
+ free(query_result);
+ free(sfh_video_id);
+ free(url_str);
+ free(sfh_str);
+ free(GIE_digest_cache);
+ return 0;
+} \ No newline at end of file
diff --git a/src/file_digest.conf b/src/file_digest.conf
new file mode 100644
index 0000000..a02cae2
--- /dev/null
+++ b/src/file_digest.conf
@@ -0,0 +1,3 @@
+[file]
+ripe_files_address = ../data/ripe_data/td_data_20171207/all_av_digest
+raw_file_address = ../data/td_data_20171207/td_data/all_av_digest
diff --git a/src/file_digest.py b/src/file_digest.py
new file mode 100644
index 0000000..3703794
--- /dev/null
+++ b/src/file_digest.py
@@ -0,0 +1,104 @@
+#-*-coding:utf-8-*-
+import re
+import random
+import ConfigParser
+import bisect
+import commands
+import os
+import hashlib
+
+class data_line(object):
+ """docstring for ClassName"""
+ def __init__(self):
+ super(ClassName, self).__init__()
+
+ @staticmethod
+ def if_error(data_line_str):
+ data_line_val = re.split(r';',data_line_str)
+ hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19])
+ if(term['data_num'](data_line_val) and \
+ term['not_null'](data_line_val[0]) and \
+ term['not_null'](data_line_val[1]) and \
+ term['not_null'](data_line_val[2]) and \
+ term['ysp_len'](data_line_val[3]) and \
+ term['not_null'](data_line_val[4]) and \
+ term['not_null'](data_line_val[5]) and \
+ term['td_len'](data_line_val[6]) and \
+ term['td_len'](data_line_val[7]) and \
+ term['td_len'](data_line_val[8]) and \
+ term['td_len'](data_line_val[9]) and \
+ term['td_len'](data_line_val[10]) and \
+ term['td_len'](data_line_val[11]) and \
+ term['td_len'](data_line_val[12]) and \
+ term['td_len'](data_line_val[13]) and \
+ term['td_len'](data_line_val[14]) and \
+ term['td_len'](data_line_val[15]) and \
+ term['td_len'](data_line_val[16]) and \
+ term['td_len'](data_line_val[17]) and \
+ term['not_null'](data_line_val[18]) and \
+ term['sfh_len'](data_line_val[19]) and \
+ term['not_null'](data_line_val[20]) and \
+ hashed_len/float(data_line_val[3])>0.999):
+ return data_line_val
+ else:
+ return -1
+
+class TD_fingerprint(object):
+ def __init__(self,td,td_string):
+ self.td = td
+ self.td_string = td_string
+ @staticmethod
+ def td_generate(td_string):
+ return hashlib.md5(td_string.encode('utf-8')).hexdigest()
+
+class sfh_fingerprint(object):
+
+ def __init__(self,sfh):
+ self.sfh = sfh
+
+ @staticmethod
+ def get_hashed_len(sfh):
+ p = r"\[+\d+?:+\d+?\]"
+ pattern = re.compile(p)
+ hashed_len_set = pattern.findall(sfh)
+ if (term['not_null'](hashed_len_set)):
+ hashed_len = 0
+ for x in xrange(0,len(hashed_len_set)):
+ hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x])
+ hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1])
+ return hashed_len/len(hashed_len_set)
+ else :
+ return -1
+
+term = {'td_len':(lambda x : len(x)==32),
+ 'data_num':(lambda x : len(x)==21),
+ 'url':(lambda x : x.find('NUll')),
+ 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)),
+ 'not_null':(lambda x : len(x)!=0),
+ 'ysp_len':(lambda x : int(x)!=0),
+ 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))}
+
+c_func="./"
+ripe_files=[]
+config = ConfigParser.RawConfigParser()
+config.read("file_digest.conf")
+raw_file_address=config.get("file","raw_file_address")
+ripe_files_address=config.get("file","ripe_files_address")
+print ("%s %s" %(raw_file_address,ripe_files_address))
+# num = [0,0,0,0,0,0,0]
+# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")]
+# i=0
+# for i in xrange(0,ripe_file_num):
+# outfile=open(ripe_files_address+str(i)+'.txt','w')
+# ripe_files.append(outfile)
+
+i=0
+with open(raw_file_address,'r') as infile:
+ with open(ripe_files_address,'w')as outfile:
+ for line in infile:
+ i+=1
+ if(i%10000==0):
+ print i
+ line_return = data_line.if_error(line)
+ if(line_return != -1):
+ outfile.write(str(line)) \ No newline at end of file
diff --git a/src/get_td_mistake_lost/CMakeLists.txt b/src/get_td_mistake_lost/CMakeLists.txt
new file mode 100644
index 0000000..87f4b6b
--- /dev/null
+++ b/src/get_td_mistake_lost/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT (CALCULATE)
+SET (SRC_LIST get_lost_rate.c)
+SET(CMAKE_BUILD_TYPE "Debug")
+SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb")
+SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")
+MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR})
+MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR})
+#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/)
+#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/)
+ADD_EXECUTABLE(get_lost_rate ${SRC_LIST} gram_index_engine.c)
+TARGET_LINK_LIBRARIES(get_lost_rate maatframe libMESA_htable.so pthread m)
diff --git a/src/get_td_mistake_lost/file_digest.conf b/src/get_td_mistake_lost/file_digest.conf
new file mode 100644
index 0000000..6d1c06b
--- /dev/null
+++ b/src/get_td_mistake_lost/file_digest.conf
@@ -0,0 +1,6 @@
+[file_digest]
+ripe_files_address = ../../data/ripe_data/td_data_20171207/all_av_digest
+raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest
+[new_td]
+ripe_files_address = ../../data/ripe_data/td_data_20171207/new_TD.txt
+raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest
diff --git a/src/get_td_mistake_lost/file_digest.py b/src/get_td_mistake_lost/file_digest.py
new file mode 100644
index 0000000..62786ef
--- /dev/null
+++ b/src/get_td_mistake_lost/file_digest.py
@@ -0,0 +1,104 @@
+#-*-coding:utf-8-*-
+import re
+import random
+import ConfigParser
+import bisect
+import commands
+import os
+import hashlib
+
+class data_line(object):
+ """docstring for ClassName"""
+ def __init__(self):
+ super(ClassName, self).__init__()
+
+ @staticmethod
+ def if_error(data_line_str):
+ data_line_val = re.split(r';',data_line_str)
+ hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19])
+ if(term['data_num'](data_line_val) and \
+ term['not_null'](data_line_val[0]) and \
+ term['not_null'](data_line_val[1]) and \
+ term['not_null'](data_line_val[2]) and \
+ term['ysp_len'](data_line_val[3]) and \
+ term['not_null'](data_line_val[4]) and \
+ term['not_null'](data_line_val[5]) and \
+ term['td_len'](data_line_val[6]) and \
+ term['td_len'](data_line_val[7]) and \
+ term['td_len'](data_line_val[8]) and \
+ term['td_len'](data_line_val[9]) and \
+ term['td_len'](data_line_val[10]) and \
+ term['td_len'](data_line_val[11]) and \
+ term['td_len'](data_line_val[12]) and \
+ term['td_len'](data_line_val[13]) and \
+ term['td_len'](data_line_val[14]) and \
+ term['td_len'](data_line_val[15]) and \
+ term['td_len'](data_line_val[16]) and \
+ term['td_len'](data_line_val[17]) and \
+ term['not_null'](data_line_val[18]) and \
+ term['sfh_len'](data_line_val[19]) and \
+ term['not_null'](data_line_val[20]) and \
+ hashed_len/float(data_line_val[3])>0.999):
+ return data_line_val
+ else:
+ return -1
+
+class TD_fingerprint(object):
+ def __init__(self,td,td_string):
+ self.td = td
+ self.td_string = td_string
+ @staticmethod
+ def td_generate(td_string):
+ return hashlib.md5(td_string.encode('utf-8')).hexdigest()
+
+class sfh_fingerprint(object):
+
+ def __init__(self,sfh):
+ self.sfh = sfh
+
+ @staticmethod
+ def get_hashed_len(sfh):
+ p = r"\[+\d+?:+\d+?\]"
+ pattern = re.compile(p)
+ hashed_len_set = pattern.findall(sfh)
+ if (term['not_null'](hashed_len_set)):
+ hashed_len = 0
+ for x in xrange(0,len(hashed_len_set)):
+ hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x])
+ hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1])
+ return hashed_len/len(hashed_len_set)
+ else :
+ return -1
+
+term = {'td_len':(lambda x : len(x)==32),
+ 'data_num':(lambda x : len(x)==21),
+ 'url':(lambda x : x.find('NUll')),
+ 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)),
+ 'not_null':(lambda x : len(x)!=0),
+ 'ysp_len':(lambda x : int(x)!=0),
+ 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))}
+
+c_func="./"
+ripe_files=[]
+config = ConfigParser.RawConfigParser()
+config.read("file_digest.conf")
+raw_file_address=config.get("file_digest","raw_file_address")
+ripe_files_address=config.get("file_digest","ripe_files_address")
+print ("%s %s" %(raw_file_address,ripe_files_address))
+# num = [0,0,0,0,0,0,0]
+# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")]
+# i=0
+# for i in xrange(0,ripe_file_num):
+# outfile=open(ripe_files_address+str(i)+'.txt','w')
+# ripe_files.append(outfile)
+
+i=0
+with open(raw_file_address,'r') as infile:
+ with open(ripe_files_address,'w')as outfile:
+ for line in infile:
+ i+=1
+ if(i%10000==0):
+ print i
+ line_return = data_line.if_error(line)
+ if(line_return != -1):
+ outfile.write(str(line)) \ No newline at end of file
diff --git a/src/get_td_mistake_lost/get_TD_SFH.c b/src/get_td_mistake_lost/get_TD_SFH.c
new file mode 100644
index 0000000..2ed3ecd
--- /dev/null
+++ b/src/get_td_mistake_lost/get_TD_SFH.c
@@ -0,0 +1,162 @@
+/*
+gcc -g get_TD_SFH.c -o get_TD_SFH -lmaatframe -lMESA_htable -I../include
+./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level
+*/
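+/*
+Groups new_TD.txt by TD: each TD keeps a linked list of its SFHs plus the SFH
+with the highest cumulative GIE_sfh_similiarity() against the others, and
+print_td_sfh() emits one "TD;td_ori;representative SFH" line per TD.
+*/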
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "gram_index_engine.h"
+#include <MESA/MESA_htable.h>
+#include <assert.h>
+#include <ctype.h>
+#define BUFFER_LEN (15*1024)
+#define SFH_LEN (10*1024)
+#define TD_LEN 33
+#define THREAD_SAFE 0
+#define SLOT_SIZE (1024*1024*16)
+#define TD_STR_LEN (10*1024)
+#define TIME_STR_LEN 128
+
+typedef struct sfh_link
+{
+ // char *time_str;
+ char *sfh_str;
+ char *td_ori;
+ // char *md5_32k;
+ int similiar;
+ int all_similiar;
+ // long hash_len;
+ struct sfh_link *next;
+}sfh_link;
+
+typedef struct sfh
+{
+ int all_num;
+ int all_similiar;
+ char *sfh_str;
+ // long hash_len;
+ sfh_link *sfh_link_items;
+}sfh;
+
+void print_td_sfh(const uchar *key,uint size,void *data,void *arg)
+{
+ FILE *ripe_file=(FILE*)arg;
+ sfh *temp_sfh=(sfh*)data;
+ fprintf(ripe_file,"%s;%s;%s",key,temp_sfh->sfh_link_items->td_ori,temp_sfh->sfh_str);
+}
+
+int main()
+{
+ FILE *raw_file;
+ FILE *ripe_file;
+ char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt";
+ char *ripe_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_3";
+ raw_file = fopen(raw_file_dir,"r+");
+ ripe_file = fopen(ripe_file_dir,"w+");
+ if(raw_file==NULL)
+ {
+ printf("open all_av_digest error\n");
+ return -1;
+ }
+ if(ripe_file==NULL)
+ {
+ printf("open all_av_digest_mistake_level error");
+ return -1;
+ }
+ MESA_htable_handle htable=NULL;
+ char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL;
+ int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0;
+ unsigned int slot_size=SLOT_SIZE;
+ sfh *temp_sfh=NULL;
+ sfh_link *temp_sfh_link=NULL;
+ buffer = (char*)calloc(BUFFER_LEN,sizeof(char));
+ sfh_str = (char*)calloc(SFH_LEN,sizeof(char));
+ td = (char*)calloc(TD_LEN,sizeof(char));
+ td[32]='\0';
+ td_str = (char*)calloc(TD_STR_LEN,sizeof(char));
+ // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char));
+ // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char));
+ // time_str[TIME_STR_LEN-1]='\0';
+ // md5_32k_str[32]='\0';
+ htable=MESA_htable_born();
+ MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int));
+ MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int));
+ MESA_htable_mature(htable);
+ while(feof(raw_file)==0)
+ {
+ i++;
+ if(i%100000==0)
+ {
+ printf("%d\n",i);
+ }
+ fgets(buffer,BUFFER_LEN-1,raw_file);
+ buffer[BUFFER_LEN-1]='\0';
+ // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\
+ // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str);
+ // assert(ret==5);
+ ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str);
+ assert(ret==3);
+ td[32]='\0';
+ // md5_32k_str[32]='\0';
+ if((temp_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL)
+ {
+ temp_sfh=(sfh*)calloc(1,sizeof(sfh));
+ temp_sfh->all_num=1;
+ temp_sfh->all_similiar=0;
+ temp_sfh->sfh_str=strdup(sfh_str);
+ temp_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link));
+ temp_sfh->sfh_link_items->sfh_str=strdup(sfh_str);
+ temp_sfh->sfh_link_items->td_ori=strdup(td_str);
+ // temp_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str);
+ // temp_sfh->sfh_link_items->time_str=strdup(time_str);
+ temp_sfh->sfh_link_items->similiar=0;
+ temp_sfh->sfh_link_items->all_similiar=0;
+ temp_sfh->sfh_link_items->next=NULL;
+ ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_sfh);
+ assert(ret>0);
+ }
+ else
+ {
+ temp_similiar=GIE_sfh_similiarity(temp_sfh->sfh_str,(int)strlen(temp_sfh->sfh_str),sfh_str,(int)strlen(sfh_str));
+ temp_sfh->all_similiar+=temp_similiar;
+ temp_sfh_link=temp_sfh->sfh_link_items;
+ for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next)
+ {
+ temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str));
+ temp_sfh_link->all_similiar+=temp_similiar;
+ temp_all_similiar+=temp_similiar;
+ if(temp_sfh_link->all_similiar>temp_sfh->all_similiar)
+ {
+ free(temp_sfh->sfh_str);
+ temp_sfh->sfh_str=strdup(temp_sfh_link->sfh_str);
+ temp_sfh->all_similiar=temp_sfh_link->all_similiar;
+ }
+ if(temp_sfh_link->next==NULL)
+ {
+ break;
+ }
+ }
+ temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link));
+ temp_sfh_link->next->sfh_str=strdup(sfh_str);
+ temp_sfh_link->next->td_ori=strdup(td_str);
+ // temp_sfh_link->next->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->next->time_str=strdup(time_str);
+ temp_sfh_link->next->similiar=0;
+ temp_sfh_link->next->all_similiar=temp_all_similiar;
+ temp_sfh_link->next->next=NULL;
+ temp_sfh->all_num+=1;
+ }
+ }
+ fclose(raw_file);
+ MESA_htable_iterate(htable,print_td_sfh,ripe_file);
+ free(sfh_str);
+ free(td);
+ free(td_str);
+ // free(md5_32k_str);
+ MESA_htable_destroy(htable,NULL);
+ // fclose(raw_file);
+ fclose(ripe_file);
+ return 0;
+} \ No newline at end of file
diff --git a/src/get_td_mistake_lost/get_lost_rate.c b/src/get_td_mistake_lost/get_lost_rate.c
new file mode 100644
index 0000000..d983a00
--- /dev/null
+++ b/src/get_td_mistake_lost/get_lost_rate.c
@@ -0,0 +1,210 @@
+/*
+gcc -g get_lost_rate.c -o get_lost_rate -lmaatframe -I../include
+*/
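+/*
+Two passes: every "TD;td_ori;SFH" line of TD_SFH_1 is loaded into a GIE index
+(batched through GIE_update), then each "td_str;TD;SFH" record of new_TD.txt is
+queried by its SFH; hits that carry a different TD are logged to the ripe file,
+records with at least one such hit are counted, and "lost;lines" is printed.
+*/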
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "gram_index_engine.h"
+#include <assert.h>
+#include <ctype.h>
+#define BUFFER_LEN (10*1024)
+#define CACHE_SIZE 2000000
+#define SFH_LEN (10*1024)
+#define TD_LEN 33
+#define RESULT_NUM 10000
+#define TIME_STR_LEN 128
+#define TD_STR_LEN (10*1024)
+
+typedef struct cache
+{
+ GIE_digest_t ** GIE_cache;
+ long cache_size;
+ long len;
+}cache;
+
+typedef struct GIE_tag
+{
+ char *td;
+ char *td_str;
+ char *sfh_str;
+}GIE_tag;
+
+int main()
+{
+ FILE *td_sfh_file;
+ FILE *raw_file;
+ FILE *ripe_file;
+ const char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt";
+ const char *td_sfh_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_1";
+ const char *ripe_file_dir="../../data/ripe_data/td_data_20171207/get_lost_ripe_data_1";
+ td_sfh_file = fopen(td_sfh_file_dir,"r+");
+ raw_file = fopen(raw_file_dir,"r+");
+ ripe_file = fopen(ripe_file_dir,"w+");
+ char *buffer=NULL,*sfh_str=NULL,*td=NULL,*time_str=NULL,*td_str=NULL;
+ GIE_create_para_t *query_para=NULL;
+ GIE_handle_t *query_handle=NULL;
+ GIE_result_t *query_result = NULL;
+ cache *GIE_digest_cache = NULL;
+ GIE_digest_t *sfh_td = NULL;
+ int i=0,w=0,ret=0,lost=0,j=0,update=0,resultnum=0,temp_len=0;
+ GIE_tag *temp_tag =NULL;
+ if(td_sfh_file == NULL)
+ {
+ printf("open td_sfh_file_dir error\n");
+ return -1;
+ }
+ if(raw_file == NULL)
+ {
+ printf("open raw_file_dir error\n");
+ return -1;
+ }
+ if(ripe_file == NULL)
+ {
+ printf("open ripe_file_dir error\n");
+ return -1;
+ }
+ sfh_str = (char*)calloc(SFH_LEN,sizeof(char));
+ sfh_str[SFH_LEN-1]='\0';
+ td = (char*)calloc(TD_LEN,sizeof(char));
+ td[32]='\0';
+ time_str = (char*)calloc(TIME_STR_LEN,sizeof(char));
+ time_str[TIME_STR_LEN-1]='\0';
+ buffer = (char*)calloc(BUFFER_LEN,sizeof(char));
+ td_str = (char*)calloc(TD_STR_LEN,sizeof(char));
+ query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t));
+ query_para->gram_value = 7;
+ query_para->position_accuracy = 5;
+ query_para->ED_reexamine=1;
+ query_para->format=GIE_INPUT_FORMAT_SFH;
+ query_handle=GIE_create((const GIE_create_para_t *)query_para);
+ free(query_para);
+ query_result = (GIE_result_t*)calloc(RESULT_NUM,sizeof(GIE_result_t));
+ GIE_digest_cache =(cache*)calloc(1,sizeof(cache));
+ GIE_digest_cache->cache_size = CACHE_SIZE;
+ GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*));
+ GIE_digest_cache->len = 0;
+ if(query_handle==NULL)
+ {
+ printf("create GIE handle error\n");
+ return -1;
+ }
+ while(feof(td_sfh_file)==0)
+ {
+ i++;
+ if(i%100000==0)
+ {
+ printf("%d\n",i);
+ }
+ fgets(buffer,BUFFER_LEN-1,td_sfh_file);
+ ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td,td_str,sfh_str);
+ assert(ret==3);
+ td[32]='\0';
+ sfh_td = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t));
+ sfh_td->id=i;
+ temp_len=strlen(sfh_str);
+ sfh_td->sfh_length=temp_len;
+ sfh_str[temp_len-1]='\0';
+ sfh_td->operation=GIE_INSERT_OPT;
+ sfh_td->cfds_lvl=5;
+ sfh_td->sfh=strdup(sfh_str);
+ temp_tag=(GIE_tag*)calloc(1,sizeof(GIE_tag));
+ temp_tag->td=strdup(td);
+ temp_tag->td_str=strdup(td_str);
+ temp_tag->sfh_str=strdup(sfh_str);
+ sfh_td->tag=(void*)temp_tag;
+ GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td;
+ GIE_digest_cache->len++;
+ // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM);
+ // if(resultnum==0)
+ // {
+ // sfh_td = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t));
+ // sfh_td->id=i;
+ // sfh_td->sfh_length=strlen(sfh_str);
+ // sfh_td->operation=GIE_INSERT_OPT;
+ // sfh_td->cfds_lvl=5;
+ // sfh_td->sfh=strdup(sfh_str);
+ // sfh_td->tag=(void*)strdup(td);
+ // GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td;
+ // GIE_digest_cache->len++;
+ // }
+ // else
+ // {
+ // for(j=0;j<resultnum;j++)
+ // {
+ // if(strcmp((char*)((query_result+j)->tag),td)!=0)
+ // {
+ // lost++;
+ // fprintf(ripe_file,"%s,%s,%s\n",(char*)((query_result+j)->tag),td,sfh_str);
+ // }
+ // }
+ // continue;
+ // }
+ // if(GIE_digest_cache->len==GIE_digest_cache->cache_size)
+ // {
+ // update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size);
+ // assert(update==GIE_digest_cache->len);
+ // GIE_digest_cache->len=0;
+ // for(j=0;j<GIE_digest_cache->cache_size;j++)
+ // {
+ // free(GIE_digest_cache->GIE_cache[j]->sfh);
+ // GIE_digest_cache->GIE_cache[j]->sfh=NULL;
+ // free(GIE_digest_cache->GIE_cache[j]);
+ // GIE_digest_cache->GIE_cache[j]=NULL;
+ // }
+ // }
+ // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM);
+ // for(i=0;i<resultnum;i++)
+ // {
+ // if(strcmp((char*)query_result[i]->tag,td)!=0)
+ // {
+ // lost++;
+ // }
+ // }
+ }
+ fclose(td_sfh_file);
+ update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->len);
+ for(j=0;j<GIE_digest_cache->len;j++)
+ {
+ free(GIE_digest_cache->GIE_cache[j]->sfh);
+ GIE_digest_cache->GIE_cache[j]->sfh=NULL;
+ free(GIE_digest_cache->GIE_cache[j]);
+ GIE_digest_cache->GIE_cache[j]=NULL;
+ }
+ i=0;
+ while(feof(raw_file)==0)
+ {
+ i++;
+ if(i%100000==0)
+ {
+ printf("%d\n",i);
+ }
+ fgets(buffer,BUFFER_LEN-1,raw_file);
+ // ret=sscanf(buffer,"%[^;];%[^;]",td,sfh_str);
+ // assert(ret==2);
+ // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\
+ // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %*[^;];%[^;];%*[^;];%[^;];%*[^;]",td_str,td,sfh_str);
+ ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str);
+ assert(ret==3);
+ resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM);
+ if(resultnum>1)
+ {
+ for(j=0;j<resultnum;j++)
+ {
+ if(strcmp(((GIE_tag*)(query_result+j)->tag)->td,td)!=0)
+ {
+ w=1;
+ fprintf(ripe_file,"%u,%s,%s,%s,%s,%s,%s\n",(query_result+j)->id,((GIE_tag*)((query_result+j)->tag))->td_str,((GIE_tag*)((query_result+j)->tag))->td,((GIE_tag*)((query_result+j)->tag))->sfh_str,td_str,td,sfh_str);
+ }
+ }
+ lost+=w;
+ w=0;
+ }
+
+ }
+ printf("%d;%d\n",lost,i);
+ free(sfh_str);
+ free(td);
+ free(time_str);
+ free(td_str);
+} \ No newline at end of file
diff --git a/src/get_td_mistake_lost/get_mistake_level.c b/src/get_td_mistake_lost/get_mistake_level.c
new file mode 100644
index 0000000..5f03974
--- /dev/null
+++ b/src/get_td_mistake_lost/get_mistake_level.c
@@ -0,0 +1,366 @@
+/*
+gcc -g get_mistake_level.c -o get_mistake_level -lMESA_htable -lmaatframe -I../../include
+./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level
+*/
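+/*
+Pass 1 builds, per TD, the SFH with the highest cumulative similarity to the
+other SFHs sharing that TD (the same bookkeeping as get_TD_SFH.c). Pass 2
+re-reads the input and counts, per TD, records whose similarity to that
+representative falls below SIMILIAR_RATE; print_mistake_td() is meant to dump
+every TD whose mistake ratio exceeds RAODONG_RATE together with its per-record
+similarities.
+*/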
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "gram_index_engine.h"
+#include <MESA/MESA_htable.h>
+#include <assert.h>
+#include <ctype.h>
+#define THREAD_SAFE 0
+#define SLOT_SIZE (1024*1024*16)
+#define SIMILIAR_RATE 90
+#define TD_STR_LEN (10*1024)
+#define TIME_STR_LEN 128
+#define RAODONG_RATE 0.1
+#define BUFFER_LEN (15*1024)
+#define SFH_LEN (10*1024)
+#define TD_LEN 33
+
+typedef struct sfh_link
+{
+ // char *time_str;
+ char *sfh_str;
+ char *td_ori;
+ // char *md5_32k;
+ int similiar;
+ int all_similiar;
+ // long hash_len;
+ struct sfh_link *next;
+}sfh_link;
+
+typedef struct mistake_sfh
+{
+ int mistake_num;
+ int all_num;
+ int all_similiar;
+ char *sfh_str;
+ // long hash_len;
+ sfh_link *sfh_link_items;
+}mistake_sfh;
+
+typedef struct temp_parameter
+{
+ int mistake_num;
+ FILE *ripe_file;
+}temp_parameter;
+
+long get_hashed_len(const char* sfh)
+{
+ char *data=(char*)malloc(strlen(sfh)+1);
+ memcpy(data,sfh, strlen(sfh));
+ data[strlen(sfh)]='\0';
+ char *token=NULL,*sub_token=NULL,*saveptr;
+ long left_offset=0,right_offset=0,hashed_length=0;
+ int ret=0,first=0;
+ for (token = data; ; token= NULL)
+ {
+ sub_token= strtok_r(token,"[", &saveptr);
+ if (sub_token == NULL)
+ {
+ break;
+ }
+ if(first==0)//jump over the first sub string.
+ {
+ first=1;
+ continue;
+ }
+ ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset);
+ if(ret!=2)
+ {
+ return 0;
+ }
+ assert(ret==2);
+ hashed_length+=right_offset-left_offset+1;
+ }
+ //printf("hashed length=%ld\n",hashed_length);
+ free(data);
+ return hashed_length/2;
+}
+
+void print_mistake_td(const uchar *key,uint size,void *data,void *arg)
+{
+ temp_parameter *parameter = (temp_parameter*)arg;
+ mistake_sfh *temp_mistake_sfh=(mistake_sfh*)data;
+ float temp_rate=0;
+ temp_rate=(float)temp_mistake_sfh->mistake_num/(float)temp_mistake_sfh->all_num;
+ if(temp_rate>RAODONG_RATE)
+ {
+ parameter->mistake_num+=temp_mistake_sfh->mistake_num;
+ fprintf(parameter->ripe_file,"%d;%s\n",temp_mistake_sfh->mistake_num,temp_mistake_sfh->sfh_str);
+ sfh_link *temp_sfh_link=temp_mistake_sfh->sfh_link_items;
+ for(;;temp_sfh_link=temp_sfh_link->next)
+ {
+ if(temp_sfh_link==NULL)
+ {
+ break;
+ }
+ temp_sfh_link->similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str));
+ // fprintf(parameter->ripe_file,"%s,%d;%s;%s;%s\n",temp_sfh_link->time_str,temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori,temp_sfh_link->md5_32k);
+ fprintf(parameter->ripe_file,"%d;%s;%s\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori);
+ }
+ fprintf(parameter->ripe_file,"\n");
+ }
+}
+
+int main(int argc,char *argv[])
+{
+ FILE *raw_file;
+ FILE *ripe_file;
+ char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt";
+ char *ripe_file_dir="../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_3";
+ char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL;
+ raw_file = fopen(raw_file_dir,"r+");
+ ripe_file = fopen(ripe_file_dir,"w+");
+ int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0;
+ long temp_hash_len=0;
+ unsigned int slot_size=SLOT_SIZE;
+ mistake_sfh *temp_mistake_sfh=NULL;
+ sfh_link *temp_sfh_link=NULL;
+ MESA_htable_handle htable=NULL;
+ temp_parameter *parameter=NULL;
+ if(raw_file==NULL)
+ {
+ printf("open all_av_digest error\n");
+ return -1;
+ }
+
+
+ if(ripe_file==NULL)
+ {
+ printf("open all_av_digest_mistake_level error");
+ return -1;
+ }
+ buffer = (char*)calloc(BUFFER_LEN,sizeof(char));
+ sfh_str = (char*)calloc(SFH_LEN,sizeof(char));
+ td = (char*)calloc(TD_LEN,sizeof(char));
+ td[32]='\0';
+ td_str = (char*)calloc(TD_STR_LEN,sizeof(char));
+ // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char));
+ // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char));
+ // time_str[TIME_STR_LEN-1]='\0';
+ // md5_32k_str[32]='\0';
+ htable=MESA_htable_born();
+ MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int));
+ MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int));
+ MESA_htable_mature(htable);
+ parameter=(temp_parameter*)calloc(1,sizeof(temp_parameter));
+ parameter->mistake_num=0;
+ parameter->ripe_file=ripe_file;
+ while(feof(raw_file)==0)
+ {
+ i++;
+ if(i%100000==0)
+ {
+ printf("%d\n",i);
+ }
+ fgets(buffer,BUFFER_LEN-1,raw_file);
+ buffer[BUFFER_LEN-1]='\0';
+ // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\
+ // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str);
+ ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str);
+ assert(ret==3);
+ td[32]='\0';
+ // md5_32k_str[32]='\0';
+ if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL)
+ {
+ temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh));
+ temp_mistake_sfh->mistake_num=0;
+ temp_mistake_sfh->all_num=1;
+ temp_mistake_sfh->all_similiar=0;
+ // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str);
+ temp_mistake_sfh->sfh_str=strdup(sfh_str);
+ temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link));
+ temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str);
+ temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str);
+ // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str);
+ // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str);
+ temp_mistake_sfh->sfh_link_items->similiar=0;
+ temp_mistake_sfh->sfh_link_items->all_similiar=0;
+ temp_mistake_sfh->sfh_link_items->next=NULL;
+ ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh);
+ assert(ret>0);
+ }
+ else
+ {
+ temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str));
+ temp_mistake_sfh->all_similiar+=temp_similiar;
+ temp_sfh_link=temp_mistake_sfh->sfh_link_items;
+ for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next)
+ {
+ // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))<SIMILIAR_RATE)
+ // {
+ // temp_mistake=1;
+ // }
+ temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str));
+ temp_sfh_link->all_similiar+=temp_similiar;
+ temp_all_similiar+=temp_similiar;
+ if(temp_sfh_link->all_similiar>temp_mistake_sfh->all_similiar)
+ {
+ free(temp_mistake_sfh->sfh_str);
+ temp_mistake_sfh->sfh_str=strdup(temp_sfh_link->sfh_str);
+ temp_mistake_sfh->all_similiar=temp_sfh_link->all_similiar;
+ }
+ if(temp_sfh_link->next==NULL)
+ {
+ break;
+ }
+ }
+ // if(temp_hash_len>temp_mistake_sfh->hash_len)
+ // {
+ // temp_mistake_sfh->hash_len=temp_hash_len;
+ // free(temp_mistake_sfh->sfh_str);
+ // temp_mistake_sfh->sfh_str=strdup(sfh_str);
+ // }
+ temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link));
+ temp_sfh_link->next->sfh_str=strdup(sfh_str);
+ temp_sfh_link->next->td_ori=strdup(td_str);
+ // temp_sfh_link->next->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->next->time_str=strdup(time_str);
+ temp_sfh_link->next->similiar=0;
+ temp_sfh_link->next->all_similiar=temp_all_similiar;
+ temp_sfh_link->next->next=NULL;
+ temp_mistake_sfh->all_num+=1;
+ }
+ }
+ fclose(raw_file);
+ raw_file = fopen(raw_file_dir,"r+");
+ if(raw_file==NULL)
+ {
+ printf("open all_av_digest error\n");
+ return -1;
+ }
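+ /*
+  * Second pass: re-read the same file and compare each line's SFH against the
+  * representative chosen for its TD in the first pass; a similarity below
+  * SIMILIAR_RATE counts as a mistake for that TD.  The per-TD results are then
+  * written out by print_mistake_td via MESA_htable_iterate below.
+  */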
+ i=0;
+ while(fgets(buffer,BUFFER_LEN-1,raw_file)!=NULL)   /* stop at EOF; avoids re-processing the last line */
+ {
+ i++;
+ if(i%10000==0)
+ {
+ printf("%d\n",i);
+ }
+ // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\
+ // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str);
+ ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str);
+ assert(ret==3);
+ td[32]='\0';
+ // md5_32k_str[32]='\0';
+ temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN);
+ assert(temp_mistake_sfh!=NULL);
+ // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL)
+ // {
+ // temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh));
+ // temp_mistake_sfh->num=0;
+ // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str);
+ // temp_mistake_sfh->sfh_str=strdup(sfh_str);
+ // temp_sfh_link=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_sfh_link->sfh_str=strdup(sfh_str);
+ // temp_sfh_link->td_ori=strdup(td_str);
+ // temp_sfh_link->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->time_str=strdup(time_str);
+ // temp_sfh_link->next=NULL;
+ // temp_mistake_sfh->sfh_link_items=temp_sfh_link;
+ // ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh);
+ // assert(ret>0);
+ // }
+ // else
+ // {
+ // temp_hash_len=get_hashed_len(sfh_str);
+ // if(temp_hash_len>temp_mistake_sfh->hash_len)
+ // {
+ // temp_sfh_link->hash_len=get_hashed_len();
+ // free(temp_sfh_link->sfh_str);
+ // temp_sfh_link->sfh_str=strdup(sfh_str);
+ // }
+ temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str));
+ if(temp_similiar<SIMILIAR_RATE)
+ {
+ temp_mistake_sfh->mistake_num+=1;
+ }
+ // if(temp_mistake_sfh->sfh_link_items!=NULL)
+ // {
+ // temp_sfh_link=temp_mistake_sfh->sfh_link_items;
+ // for(;;temp_sfh_link=temp_sfh_link->next)
+ // {
+ // // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))<SIMILIAR_RATE)
+ // // {
+ // // temp_mistake=1;
+ // // }
+ // if(temp_sfh_link->next==NULL)
+ // {
+ // break;
+ // }
+ // }
+ // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_sfh_link->next->sfh_str=strdup(sfh_str);
+ // temp_sfh_link->next->td_ori=strdup(td_str);
+ // temp_sfh_link->next->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->next->time_str=strdup(time_str);
+ // temp_sfh_link->next->similiar=temp_similiar;
+ // temp_sfh_link->next->next=NULL;
+ // }
+ // else
+ // {
+ // temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str);
+ // temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str);
+ // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str);
+ // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str);
+ // temp_mistake_sfh->sfh_link_items->similiar=temp_similiar;
+ // temp_mistake_sfh->sfh_link_items->next=NULL;
+ // }
+ // if(temp_mistake==1)
+ // {
+ // temp_mistake_sfh->num+=temp_mistake;
+ // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_sfh_link->next->sfh_str=strdup(sfh_str);
+ // temp_sfh_link->next->td_ori=strdup(td_str);
+ // temp_sfh_link->next->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->next->next=NULL;
+ // temp_mistake=0;
+ // }
+ }
+ fclose(raw_file);
+ // raw_file=NULL;
+ // raw_file = fopen(raw_file_dir,"r+");
+ // if(raw_file==NULL)
+ // {
+ // printf("open all_av_digest error\n");
+ // return -1;
+ // }
+ // i=0;
+ // while(feof(raw_file)==0)
+ // {
+ // i++;
+ // if(i%10000==0)
+ // {
+ // printf("%d\n",i);
+ // }
+ // fgets(buffer,BUFFER_LEN-1,raw_file);
+ // buffer[BUFFER_LEN-1]='\0';
+ // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %*[^;];%[^;];%*[^;];%*[^;];%*[^;]",td);
+ // assert(ret==1);
+ // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))!=NULL)
+ // {
+ // fprintf(ripe_file,"%d;%s",temp_mistake_sfh->num,buffer);
+ // }
+ // }
+ MESA_htable_iterate(htable,print_mistake_td,(void*)parameter);
+ printf("%d,%d\n",parameter->mistake_num,i);
+ free(buffer);
+ free(sfh_str);
+ free(td);
+ free(td_str);
+ // free(md5_32k_str);
+ MESA_htable_destroy(htable,NULL);
+ // fclose(raw_file);
+ fclose(ripe_file);
+ return 0;
+} \ No newline at end of file
diff --git a/src/get_td_mistake_lost/get_td_mistake_lost.sh b/src/get_td_mistake_lost/get_td_mistake_lost.sh
new file mode 100644
index 0000000..7c851b8
--- /dev/null
+++ b/src/get_td_mistake_lost/get_td_mistake_lost.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+python new_TD.py
+./get_mistake_level
+./get_TD_SFH
+./get_lost_rate
diff --git a/src/get_td_mistake_lost/gram_index_engine.c b/src/get_td_mistake_lost/gram_index_engine.c
new file mode 100644
index 0000000..0f503db
--- /dev/null
+++ b/src/get_td_mistake_lost/gram_index_engine.c
@@ -0,0 +1,1354 @@
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+#include<math.h>
+#include<assert.h>
+#include<MESA/MESA_htable.h>
+#include<unistd.h>
+
+#include "gram_index_engine.h"
+#include "queue.h"
+
+#define HTABLE_SIZE (1024*1024)
+#define GRAM_CNT_MAX 2
+#define GRAM_MAX 128
+#define TOLERENCE_SIZE 0
+#define UNION_INIT_SIZE 1000
+#define BLOCKSIZE_MIN 3
+#define MEM_OCCUPY 1
+#define CNT_MAX 10
+#define GRAM_CNT_THRESHOLD 10
+#define QUERY_LEN_ACCURACY 0.1
+#define HTABLE_NUM 8
+//#define GIE_INPUT_FORMAT_SFH 1
+//#define GIE_INPUT_FORMAT_PLAIN 0
+#define MAX_LENGTH 10000
+#define KEY_MAX_LENGTH 10
+#define EDIT_DISTN_INSERT_COST 1
+#define EDIT_DISTN_REMOVE_COST 1
+#define EDIT_DISTN_REPLACE_COST 2
+#define MIN(x,y) ((x)<(y)?(x):(y))
+
+int before(unsigned int off1, unsigned int off2)
+{
+ return (signed int)(off1-off2)<0;
+}
+#define after(off2,off1) before(off1,off2)
+
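+/*
+ * Internal engine state: one id table (digest id -> id_table_data) plus
+ * HTABLE_NUM index tables mapping each n-gram key to the digests that contain
+ * it.  Grams are spread over the index tables by the byte sum of the key
+ * modulo HTABLE_NUM (see grab_key_set and GIE_part_query).
+ */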
+typedef struct
+{
+ unsigned int user_gram_value;
+ unsigned int user_position_accuracy;
+ short ED_reexamine;
+ short input_format;
+ MESA_htable_handle id_table;
+ MESA_htable_handle index_table[HTABLE_NUM];
+ unsigned long long mem_occupy;
+ unsigned long long hash_cnt;
+}GIE_handle_inner_t;
+
+
+struct linklist_node
+{
+ short * position;
+ struct id_table_data * basicinfo;
+ short size;
+ short index;
+ unsigned long long blocksize;
+ TAILQ_ENTRY(linklist_node) listentry;
+};
+
+
+struct index_table_data
+{
+ struct TQ * listhead;
+ int cnt;
+};
+
+
+struct id_table_data
+{
+ unsigned int id;
+ short sfh_length;
+ short gram_cnt;
+ unsigned long long blocksize;
+ char * sfh;
+ void * tag;
+ char cfds_lvl;
+};
+
+
+struct htable_handle
+{
+ MESA_htable_handle runtime_table;
+ MESA_htable_handle para;
+};
+
+struct key_list_node
+{
+ char * key;
+ int digest_id;
+ int pos;
+ unsigned long long blocksize;
+ TAILQ_ENTRY(key_list_node) keylistentry;
+};
+
+
+unsigned long long hash_cnt;
+unsigned long long cnt_sum;
+
+TAILQ_HEAD(TQ, linklist_node);
+TAILQ_HEAD(KL, key_list_node);
+
+void idtable_free(void * data);
+void indextable_free(void * data);
+int key_compare(const uchar * key1, uint size1, const uchar * key2, uint size2);
+int GIE_insert_indextable(MESA_htable_handle handle, struct id_table_data * info, char * key, unsigned int index,unsigned long long blocksize);
+
+int GIE_delete_from_indextable_by_key(MESA_htable_handle handle, char * key, unsigned int id);
+int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t * digest);
+int GIE_cmp(const void * a, const void * b);
+inline unsigned int get_real_length(const char * string, unsigned int length);
+void print_item_iterate(const uchar * key, unsigned int size, void * data, void * user);
+inline unsigned long long calc_fh_blocksize(unsigned long long orilen);
+inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len);
+
+MESA_htable_handle copy_htable(void * htable_para,void (* func)(const uchar * key, uint size, void * data, void *user),void (*free_fuc)(void * data));
+void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user);
+void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user);
+
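+/*
+ * Illustrative use of the engine (a sketch only; the GIE_* types are declared
+ * in gram_index_engine.h, and the parameter values below are made-up examples):
+ *
+ *   GIE_create_para_t para = {0};
+ *   para.gram_value = 7;                 // n-gram width
+ *   para.position_accuracy = 16;         // allowed positional drift
+ *   para.format = GIE_INPUT_FORMAT_SFH;  // or GIE_INPUT_FORMAT_PLAIN
+ *   para.ED_reexamine = 1;               // re-score hits by edit distance
+ *   GIE_handle_t *h = GIE_create(&para);
+ *   GIE_update(h, digests, n);           // digests: array of n GIE_digest_t* requests
+ *   GIE_result_t results[16];
+ *   int hits = GIE_query(h, sfh, (int)strlen(sfh), results, 16);
+ *   GIE_destory(h);
+ */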
+GIE_handle_t * GIE_create(const GIE_create_para_t * para)
+{
+ int i = 0;
+ GIE_handle_inner_t * handle = (GIE_handle_inner_t *)calloc(1, sizeof(GIE_handle_inner_t));
+ handle->mem_occupy = 0;
+ handle->mem_occupy += sizeof(GIE_handle_inner_t);
+
+ handle->user_gram_value = para->gram_value;
+ handle->user_position_accuracy = para->position_accuracy;
+ handle->input_format = para->format;
+ //handle->user_cmp = GIE_INPUT_FORMAT_PLAIN;
+ handle->ED_reexamine = para->ED_reexamine;
+ handle->hash_cnt = 0;
+
+
+ MESA_htable_create_args_t idtable_args,indextable_args[HTABLE_NUM];
+ memset(&idtable_args, 0, sizeof(idtable_args));
+ idtable_args.thread_safe = 0;
+ idtable_args.hash_slot_size = HTABLE_SIZE;
+ idtable_args.max_elem_num = 0;
+ idtable_args.expire_time = 0;
+ idtable_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO;
+ idtable_args.key_comp = NULL;
+ idtable_args.key2index = NULL;
+ idtable_args.data_free = idtable_free;
+ idtable_args.data_expire_with_condition = NULL;
+ idtable_args.recursive = 0;
+ handle->id_table = MESA_htable_create(&idtable_args, sizeof(idtable_args));
+
+ for(i = 0;i < HTABLE_NUM;i++)
+ {
+ memset(&indextable_args[i], 0, sizeof(indextable_args[i]));
+ indextable_args[i].thread_safe = 0;
+ indextable_args[i].hash_slot_size = HTABLE_SIZE;
+ indextable_args[i].max_elem_num = 0;
+ indextable_args[i].expire_time = 0;
+ indextable_args[i].eliminate_type = HASH_ELIMINATE_ALGO_FIFO;
+ indextable_args[i].key_comp = key_compare;
+ indextable_args[i].key2index = NULL;
+ indextable_args[i].data_free = indextable_free;
+ indextable_args[i].data_expire_with_condition = NULL;
+ indextable_args[i].recursive = 0;
+ handle->index_table[i] = MESA_htable_create(&indextable_args[i], sizeof(indextable_args[i]));
+ }
+
+ return (GIE_handle_t *)(handle);
+}
+
+int key_compare(const uchar * key1, uint size1, const uchar * key2, uint size2)
+{
+ return ( (*(long*)key1) - (*(long*)key2));
+}
+
+
+void idtable_free(void * data)
+{
+ struct id_table_data * tmp = (struct id_table_data *)data;
+ free(tmp->sfh);
+ tmp->sfh = NULL;
+ tmp->tag = NULL;
+ free(tmp);
+ tmp = NULL;
+
+ return;
+}
+
+void indextable_delete_with_threshold(MESA_htable_handle htable_handle, struct index_table_data * tmp, char * key) /* callers pass the handle itself, not a pointer to it */
+{
+ int key_length = strnlen(key,KEY_MAX_LENGTH);
+ struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead);
+ while(tmp_node != NULL)
+ {
+ struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node,listentry);
+ if(tmp_node->basicinfo->gram_cnt <= GRAM_CNT_THRESHOLD)
+ {
+ tmp_node = linklist_tmp;
+ continue;
+ }
+ TAILQ_REMOVE(tmp->listhead, tmp_node, listentry);
+ tmp_node->basicinfo->gram_cnt--;
+ tmp->cnt--;
+ if(TAILQ_EMPTY(tmp->listhead) == 1)
+ {
+ //_handle->hash_cnt--;
+ //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ));
+ if(MESA_htable_del(htable_handle, (const uchar *)(key), key_length, indextable_free) < 0)
+ {
+ printf("indextable backtrack delete error!\n");
+ assert(0);
+ return;
+ }
+ }
+ //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp_node->size));
+ free(tmp_node->position);
+ tmp_node->position = NULL;
+ free(tmp_node);
+ tmp_node = NULL;
+ tmp_node = linklist_tmp;
+
+ }
+ return;
+}
+
+
+void indextable_free(void * data)
+{
+ struct index_table_data * tmp = (struct index_table_data *)data;
+ struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead);
+ while(tmp_node != NULL)
+ {
+ struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry);
+ TAILQ_REMOVE(tmp->listhead, tmp_node, listentry);
+ tmp->cnt--;
+ free(tmp_node->position);
+ tmp_node->position = NULL;
+ free(tmp_node);
+ tmp_node = NULL;
+ tmp_node = linklist_tmp;
+ }
+ free(tmp->listhead);
+ tmp->listhead = NULL;
+ free(tmp);
+ tmp = NULL;
+ return;
+}
+
+
+void indextable_free_cnt(void * data)
+{
+ struct index_table_data * tmp = (struct index_table_data *)data;
+ hash_cnt++;
+ cnt_sum += tmp->cnt;
+ struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead);
+ while(tmp_node != NULL)
+ {
+ struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry);
+ TAILQ_REMOVE(tmp->listhead, tmp_node, listentry);
+ tmp->cnt--;
+ free(tmp_node->position);
+ tmp_node->position = NULL;
+ free(tmp_node);
+ tmp_node = NULL;
+ tmp_node = linklist_tmp;
+ }
+ free(tmp->listhead);
+ tmp->listhead = NULL;
+ free(tmp);
+ tmp = NULL;
+ return;
+}
+
+void print_item_iterate_idtable(const uchar * key, uint size, void * data, void * user)
+{
+ struct id_table_data * id_data = (struct id_table_data *)data;
+ printf("id:%u\n",id_data->id);
+}
+
+
+
+void print_item_iterate(const uchar * key, uint size, void * data, void * user)
+{
+ struct index_table_data * index_data = (struct index_table_data *)data;
+ printf("%s %d\n", (char *)key, index_data->cnt);
+ struct linklist_node * tmp_node = NULL;
+ int i = 0;
+ TAILQ_FOREACH(tmp_node, index_data->listhead, listentry)
+ {
+ printf("id = %u\n",tmp_node->basicinfo->id);
+ printf("position is :\n");
+ for(i = 0;i < tmp_node->index;i++)
+ {
+ printf("%d ",tmp_node->position[i]);
+ }
+ printf("\n");
+ }
+ printf("\n");
+}
+
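+/*
+ * Levenshtein distance computed with a rolling pair of rows.  Insert/remove
+ * cost 1 and replace cost 2 mean two completely different strings score close
+ * to s1len+s2len, which the *_similiarity helpers below map onto a 0..100
+ * confidence.
+ */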
+int edit_distn(const char *s1, int s1len, const char *s2, int s2len)
+{
+ long int max_len = 0;
+ if(s1len >= s2len)
+ {
+ max_len = s1len;
+ }
+ else
+ {
+ max_len = s2len;
+ }
+ int **t = (int **)malloc(2*sizeof(int *));
+ t[0] = (int *)malloc((max_len +1)*sizeof(int));
+ t[1] = (int *)malloc((max_len +1)*sizeof(int));
+ //int t[2][EDIT_DISTN_MAXLEN+1];
+ int *t1 = t[0];
+ int *t2 = t[1];
+ int *t3;
+ size_t i1, i2;
+ for (i2 = 0; i2 <= s2len; i2++)
+ t[0][i2] = i2 * EDIT_DISTN_REMOVE_COST;
+ for (i1 = 0; i1 < s1len; i1++) {
+ t2[0] = (i1 + 1) * EDIT_DISTN_INSERT_COST;
+ for (i2 = 0; i2 < s2len; i2++) {
+ int cost_a = t1[i2+1] + EDIT_DISTN_INSERT_COST;
+ int cost_d = t2[i2] + EDIT_DISTN_REMOVE_COST;
+ int cost_r = t1[i2] + (s1[i1] == s2[i2] ? 0 : EDIT_DISTN_REPLACE_COST);
+ t2[i2+1] = MIN(MIN(cost_a, cost_d), cost_r);
+ }
+ t3 = t1;
+ t1 = t2;
+ t2 = t3;
+ }
+ long int ret = t1[s2len];
+ free(t[0]);
+ free(t[1]);
+ free(t);
+ return ret;
+ //return t1[s2len];
+}
+
+
+void GIE_destory(GIE_handle_t * handle)
+{
+ GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle);
+ //printf("hash_cnt:%llu\n",_handle->hash_cnt);
+ //printf("mem_occupy:%llu\n",_handle->mem_occupy);
+ int i = 0;
+ for(i = 0;i < HTABLE_NUM;i++)
+ {
+ MESA_htable_destroy(_handle->index_table[i], indextable_free_cnt);
+ }
+ MESA_htable_destroy(_handle->id_table, idtable_free);
+ //printf("index_free hash_cnt :%llu\n", hash_cnt);
+ //printf("cnt sum :%llu\n",cnt_sum);
+ free(_handle);
+ _handle = NULL;
+}
+
+
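+/*
+ * Slide a gram_value-wide window over str_begin and queue one key_list_node
+ * per n-gram.  The byte sum of the key modulo HTABLE_NUM picks which
+ * to_process_list bucket (and therefore which index table) the gram belongs
+ * to; pos records the gram's offset inside the string.
+ */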
+int grab_key_set(char * str_begin,short str_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list, unsigned long long blocksize)
+{
+ int k = 0,j = 0;
+ char * tmp_gram = str_begin;
+ char key[gram_value+1];
+ int sum = 0,htable_index = 0;
+ if(str_length < gram_value)
+ {
+ return 0;
+ }
+ str_length = MIN(str_length,strnlen(str_begin,str_length));
+ *gram_cnt = str_length - gram_value + 1;
+ //printf("str_length:%d\n",str_length);
+ for(k = 0; k < str_length - gram_value + 1; k++)
+ {
+ sum = 0;
+ memset(key,'\0', gram_value+1);
+ memcpy(key, tmp_gram++, gram_value);
+ //printf("k:%d key:%s\n",k,key);
+ for(j = 0; j < gram_value; j++)
+ {
+ sum += key[j];
+ }
+ htable_index = sum%HTABLE_NUM;
+ struct key_list_node *tmp_node = (struct key_list_node *)calloc(1,sizeof(struct key_list_node));
+ tmp_node->key = (char *)calloc(gram_value+1,sizeof(char));
+ memcpy(tmp_node->key,key,gram_value);
+ tmp_node->digest_id = i;
+ tmp_node->pos = k;
+ tmp_node->blocksize = blocksize;
+ TAILQ_INSERT_TAIL(to_process_list[htable_index], tmp_node, keylistentry);
+ }
+ return 1;
+}
+int sfh_grab_key_set(char *sfh,short sfh_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list)
+{
+ int t = 0;
+ char * tmp_gram = sfh;
+ unsigned long long blocksize = 0;
+ for(t = 0; t < 2;t++)
+ {
+ blocksize = get_blocksize_from_head(tmp_gram, sfh_length);
+ while(*tmp_gram != '\0')
+ {
+ if(*tmp_gram == ':')
+ {
+ tmp_gram++;
+ break;
+ }
+ tmp_gram++;
+ }
+ unsigned int real_length = get_real_length(tmp_gram, sfh_length);
+ if(real_length < gram_value)
+ {
+ if(t==0)
+ {
+ return 0;
+ }
+ else
+ {
+ continue;
+ }
+ }
+ grab_key_set(tmp_gram, real_length, i, gram_value, gram_cnt, to_process_list, blocksize);
+ while(*tmp_gram != '\0')
+ {
+ if(*tmp_gram == '#')
+ {
+ tmp_gram++;
+ break;
+ }
+ tmp_gram++;
+ }
+ }
+ return 1;
+}
+
+void free_key_set(struct KL ** to_process_list,int size)
+{
+ int i = 0;
+ for(i = 0;i < size;i++)
+ {
+ struct key_list_node *tmp_node = TAILQ_FIRST(to_process_list[i]);
+ while(tmp_node != NULL)
+ {
+ struct key_list_node *key_list_tmp = TAILQ_NEXT(tmp_node, keylistentry);
+ TAILQ_REMOVE(to_process_list[i], tmp_node, keylistentry);
+ free(tmp_node->key);
+ tmp_node->key = NULL;
+ free(tmp_node);
+ tmp_node = NULL;
+ tmp_node = key_list_tmp;
+ }
+ free(to_process_list[i]);
+ to_process_list[i]= NULL;
+ }
+}
+
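+/*
+ * Batch update with a copy-and-swap scheme: the id table and every index
+ * table are duplicated, the GIE_INSERT_OPT / GIE_DELETE_OPT requests are
+ * applied to the copies, the live handles are swapped in, and the old tables
+ * are destroyed after a short usleep() -- presumably to let readers of the
+ * previous tables drain.
+ */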
+int GIE_update(GIE_handle_t * handle,GIE_digest_t * * digests,int size)
+{
+ GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle);
+ struct id_table_data * info = NULL;
+ int success_cnt = 0;
+ int m = 0, i = 0, grab_ret = 0;
+ short gram_cnt = 0;
+ unsigned int input_fh_len = 0;
+ unsigned int gram_value = _handle->user_gram_value;
+ struct KL* to_process_list[HTABLE_NUM];
+
+ MESA_htable_handle htable_index_copy;
+ MESA_htable_handle htable_id_copy;
+ MESA_htable_handle htable_tmp_index=NULL,htable_tmp_id=NULL;
+ struct htable_handle * htable_copied_id_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle));
+ struct htable_handle * htable_copied_index_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle));
+
+ htable_copied_id_para->runtime_table = _handle->id_table;
+ htable_copied_id_para->para = NULL;
+ htable_id_copy = copy_htable((void *)htable_copied_id_para, copy_idtable_item_iterate,idtable_free);
+
+ MESA_htable_handle garbage_htable[HTABLE_NUM];
+ /*if(MESA_htable_iterate(htable_id_copy, print_item_iterate_idtable, NULL) == -1)
+ {
+ printf("iterate error!\n");
+ }
+ printf("size:%u\n",id_size);*/
+
+ for(m = 0;m < HTABLE_NUM;m++)
+ {
+ to_process_list[m]=(struct KL*)calloc(1,sizeof(struct KL));
+ TAILQ_INIT(to_process_list[m]);
+ }
+
+ for(i = 0; i < size; i++)
+ {
+ switch(digests[i]->operation)
+ {
+ case GIE_INSERT_OPT:
+ {
+ assert(digests[i]->tag!=NULL);
+ if(_handle->input_format == GIE_INPUT_FORMAT_SFH)
+ {
+ grab_ret = sfh_grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list);
+ }
+ else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN)
+ {
+
+ grab_ret = grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list,0);
+ }
+ if(grab_ret == 0)
+ {
+ continue;
+ }
+ else
+ {
+ info = (struct id_table_data *)calloc(1,sizeof(struct id_table_data));
+ input_fh_len = digests[i]->sfh_length;
+ info->sfh = (char *)calloc(input_fh_len + 1,sizeof(char));
+ memcpy(info->sfh, digests[i]->sfh, input_fh_len);
+ _handle->mem_occupy += sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1);
+ info->sfh_length = digests[i]->sfh_length;
+ info->gram_cnt = gram_cnt;
+
+ /*int tag_len = strnlen(digests[i]->tag,MAX_LENGTH);
+ info->tag = (char *)calloc(tag_len+1,sizeof(char));
+ memcpy(info->tag,digests[i]->tag,tag_len);*/
+ info->tag = digests[i]->tag;
+
+ info->id = digests[i]->id;
+ info->cfds_lvl = digests[i]->cfds_lvl;
+ if(_handle->input_format == GIE_INPUT_FORMAT_SFH)
+ {
+ info->blocksize = get_blocksize_from_head(digests[i]->sfh, digests[i]->sfh_length);
+ }
+ else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN)
+ {
+ info->blocksize = 0;
+ }
+
+ if(MESA_htable_add(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), (const void *)info) < 0)
+ {
+ _handle->mem_occupy -= (sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1));
+ free(info->sfh);
+ info->sfh = NULL;
+ free(info);
+ info = NULL;
+ continue;
+ }
+ }
+ success_cnt ++;
+ break;
+ }
+
+ case GIE_DELETE_OPT:
+ {
+
+ struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(htable_id_copy, \
+ (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id));
+ if(ret!= NULL)
+ {
+ if(_handle->input_format == GIE_INPUT_FORMAT_SFH)
+ {
+ success_cnt += sfh_grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list);
+ }
+ else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN)
+ {
+
+ success_cnt += grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list,0);
+ }
+ }
+ else
+ {
+ break;
+ }
+ if(MESA_htable_del(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free) < 0)
+ {
+ printf("delete id failed!");
+ assert(0);
+ }
+ //success_cnt += GIE_delete(_handle, digests[i]);
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ }
+ unsigned int digest_id = 0;
+ struct id_table_data * tmp_info= NULL;
+
+ for(i = 0;i < HTABLE_NUM;i++)
+ {
+ htable_copied_index_para->runtime_table = _handle->index_table[i];
+ htable_copied_index_para->para = htable_id_copy;
+ htable_index_copy = copy_htable((void *)htable_copied_index_para,copy_indextable_item_iterate,indextable_free);
+ struct key_list_node * tmp_node;
+ TAILQ_FOREACH(tmp_node, to_process_list[i], keylistentry)
+ {
+ digest_id = tmp_node->digest_id;
+ if(digests[digest_id]->operation == GIE_INSERT_OPT)
+ {
+ tmp_info =(struct id_table_data *)MESA_htable_search(htable_id_copy, (const uchar *)(&(digests[digest_id])->id), \
+ sizeof((digests[digest_id])->id));
+ if(tmp_info == NULL)
+ {
+ printf("id %u not insert\n",digests[digest_id]->id);
+ }
+ if(GIE_insert_indextable(htable_index_copy, tmp_info, tmp_node->key, tmp_node->pos,tmp_node->blocksize) < 0)
+ {
+ printf("insert %d indextable failed!\n",digests[digest_id]->id);
+ continue;
+ }
+ }
+ else if(digests[digest_id]->operation == GIE_DELETE_OPT)
+ {
+ if(GIE_delete_from_indextable_by_key(htable_index_copy, tmp_node->key, (digests[digest_id])->id) < 0)
+ {
+ printf("delete %d indextable failed!\n",digests[digest_id]->id);
+ continue;
+ }
+ }
+ }
+ htable_tmp_index= _handle->index_table[i];
+ _handle->index_table[i] = htable_index_copy;
+ garbage_htable[i]=htable_tmp_index;
+ }
+
+ htable_tmp_id = _handle->id_table;
+ _handle->id_table = htable_id_copy;
+ usleep(200);
+ MESA_htable_destroy(htable_tmp_id, idtable_free);
+ /*if(MESA_htable_iterate(_handle->index_table, print_item_iterate, NULL) == -1)
+ {
+ printf("iterate error!\n");
+ }*/
+ for(i=0;i<HTABLE_NUM;i++)
+ {
+ MESA_htable_destroy(garbage_htable[i], indextable_free_cnt);
+
+ }
+ free_key_set(to_process_list,HTABLE_NUM);
+ free(htable_copied_id_para);
+ htable_copied_id_para = NULL;
+ free(htable_copied_index_para);
+ htable_copied_index_para = NULL;
+ return success_cnt;
+}
+
+
+MESA_htable_handle copy_htable(void * htable_para,void (* func)(const uchar * key, uint size, void * data, void *user),void (*free_fuc)(void * data))
+{
+ MESA_htable_create_args_t copy_table_args;
+ memset(&copy_table_args, 0, sizeof(copy_table_args));
+ copy_table_args.thread_safe = 0;
+ copy_table_args.hash_slot_size = HTABLE_SIZE;
+ copy_table_args.max_elem_num = 0;
+ copy_table_args.expire_time = 0;
+ copy_table_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO;
+ copy_table_args.key_comp = NULL;
+ copy_table_args.key2index = NULL;
+ copy_table_args.data_free = free_fuc;
+ copy_table_args.data_expire_with_condition = NULL;
+ copy_table_args.recursive = 0;
+ MESA_htable_handle copy_htable_handle = MESA_htable_create(&copy_table_args, sizeof(copy_table_args));
+
+ struct htable_handle * htable_copied_para = (struct htable_handle *)htable_para;
+ struct htable_handle * htable_iterate_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle));
+ htable_iterate_para->runtime_table = copy_htable_handle;
+ htable_iterate_para->para = htable_copied_para->para;
+
+ if(MESA_htable_iterate(htable_copied_para->runtime_table, func, htable_iterate_para) == -1)
+ {
+ printf("iterate error!\n");
+ }
+ free(htable_iterate_para);
+ htable_iterate_para=NULL;
+ return copy_htable_handle;
+}
+
+void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user)
+{
+ struct index_table_data * index_data = (struct index_table_data *)data;
+ struct htable_handle * htable_copied_para = (struct htable_handle *)user;
+
+ struct index_table_data * index_data_copy = (struct index_table_data *)calloc(1, sizeof(struct index_table_data));
+ struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ));
+ index_data_copy->listhead = head;
+ index_data_copy->cnt = index_data->cnt;
+
+ TAILQ_INIT(head);
+ struct linklist_node * tmp_node = NULL;
+ struct id_table_data * ret = NULL;
+ int i = 0;
+
+ TAILQ_FOREACH(tmp_node, index_data->listhead, listentry)
+ {
+ struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node));
+ node_data->size = tmp_node->size;
+ node_data->position = (short *)calloc(node_data->size, sizeof(short));
+ for(i = 0;i < tmp_node->index;i++)
+ {
+ node_data->position[i] = tmp_node->position[i];
+ }
+ ret = (struct id_table_data *)MESA_htable_search(htable_copied_para->para, (const uchar *)(&(tmp_node->basicinfo->id)), sizeof(tmp_node->basicinfo->id));
+ if(ret == NULL)
+ {
+ //printf("copy id %u not exist\n",tmp_node->basicinfo->id);
+ free(node_data->position);
+ node_data->position = NULL;
+ free(node_data);
+ node_data = NULL;
+ continue;
+ }
+ node_data->basicinfo = ret;
+ node_data->index = tmp_node->index;
+ node_data->blocksize = tmp_node->blocksize;
+ TAILQ_INSERT_TAIL(head, node_data, listentry);
+ }
+ MESA_htable_add(htable_copied_para->runtime_table, key, size, (const void *)index_data_copy);
+}
+//TODO: use the original value instead of making a duplicate, to be faster.
+void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user)
+{
+ struct id_table_data * id_data = (struct id_table_data *)data;
+ struct htable_handle * htable_para = (struct htable_handle *)user;
+ struct id_table_data * id_data_copy = (struct id_table_data *)calloc(1, sizeof(struct id_table_data));
+ assert(id_data->tag!=NULL);
+ memcpy(id_data_copy,id_data,sizeof(struct id_table_data));
+ id_data_copy->sfh = (char *)calloc(id_data_copy->sfh_length + 1,sizeof(char)); /* +1 keeps the copy NUL-terminated like the original allocation */
+ memcpy(id_data_copy->sfh,id_data->sfh,id_data_copy->sfh_length);
+
+ MESA_htable_add(htable_para->runtime_table, (const uchar *)(&(id_data_copy->id)), sizeof(id_data_copy->id), (const void *)id_data_copy);
+}
+
+
+
+
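+/*
+ * Insert one (key, digest) pair into an index table.  Each key's list is kept
+ * ordered by digest id; if the digest already has a node for this key and
+ * blocksize, only its position array grows.  Once a key's list reaches
+ * CNT_MAX entries, indextable_delete_with_threshold drops nodes whose digest
+ * carries more than GRAM_CNT_THRESHOLD grams, capping very common grams.
+ */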
+int GIE_insert_indextable(MESA_htable_handle htable_copy, struct id_table_data * info, char * key, unsigned int index, unsigned long long blocksize)
+{
+ int key_length = strnlen(key,KEY_MAX_LENGTH);
+ struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node));
+ node_data->size = GRAM_CNT_MAX;
+ node_data->position = (short *)calloc(node_data->size, sizeof(short));
+ node_data->basicinfo = info;
+ node_data->index = 0;
+ node_data->position[(node_data->index)++] = index;
+ node_data->blocksize = blocksize;
+
+ //_handle->mem_occupy += sizeof(struct linklist_node) + sizeof(short)*(node_data->size);
+
+ struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable_copy, \
+ (const uchar *)(key), key_length));
+
+
+ if(ret != NULL)
+ {
+ struct linklist_node * tmp = NULL;
+ TAILQ_FOREACH(tmp, ret->listhead, listentry)
+ {
+ if(tmp->basicinfo->id > node_data->basicinfo->id)
+ {
+ TAILQ_INSERT_BEFORE(tmp, node_data, listentry);
+ ret->cnt ++;
+ if(ret->cnt >= CNT_MAX)
+ {
+ indextable_delete_with_threshold(htable_copy,ret,key);
+ }
+ return 0;
+ }
+ if(tmp->basicinfo->id == node_data->basicinfo->id && tmp->blocksize == blocksize)
+ {
+ if(tmp->index >= tmp->size)
+ {
+ tmp->size *= 2;
+ tmp->position = realloc(tmp->position, (tmp->size)*sizeof(short));
+ }
+ tmp->position[(tmp->index)++] = index;
+ //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(node_data->size));
+ free(node_data->position);
+ node_data->position = NULL;
+ free(node_data);
+ node_data = NULL;
+ return 0;
+ }
+ }
+ TAILQ_INSERT_TAIL(ret->listhead, node_data, listentry);
+ ret->cnt ++;
+ if(ret->cnt >= CNT_MAX)
+ {
+ indextable_delete_with_threshold(htable_copy,ret,key);
+ }
+ }
+
+ else
+ {
+ struct index_table_data * index_data = (struct index_table_data *)calloc(1, sizeof(struct index_table_data));
+ struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ));
+ //_handle->mem_occupy += sizeof(struct index_table_data) + sizeof(struct TQ);
+
+ index_data->listhead = head;
+ index_data->cnt = 0;
+
+ TAILQ_INIT(head);
+ TAILQ_INSERT_TAIL(head, node_data, listentry);
+ index_data->cnt++;
+ //_handle->hash_cnt++;
+ if(MESA_htable_add(htable_copy, (const uchar *)(key), key_length, (const void *)index_data) < 0)
+ {
+ printf("add index_table failed!\n");
+ assert(0);
+ return -1;
+ }
+ }
+ return 0;
+
+}
+
+
+
+int GIE_delete(GIE_handle_inner_t * _handle, GIE_digest_t * digest)
+{
+ int success_cnt = 0;
+ struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(_handle->id_table, \
+ (const uchar *)(&(digest->id)), sizeof(digest->id));
+ if(ret == NULL)
+ {
+ printf("del %d doesn't exist!\n",digest->id);
+ return -1;
+ }
+ else
+ {
+ int gram_value = _handle->user_gram_value;
+ char key[gram_value+1];
+ char * tmp_gram = ret->sfh;
+ while(*tmp_gram != '\0')
+ {
+ if(*tmp_gram == ':')
+ {
+ tmp_gram++;
+ break;
+ }
+ tmp_gram++;
+ }
+ unsigned int real_length = get_real_length(tmp_gram, ret->sfh_length);
+ int gram_cnt = real_length - gram_value + 1;
+ int k = 0;
+ for(k = 0; k < gram_cnt; k++)
+ {
+ memset(key, '\0', gram_value+1);
+ memcpy(key, tmp_gram++, gram_value);
+ if(GIE_delete_from_indextable_by_key(_handle, key, digest->id) < 0)
+ {
+ printf("delete %d indextable failed!\n",digest->id);
+ continue;
+ }
+ }
+ success_cnt++;
+ }
+
+ return success_cnt;
+}
+
+
+
+int GIE_delete_from_indextable_by_key(MESA_htable_handle htable, char * key, unsigned int id)
+{
+ int key_length = strnlen(key,KEY_MAX_LENGTH);
+ struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable, \
+ (const uchar *)(key), key_length));
+ if(ret == NULL)
+ {
+ return 0;
+ }
+
+
+ struct linklist_node * tmp = TAILQ_FIRST(ret->listhead);
+ while(tmp != NULL)
+ {
+ struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp, listentry);
+ if(tmp->basicinfo->id != id)
+ {
+ tmp=linklist_tmp;
+ continue;
+ }
+ TAILQ_REMOVE(ret->listhead, tmp, listentry);
+ ret->cnt--;
+ //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp->size));
+ free(tmp->position);
+ tmp->position = NULL;
+ free(tmp);
+ tmp = NULL;
+ if(TAILQ_EMPTY(ret->listhead) == 1)
+ {
+ //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ));
+ int ret = MESA_htable_del(htable, (const uchar *)(key), key_length, indextable_free);
+ if(ret < 0)
+ {
+ printf("indextable backtrack delete error!\n");
+ assert(0);
+ return -1;
+ }
+
+ }
+ }
+ return 0;
+}
+
+
+
+
+int GIE_cmp(const void * a, const void * b)
+{
+ unsigned int tmp_a = *(unsigned int *)a;
+ unsigned int tmp_b = *(unsigned int *)b;
+ if(before(tmp_a, tmp_b))
+ {
+ return -1;
+ }
+ else if(after(tmp_a, tmp_b))
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+
+inline unsigned int get_real_length(const char * string, unsigned int length)
+{
+ unsigned int ret = 0;
+ const char * tmp_str = string;
+ while(*tmp_str != '\0')
+ {
+ if(*tmp_str == '[')
+ {
+ break;
+ }
+ tmp_str++;
+ ret ++;
+ }
+ return ret;
+}
+
+
+inline int GIE_part_query(GIE_handle_inner_t * _handle, const char * query_string, int index_begin, int part_query_len,unsigned int ** id_union, unsigned int * union_index, unsigned int * union_size, unsigned long long blocksize)
+{
+ unsigned int gram_value = _handle->user_gram_value;
+
+ unsigned int real_length = part_query_len;
+ unsigned int chunk_count_max = 0;
+ if(real_length < gram_value)
+ {
+ return 0;
+ }
+ else
+ {
+ chunk_count_max = real_length/gram_value;
+ }
+ char key[gram_value+1];
+ struct index_table_data * ret = NULL;
+ struct linklist_node * tmp_node_t = NULL;
+
+ unsigned int position_accuracy = _handle->user_position_accuracy;
+
+ int i=0,j=0,k=0;
+ unsigned int tmp_min = 0;
+ int sum = 0, htable_index = 0;
+ for(i = index_begin; i < chunk_count_max + index_begin; i++)
+ {
+ sum = 0;
+ memset(key,'\0',gram_value+1);
+ memcpy(key, query_string, gram_value);
+ for(k = 0; k < gram_value; k++)
+ {
+ sum += key[k];
+ }
+ htable_index = sum%HTABLE_NUM;
+ ret = (struct index_table_data *) MESA_htable_search(_handle->index_table[htable_index], \
+ (const uchar *)(key), strnlen(key,gram_value));
+ query_string = query_string + gram_value;
+
+ if(ret ==NULL)
+ {
+ break;
+ }
+
+ tmp_node_t = NULL;
+ TAILQ_FOREACH(tmp_node_t, ret->listhead, listentry)
+ {
+ tmp_min = 0;
+ if(i*gram_value >= position_accuracy)
+ {
+ tmp_min = i*gram_value - position_accuracy;
+ }
+ for(j = 0; j < tmp_node_t->index; j++)
+ {
+ if((blocksize == tmp_node_t->basicinfo->blocksize) && (tmp_node_t->position[j] >= tmp_min) && (tmp_node_t->position[j] <= i*gram_value + position_accuracy))
+ //if(blocksize == tmp_node_t->basicinfo->blocksize)
+ {
+ if((*union_index) >= (*union_size))
+ {
+ *union_size = (*union_size) * 2;
+ *id_union = (unsigned int *)realloc(*id_union, (*union_size)*sizeof(unsigned int));
+ }
+ (*id_union)[(*union_index)] = tmp_node_t->basicinfo->id;
+ (*union_index)++;
+ break;
+ }
+ }
+ }
+ }
+ return chunk_count_max;
+}
+
+inline int GIE_gram_with_position(GIE_handle_inner_t * _handle, unsigned long long query_blocksize, const char * fuzzy_string, unsigned int ** id_union,
+ unsigned int * union_index,unsigned int * union_size, unsigned int * chunk_cnt)
+{
+ const char * tmpstr = fuzzy_string;
+ const char * query_string_begin;
+ unsigned long long blocksize = query_blocksize;
+ int part_query_len = 0;
+ int query_actual_len = 0;
+ while(*tmpstr != ':'&& *tmpstr != '\0')
+ {
+ tmpstr ++;
+ }
+ if(*tmpstr == ':')
+ {
+ tmpstr ++;
+ }
+ else
+ {
+ return 0;
+ }
+ query_string_begin = tmpstr;
+ char *p = NULL;
+
+ while((*query_string_begin) != '\0')
+ {
+ int left = 0;
+ int right = 0;
+ p=strchr(query_string_begin,'[');
+ if(p!=NULL)
+ {
+ part_query_len = p-query_string_begin;
+ int ret = sscanf(p,"[%d:%d]",&left,&right);
+ if(ret != 2)
+ {
+ break;
+ }
+ p=strchr(p,']');
+ if(p != NULL && (*p) != '\0')
+ {
+ int index_begin = (left/blocksize - TOLERENCE_SIZE > 0 ? (left/blocksize - TOLERENCE_SIZE) : 0);
+ (*chunk_cnt) += GIE_part_query(_handle,query_string_begin,index_begin, part_query_len,
+ id_union, union_index, union_size, blocksize);
+ query_actual_len += part_query_len;
+ query_string_begin = p+1;
+ }
+ else
+ {
+ break;
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ return query_actual_len;
+}
+
+inline unsigned long long calc_fh_blocksize(unsigned long long orilen)
+{
+ double tmp = (double)orilen/(64.0 * BLOCKSIZE_MIN); /* floating-point division; the integer division truncated */
+ double index = floor(log(tmp)/log(2));
+ double tmp_t = pow(2,index);
+ unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN);
+ return blocksize;
+}
+
+inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len)
+{
+ const char * tmp_str = fuzzy_string;
+ char blk[100];
+ memset(blk,'\0',sizeof(blk));
+ unsigned long long blocksize = 0;
+ int i = 0;
+ while(*tmp_str != '\0' && *tmp_str != ':' && str_len != 0 && i < 100)
+ {
+ blk[i++] = *tmp_str;
+ tmp_str++;
+ str_len--;
+ }
+ blocksize = strtoull(blk,NULL,10); /* atoi() would overflow for blocksizes above INT_MAX */
+ return blocksize;
+}
+int GIE_string_similiarity(const char *str1, int len1, const char *str2, int len2)
+{
+ int edit_distance=0;
+ int conf=0;
+ edit_distance = edit_distn(str1, len1,str2,len2);
+ conf = 100-(edit_distance*100)/(len1 + len2);
+ return conf;
+}
+
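+/*
+ * An SFH is expected to look like
+ *   "<blocksize>:<chunk>[l:r]<chunk>[l:r]...#<blocksize2>:<chunk>[l:r]..."
+ * i.e. two '#'-separated parts.  The two inputs are only compared when their
+ * blocksizes match: the "[l:r]" offset markers are stripped from sfh1, the
+ * remaining chunks are spliced together and an edit-distance score in 0..100
+ * is returned; 0 means no part with a matching blocksize was found.
+ */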
+int GIE_sfh_similiarity(const char *sfh1, int len1, const char *sfh2, int len2)
+{
+ int j = 0, t = 0;
+ unsigned long long query_blocksize = 0, index_blocksize = 0;
+ unsigned int query_real_length = 0, index_real_length = 0;
+ const char *query_gram_begin = sfh1;
+ const char *index_gram_begin = sfh2;
+ char *splice_str = (char *)malloc(sizeof(char)*len1);
+ memset(splice_str,'\0',len1);
+ char *spli_str_begin = splice_str;
+ int edit_distance = 0;
+ int ret = 0;
+ char *p = NULL;
+ int splice_len = 0;
+
+ for(j = 0; j < 2; j++)
+ {
+ index_blocksize = get_blocksize_from_head(index_gram_begin, len2);
+ while((*index_gram_begin) != '\0')
+ {
+ if((*index_gram_begin) == ':')
+ {
+ index_gram_begin++;
+ break;
+ }
+ index_gram_begin++;
+ }
+ index_real_length = get_real_length(index_gram_begin, len2);
+ query_gram_begin = sfh1;
+ for(t = 0; t < 2; t++)
+ {
+ query_blocksize = get_blocksize_from_head(query_gram_begin, len1);
+ //printf("gram_begin:%c\n",*index_gram_begin);
+ //printf("gram_str:%s\n",index_gram_begin);
+ while((*query_gram_begin) != '\0')
+ {
+ if((*query_gram_begin) == ':')
+ {
+ query_gram_begin++;
+ break;
+ }
+ query_gram_begin++;
+ }
+ //printf("query_blocksize:%lld, index_blocksize:%lld\n",query_blocksize,index_blocksize);
+ //index_real_length = get_real_length(index_gram_begin, len1);
+ if(query_blocksize == index_blocksize)
+ {
+ while((*query_gram_begin) != '#' && (*query_gram_begin) != '\0')
+ {
+ p=strchr(query_gram_begin,'[');
+ if(p!=NULL)
+ {
+ query_real_length = p-query_gram_begin;
+ p=strchr(p,']');
+ if(p != NULL && (*p) != '\0')
+ {
+
+ memcpy(spli_str_begin,query_gram_begin,query_real_length);
+ spli_str_begin += query_real_length;
+ //edit_distance += edit_distn(query_gram_begin, query_real_length, index_gram_begin, index_real_length);
+ query_gram_begin = p+1;
+ }
+ else
+ {
+ break;
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ splice_len = strnlen(splice_str,len1);
+ edit_distance = edit_distn(index_gram_begin, index_real_length, splice_str, splice_len);
+ //printf("query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance);
+ ret = 100-(edit_distance*100)/(index_real_length + splice_len);
+ //ret = (100*ret)/SPAM_LENGTH;
+ //ret = 100-ret;
+ //ret = 100 - (100*edit_distance)/(query_real_length);
+ free(splice_str);
+ return ret;
+ }
+ while(*query_gram_begin != '\0')
+ {
+ if(*query_gram_begin == '#')
+ {
+ query_gram_begin++;
+ break;
+ }
+ query_gram_begin++;
+ }
+
+ }
+ while(*index_gram_begin != '\0')
+ {
+ if(*index_gram_begin == '#')
+ {
+ index_gram_begin++;
+ break;
+ }
+ index_gram_begin++;
+ }
+ }
+ //printf("no blocksize:query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance);
+ free(splice_str);
+ return 0;
+}
+
+
+
+
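+/*
+ * Query flow: extract the n-grams of each '#'-separated part of the query,
+ * look them up in the matching index table, and collect the ids of digests
+ * whose blocksize and gram position are compatible.  The id union is sorted,
+ * per-id hits are counted into a gram-overlap confidence, and when
+ * ED_reexamine is set each candidate is re-scored with the edit-distance
+ * similarity before being emitted into results[] (filtered by the digest's
+ * cfds_lvl threshold and capped at result_size).
+ */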
+int GIE_query(GIE_handle_t * handle, const char * data, int data_len, GIE_result_t * results, int result_size)
+{
+ GIE_handle_inner_t * _handle = (GIE_handle_inner_t *) handle;
+ int i = 0, j = 0;
+ unsigned int union_index = 0;
+ unsigned int gram_value = _handle->user_gram_value;
+ unsigned int query_actual_len = 0;
+ unsigned int union_size = UNION_INIT_SIZE;
+ unsigned int chunk_cnt = 0;
+ const char *fuzzy_string_begin = data;
+ unsigned int * id_union =(unsigned int *)calloc(union_size, sizeof(unsigned int));
+ unsigned long long query_blocksize = 0;
+ unsigned int fuzzy_string_len = (unsigned int)data_len;
+
+ if(_handle->input_format == GIE_INPUT_FORMAT_SFH)
+ {
+ for(j = 0;j < 2;j++)
+ {
+ query_blocksize = get_blocksize_from_head(fuzzy_string_begin, fuzzy_string_len);
+ if(query_blocksize == 0)
+ {
+ return 0;
+ }
+ query_actual_len += GIE_gram_with_position(_handle, query_blocksize, fuzzy_string_begin, &id_union, &union_index, &union_size, &chunk_cnt);
+ while(*fuzzy_string_begin != '#' && *fuzzy_string_begin != '\0')
+ {
+ fuzzy_string_begin++;
+ }
+ if(*fuzzy_string_begin == '#')
+ {
+ fuzzy_string_begin++;
+ }
+ }
+ }
+ else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN)
+ {
+ query_actual_len = fuzzy_string_len;
+ chunk_cnt = GIE_part_query(_handle, fuzzy_string_begin, 0, query_actual_len, &id_union, &union_index, &union_size, 0);
+ }
+
+ if(union_index == 0)
+ {
+ free(id_union);
+ id_union = NULL;
+ return 0;
+ }
+
+ qsort(id_union, union_index, sizeof(id_union[0]), GIE_cmp);
+
+ unsigned int current_id = id_union[0];
+ unsigned int * tmp_id = id_union;
+ unsigned int count = 0;
+ struct id_table_data * ret_tmp = NULL;
+ short conf = 0;
+ int ret_size = 0;
+ for(i = 0; i <= union_index; i++)
+ {
+ if( i == union_index || *tmp_id != current_id )
+ {
+ ret_tmp = (struct id_table_data *) MESA_htable_search(_handle->id_table, \
+ (const uchar *)(&(current_id)), sizeof(current_id));
+
+ if(ret_tmp == NULL)
+ {
+ break;
+ }
+ char * tmp_gram = ret_tmp->sfh;
+ int length = ret_tmp->sfh_length;
+ if(ret_tmp->gram_cnt == 0||chunk_cnt == 0)
+ {
+ conf = 0;
+ }
+ else
+ {
+ conf = (count*(query_actual_len-gram_value+1)*10)/(chunk_cnt*(ret_tmp->gram_cnt));
+ }
+
+ if(_handle->ED_reexamine == 1)
+ {
+ if(_handle->input_format == GIE_INPUT_FORMAT_SFH)
+ {
+ conf = GIE_sfh_similiarity(data, fuzzy_string_len, tmp_gram, length);
+ }
+ else
+ {
+ conf=GIE_string_similiarity(data, fuzzy_string_len, tmp_gram, length);
+ }
+ }
+
+ if(conf >= ret_tmp->cfds_lvl)
+ {
+ results[ret_size].cfds_lvl = conf;
+ results[ret_size].id = current_id;
+ /*results[ret_size].tag = (char *)malloc((ret_tmp->sfh_length + 1)*sizeof(char));
+ memset(results[ret_size].tag,'\0',(ret_tmp->sfh_length+1));
+ memcpy(results[ret_size].tag, ret_tmp->sfh,ret_tmp->sfh_length);*/
+ results[ret_size].tag = ret_tmp->tag;
+ ret_size++;
+ }
+
+ if(ret_size == result_size)
+ {
+ break;
+ }
+
+ current_id = *tmp_id;
+ count = 1;
+
+ }
+ else
+ {
+ count++;
+ }
+
+ tmp_id ++;
+ }
+
+ free(id_union);
+ id_union = NULL;
+ return ret_size;
+}
+
+
+unsigned long long GIE_status(GIE_handle_t * handle, int type)
+{
+ unsigned long long length;
+ GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)handle;
+ switch(type)
+ {
+ case MEM_OCCUPY:
+ length = _handle->mem_occupy;
+ break;
+ default:
+ return 0;
+ }
+ return length;
+}
+
diff --git a/src/get_td_mistake_lost/new_TD.conf b/src/get_td_mistake_lost/new_TD.conf
new file mode 100644
index 0000000..be9301e
--- /dev/null
+++ b/src/get_td_mistake_lost/new_TD.conf
@@ -0,0 +1,3 @@
+[file]
+ripe_files_address = ../data/ripe_data/td_data_20171207/new_TD.txt
+raw_file_address = ../data/ripe_data/td_data_20171207/all_av_digest
diff --git a/src/get_td_mistake_lost/new_TD.py b/src/get_td_mistake_lost/new_TD.py
new file mode 100644
index 0000000..5b7269f
--- /dev/null
+++ b/src/get_td_mistake_lost/new_TD.py
@@ -0,0 +1,34 @@
+#-*-coding:utf-8-*-
+import re
+import random
+import ConfigParser
+import bisect
+import commands
+import os
+import hashlib
+
+config = ConfigParser.RawConfigParser()
+config.read("file_digest.conf")
+raw_file_address=config.get("new_td","raw_file_address")
+ripe_files_address=config.get("new_td","ripe_files_address")
+print ("%s %s" %(raw_file_address,ripe_files_address))
+
+def get_md5_value(td_string):
+ my_md5 = hashlib.md5()
+ my_md5.update(td_string)
+ my_md5_string=str(my_md5.hexdigest())
+ return my_md5_string
+
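+# For every raw all_av_digest line: split on ';', pull the URL / MediaType /
+# MediaLen / Etag / LastModify pieces out of field 4 plus the 32k MD5 from
+# field 16, MD5 the concatenation to get the new TD, and emit
+# "<td_string>;<new_td>;<field 19>" (field 19 is what the downstream C tools
+# read as the SFH).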
+i=0
+with open(raw_file_address,'r') as infile:
+ with open(ripe_files_address,'w')as outfile:
+ for line in infile:
+ i+=1
+ if(i%100000==0):
+ print i
+ data_line_val = re.split(r';',line)
+ data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data_line_val[4])
+ td_string=str("url"+data_set[1]+"MediaType:"+data_set[3]+"MediaLen:"+data_set[4] \
+ +"Etag:"+data_set[5]+"LastModify:"+data_set[6]+"td_data_md5_32k:"+data_line_val[16])
+ new_td=get_md5_value(td_string)
+ outfile.write(td_string+";"+new_td+";"+data_line_val[19]+"\n") \ No newline at end of file
diff --git a/src/rssb_statistics/all_len_st.py b/src/rssb_statistics/all_len_st.py
new file mode 100644
index 0000000..6e32518
--- /dev/null
+++ b/src/rssb_statistics/all_len_st.py
@@ -0,0 +1,17 @@
+import os
+import re
+import csv
+import bisect
+# cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log"
+# cmd2 = "cat media_expire.log.2018* > media_expire.log"
+# os.system(cmd1)
+# os.system(cmd2)
+breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304]
+st_num = [0,0,0,0,0,0,0,0]
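+# bisect() drops each media_len into one of 8 bins bounded at 1, 1.25, 1.5,
+# 1.75, 2, 3 and 4 MiB; the last bin counts everything above 4 MiB.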
+
+with open("media_expire.log",'r') as infile:
+ for line in infile:
+ line_result = re.split(r",|MID:|media_len:",line)
+ j = bisect.bisect(breakpoints,int(line_result[6]))
+ st_num[j]+=1
+print st_num \ No newline at end of file
diff --git a/src/rssb_statistics/delete_len_st.py b/src/rssb_statistics/delete_len_st.py
new file mode 100644
index 0000000..010dc84
--- /dev/null
+++ b/src/rssb_statistics/delete_len_st.py
@@ -0,0 +1,48 @@
+import os
+import re
+import csv
+import bisect
+cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log"
+cmd2 = "cat ../12.log/media_expire.log.2018* > ../12.log/media_expire.log"
+cmd3 = "cat media_expire.log.2018* > media_expire.log"
+os.system(cmd1)
+os.system(cmd2)
+os.system(cmd3)
+breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304]
+st_num = [0,0,0,0,0,0,0,0]
+mid_13_set=set()
+mid_12_set=set()
+mid_12_file = "../12.log/media_expire.log"
+mid_13_file = "media_expire.log"
+
+i=0
+with open(mid_13_file,'r') as infile:
+ for line in infile:
+ i+=1
+ if(i%100000==0):
+ print i
+ line_result = re.split(r",|MID:|media_len:",line)
+ mid_13_set.add(int(line_result[3]))
+
+i=0
+with open(mid_12_file,'r') as infile:
+ for line in infile:
+ i+=1
+ if(i%100000==0):
+ print i
+ line_result = re.split(r",|MID:|media_len:",line)
+ mid_12_set.add(int(line_result[3]))
+
+different = mid_12_set.difference(mid_13_set)
+
+i=0
+with open(mid_12_file,'r') as infile:
+ for line in infile:
+ i+=1
+ if(i%100000==0):
+ print i
+ line_result = re.split(r",|MID:|media_len:",line)
+ if((int(line_result[3]) in different)==True):
+ j = bisect.bisect(breakpoints,int(line_result[6]))
+ st_num[j]+=1
+print st_num \ No newline at end of file
diff --git a/src/rssb_statistics/find_lost_td.conf b/src/rssb_statistics/find_lost_td.conf
new file mode 100644
index 0000000..6ab40d9
--- /dev/null
+++ b/src/rssb_statistics/find_lost_td.conf
@@ -0,0 +1,14 @@
+[file]
+raw_survey_file_13 = ../data/data_20180423/14.log/survey.recv.log
+raw_deup_file = ../data/data_20180423/13.log/dedup.log
+raw_survey_file_12 = ../data/data_20180423/12.log/survey.recv.log
+run_time_file = ../data/data_20180423/runtime_log
+lost_td_line = ../data/data_20180423/ripe_file/lost_td_line
+mid_12_file = ../data/data_20180423/ripe_file/mid_12_file
+mid_13_file = ../data/data_20180423/ripe_file/mid_14_file
+list_12_file = ../data/data_20180423/ripe_file/list_12_file
+list_13_file = ../data/data_20180423/ripe_file/list_14_file
+different_mid_file_13 = ../data/data_20180423/ripe_file/different_mid_file_14
+different_mid_file_12 = ../data/data_20180423/ripe_file/different_mid_file_12
+different_list_file_13 = ../data/data_20180423/ripe_file/different_list_file_14
+different_list_file_12 = ../data/data_20180423/ripe_file/different_list_file_12 \ No newline at end of file
diff --git a/src/rssb_statistics/find_lost_td.py b/src/rssb_statistics/find_lost_td.py
new file mode 100644
index 0000000..50f3fab
--- /dev/null
+++ b/src/rssb_statistics/find_lost_td.py
@@ -0,0 +1,147 @@
+import ConfigParser
+import re
+
+config = ConfigParser.RawConfigParser()
+config.read("find_lost_td.conf")
+raw_survey_file_13 = config.get("file","raw_survey_file_13")
+raw_deup_file = config.get("file","raw_deup_file")
+# run_time_file = config.get("file","run_time_file")
+raw_survey_file_12 = config.get("file","raw_survey_file_12")
+# lost_td_line = config.get("file","lost_td_line")
+mid_12_file = config.get("file","mid_12_file")
+mid_13_file = config.get("file","mid_13_file")
+list_12_file = config.get("file","list_12_file")
+list_13_file = config.get("file","list_13_file")
+different_mid_file_13 = config.get("file","different_mid_file_13")
+different_mid_file_12 = config.get("file","different_mid_file_12")
+different_list_file_13 = config.get("file","different_list_file_13")
+different_list_file_12 = config.get("file","different_list_file_12")
+
+term = {'td_len':(lambda x : len(x)==32),
+ 'data_num':(lambda x : len(x)>7),
+ 'url':(lambda x : x.find('NUll')),
+ 'sfh_len':(lambda x : len(x)>20),
+ 'not_null':(lambda x : len(x)!=0)}
+
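+# Overall flow: collect the MIDs seen in the two survey logs, write each set
+# and both set differences to disk, then walk the dedup log to map every MID
+# to the TDs reported for it, build the per-node TD lists, and finally diff
+# those TD lists to find TDs present on one node but missing on the other.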
+mid_13=dict()
+with open(raw_survey_file_13,'r') as infile:
+ for line in infile:
+ data_line_val = re.split(r',',line)
+ if(len(data_line_val)==8):
+ mid_string = (re.split(r"MID:",data_line_val[2]))[1]
+ mid_13[mid_string]=list()
+
+
+with open(mid_13_file,'w') as outfile:
+ for key in mid_13:
+ outfile.write(key+'\n')
+
+mid_12=dict()
+with open(raw_survey_file_12,'r') as infile:
+ for line in infile:
+ data_line_val = re.split(r',',line)
+ if(len(data_line_val)==8):
+ mid_string = (re.split(r"MID:",data_line_val[2]))[1]
+ mid_12[mid_string]=list()
+
+with open(mid_12_file,'w') as outfile:
+ for key in mid_12:
+ outfile.write(key+'\n')
+
+different_mid_13 = list()
+with open(different_mid_file_13,'w') as outfile:
+ for key in mid_13:
+ if(mid_12.has_key(key)==False):
+ different_mid_13.append(key)
+ outfile.write(key+'\n')
+
+different_mid_12 = list()
+with open(different_mid_file_12,'w') as outfile:
+ for key in mid_12:
+ if(mid_13.has_key(key)==False):
+ different_mid_12.append(key)
+ outfile.write(key+'\n')
+
+i=0
+with open(raw_deup_file,'r') as infile:
+ for line in infile:
+ i+=1
+ if(i%100000==0):
+ print i
+ data_line_val = re.split(r",|MID:|TD:",line)
+ if(term['data_num'](data_line_val) and \
+ mid_13.has_key(str(data_line_val[4])) == True):
+ mid_13[data_line_val[4]].append(data_line_val[6])
+ if(term['data_num'](data_line_val) and \
+ mid_12.has_key(str(data_line_val[4])) == True):
+ mid_12[data_line_val[4]].append(data_line_val[6])
+
+td_list_13 =list()
+with open(list_13_file,'w') as outfile:
+ for key in mid_13.keys():
+ for td in mid_13[key]:
+ if(term['not_null'](td) and td_list_13.count(td)==0):
+ td_list_13.append(td)
+ outfile.write(td+'\n')
+
+td_list_12 =list()
+with open(list_12_file,'w') as outfile:
+ for key in mid_12.keys():
+ for td in mid_12[key]:
+ if(term['not_null'](td) and td_list_12.count(td)==0):
+ td_list_12.append(td)
+ outfile.write(td+'\n')
+
+different_list_12 = list()
+with open(different_list_file_12,'w') as outfile:
+ for x in td_list_12:
+ if(td_list_13.count(x)==0):
+ different_list_12.append(x)
+ outfile.write(x+'\n')
+
+different_list_13 = list()
+with open(different_list_file_13,'w') as outfile:
+ for x in td_list_13:
+ if(td_list_12.count(x)==0):
+ different_list_13.append(x)
+ outfile.write(x+'\n')
+
+td_dict=dict()
+for i in different_list_12:
+ td_dict[i]=list()
+
+# i=0
+# with open(run_time_file,'r') as infile:
+# for line in infile:
+# i+=1
+# if(i%100000==0):
+# print i
+# if(line.find("NCHK_QUREY__KNOW")!=-1):
+# data_line_val = re.split(r',|TD:',line)
+# if(td_dict.has_key(data_line_val[6]) == True):
+# td_dict[data_line_val[6]].insert(0,"NCHK_QUREY__KNOW"+'\n')
+# td_dict[data_line_val[6]].append(line)
+# elif(line.find("NCHK_QUREY__UNKNOW")!=-1):
+# data_line_val = re.split(r',|TD:',line)
+# if(td_dict.has_key(data_line_val[6]) == True):
+# td_dict[data_line_val[6]].append(line)
+# elif(line.find("NCHK_REPORT__SUCC")!=-1):
+# data_line_val = re.split(r',|TD:',line)
+# if(td_dict.has_key(data_line_val[6]) == True):
+# td_dict[data_line_val[6]].append(line)
+
+# else:
+# continue
+
+
+print len(different_list_12),len(different_list_13),\
+len(td_list_12),len(td_list_13),\
+len(mid_12),len(mid_13),len(different_mid_13),len(different_mid_12)
+
+# with open(lost_td_line,'w') as outfile:
+# for key in td_dict.keys():
+# if(len(td_dict[key])>2 and td_dict[key][0]=="NCHK_QUREY__KNOW"):
+# outfile.write(key+':\n')
+# for i in td_dict[key]:
+# outfile.write(i)
+
diff --git a/src/rssb_statistics/harm_len_st.py b/src/rssb_statistics/harm_len_st.py
new file mode 100644
index 0000000..8a372b3
--- /dev/null
+++ b/src/rssb_statistics/harm_len_st.py
@@ -0,0 +1,29 @@
+import os
+import re
+import csv
+import bisect
+cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log"
+cmd2 = "cat media_expire.log.2018* > media_expire.log"
+os.system(cmd1)
+os.system(cmd2)
+breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304]
+st_num = [0,0,0,0,0,0,0,0]
+harm_mid_dic=dict()
+with open("survey.recv_survey.log",'r') as infile:
+ for line in infile:
+ data_line_val = re.split(r',',line)
+ if(len(data_line_val)==8):
+ mid_string = (re.split(r"MID:",data_line_val[2]))[1]
+ harm_mid_dic[mid_string]=list()
+
+with open("media_expire.log",'r') as infile:
+ for line in infile:
+ line_result = re.split(r",|MID:|media_len:",line)
+ if(harm_mid_dic.has_key(str(line_result[3]))==True):
+ # print int(line_result[6])
+ j = bisect.bisect(breakpoints,int(line_result[6]))
+ st_num[j]+=1
+print st_num
+# with open("un_recv_list.csv",'w') as csvfile:
+# writer = csv.writer(csvfile)
+# writer.writerow(un_recv_list) \ No newline at end of file
diff --git a/src/rssb_statistics/recv_survey_mid_st.py b/src/rssb_statistics/recv_survey_mid_st.py
new file mode 100644
index 0000000..f7faaeb
--- /dev/null
+++ b/src/rssb_statistics/recv_survey_mid_st.py
@@ -0,0 +1,3 @@
+import os
+cmd = "cat survey.log.2018* | grep \"recv survey\" | wc -l"
+os.system(cmd) \ No newline at end of file
diff --git a/src/rssb_statistics/service_st.py b/src/rssb_statistics/service_st.py
new file mode 100644
index 0000000..7fecd33
--- /dev/null
+++ b/src/rssb_statistics/service_st.py
@@ -0,0 +1,29 @@
+import re
+
+different_mid_12=set()
+different_mid_14=set()
+service_dic=dict()
+
+with open("different_mid_file_12",'r') as infile:
+ for line in infile:
+ different_mid_12.add(long(line))
+with open("different_mid_file_14",'r') as infile:
+ for line in infile:
+ different_mid_14.add(long(line))
+with open("../12.log/survey.recv_survey.log",'r') as infile:
+ for line in infile:
+ line_result = re.split(r",|MID:|service:",line)
+ if((long(line_result[3]) in different_mid_12)==True):
+ if(service_dic.has_key(line_result[5])==True):
+ service_dic[line_result[5]]+=1
+ else:
+ service_dic[line_result[5]]=0
+with open("../14.log/survey.recv.log",'r') as infile:
+ for line in infile:
+ line_result = re.split(r",|MID:|service:",line)
+ if((long(line_result[3]) in different_mid_14)==True):
+ if(service_dic.has_key(line_result[5])==True):
+ service_dic[line_result[5]]+=1
+ else:
+ service_dic[line_result[5]]=0
+print service_dic \ No newline at end of file
diff --git a/src/rssb_statistics/un_recv_st.py b/src/rssb_statistics/un_recv_st.py
new file mode 100644
index 0000000..9d3d234
--- /dev/null
+++ b/src/rssb_statistics/un_recv_st.py
@@ -0,0 +1,36 @@
+import os
+import re
+import csv
+cmd = "cat rssb_stat.log.2018-04-16 | grep \"/home/audiorecognition/aufilter/un_recv\" > rssb_stat.log.un_recv"
+os.system(cmd)
+i=0
+last_len = 0
+add_len = 0
+num = 0
+un_recv_list = list()
+with open("rssb_stat.log.un_recv",'r') as infile:
+ for line in infile:
+ line_result = re.split(r'\t',line)
+ if(i==0):
+ last_len = long(line_result[2])
+ break
+
+with open("rssb_stat.log.un_recv",'r') as infile:
+ for line in infile:
+ line_result = re.split(r'\t',line)
+ if(last_len <= long(line_result[2])):
+ add_len = add_len+long(line_result[2])-last_len
+ last_len = long(line_result[2])
+ else:
+ add_len = add_len+long(line_result[2])
+ last_len = long(line_result[2])
+ i+=1
+ if(i>=120):
+ i=0
+ un_recv_list.append(str(add_len))
+ add_len=0
+
+with open("un_recv_list.csv",'w') as csvfile:
+ writer = csv.writer(csvfile)
+ writer.writerow(un_recv_list)
+
diff --git a/src/rssb_statistics/un_recv_survey_mid_st.py b/src/rssb_statistics/un_recv_survey_mid_st.py
new file mode 100644
index 0000000..e1e970f
--- /dev/null
+++ b/src/rssb_statistics/un_recv_survey_mid_st.py
@@ -0,0 +1,3 @@
+import os
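+# count how many "sync_audio" entries appear across all 2018 survey logs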
+cmd = "cat survey.log.2018* | grep \"sync_audio\" | wc -l"
+os.system(cmd) \ No newline at end of file
diff --git a/src/sfh_integrate/SFH_function.c b/src/sfh_integrate/SFH_function.c
new file mode 100644
index 0000000..a311f9c
--- /dev/null
+++ b/src/sfh_integrate/SFH_function.c
@@ -0,0 +1,177 @@
+/*
+gcc -g SFH_function.c -o SFH_function -lmaatframe -lMESA_htable -I../include
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "gram_index_engine.h"
+#include <MESA/MESA_htable.h>
+#include <assert.h>
+#include <ctype.h>
+#define SLOT_SIZE (1024*1024*16)
+#define THREAD_SAFE 0
+#define BUFFER_LEN (10*1024)
+#define SFH_LEN (10*1024)
+#define TD_LEN 33
+
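+/* one recorded SFH string together with its running similarity totals and hashed length */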
+typedef struct sfh_link
+{
+ char *sfh_str;
+ int similiar;
+ int all_similiar;
+ long hash_len;
+ struct sfh_link *next;
+}sfh_link;
+
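+/* per-TD record: the SFH currently considered most representative plus the list of every SFH seen for that TD */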
+typedef struct top_similiar_sfh
+{
+ int all_num;
+ int all_similiar;
+ char *sfh_str;
+ long hash_len;
+ sfh_link *sfh_link_items;
+}top_similiar_sfh;
+
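+/* sum the "[left:right]" ranges embedded in an SFH string and return half of the covered length */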
+long get_hashed_len(const char* sfh)
+{
+ char *data=(char*)malloc(strlen(sfh)+1);
+ memcpy(data,sfh, strlen(sfh));
+ data[strlen(sfh)]='\0';
+ char *token=NULL,*sub_token=NULL,*saveptr;
+ long left_offset=0,right_offset=0,hashed_length=0;
+ int ret=0,first=0;
+ for (token = data; ; token= NULL)
+ {
+ sub_token= strtok_r(token,"[", &saveptr);
+ if (sub_token == NULL)
+ {
+ break;
+ }
+ if(first==0)//jump over the first sub string.
+ {
+ first=1;
+ continue;
+ }
+ ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset);
+ if(ret!=2)
+ {
+ free(data);/* malformed "[left:right]" range: free the working copy before bailing out */
+ return 0;
+ }
+ hashed_length+=right_offset-left_offset+1;
+ }
+ //printf("hashed length=%ld\n",hashed_length);
+ free(data);
+ return hashed_length/2;
+}
+
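+/* MESA_htable_iterate callback: write one TD, its top SFH, and every linked SFH record to the report file */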
+void print_mistake_td(const uchar *key,uint size,void *data,void *arg)
+{
+ FILE *ripe_file = (FILE*)arg;
+ top_similiar_sfh *temp_top_similiar_sfh=(top_similiar_sfh*)data;
+ fprintf(ripe_file,"%s,%s\n",key,temp_top_similiar_sfh->sfh_str);
+ sfh_link *temp_sfh_link=temp_top_similiar_sfh->sfh_link_items;
+ for(;;temp_sfh_link=temp_sfh_link->next)
+ {
+ if(temp_sfh_link==NULL)
+ {
+ break;
+ }
+ fprintf(ripe_file,"%d;%s;%ld\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->hash_len);/* hash_len is a long, so print it with %ld */
+ }
+ fprintf(ripe_file,"\n");
+}
+int main()
+{
+ FILE *raw_file;
+ FILE *ripe_file;
+ char *raw_file_dir="../data/td_data_set/td_data_20171207/video_id_raw_data";
+ char *ripe_file_dir="../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_2";
+ char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL;
+ raw_file = fopen(raw_file_dir,"r+");
+ ripe_file = fopen(ripe_file_dir,"w+");
+ long temp_hash_len=0;
+ unsigned int slot_size=SLOT_SIZE;
+ int i=0,thread_safe=THREAD_SAFE,ret=0,temp_similiar=0,temp_all_similiar=0;
+ top_similiar_sfh *temp_top_similiar_sfh=NULL;
+ sfh_link *temp_sfh_link=NULL;
+ MESA_htable_handle htable=NULL;
+ if(raw_file==NULL)
+ {
+ printf("open all_av_digest error\n");
+ return -1;
+ }
+
+ if(ripe_file==NULL)
+ {
+ printf("open all_av_digest_mistake_level error");
+ return -1;
+ }
+ buffer = (char*)calloc(BUFFER_LEN,sizeof(char));
+ sfh_str = (char*)calloc(SFH_LEN,sizeof(char));
+ td_str = (char*)calloc(BUFFER_LEN,sizeof(char));/* previously never allocated, so the sscanf below wrote through a NULL pointer */
+ td = (char*)calloc(TD_LEN,sizeof(char));
+ td[32]='\0';
+ htable=MESA_htable_born();
+ MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int));
+ MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int));
+ MESA_htable_mature(htable);
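+ /* group every SFH by its 32-character TD; per TD, keep promoting the SFH with the largest accumulated similarity */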
+ while(feof(raw_file)==0)
+ {
+ i++;
+ if(i%10000==0)
+ {
+ printf("%d\n",i);
+ }
+ if(fgets(buffer,BUFFER_LEN-1,raw_file)==NULL)/* feof() only becomes true after a failed read, so stop here instead of reprocessing the last line */
+ {
+ break;
+ }
+ buffer[BUFFER_LEN-1]='\0';
+ ret=sscanf(buffer,"%[^;];%32[^;];%[^;]",td_str,td,sfh_str);/* cap the TD field at 32 characters so it cannot overflow td[] */
+ assert(ret==3);/* three ';'-separated fields are requested, so a complete match returns 3, not 2 */
+ td[32]='\0';
+ if((temp_top_similiar_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL)
+ {
+ temp_top_similiar_sfh=(top_similiar_sfh*)calloc(1,sizeof(top_similiar_sfh));
+ temp_top_similiar_sfh->all_num=1;
+ temp_top_similiar_sfh->all_similiar=0;
+ temp_top_similiar_sfh->hash_len=get_hashed_len(sfh_str);
+ temp_top_similiar_sfh->sfh_str=strdup(sfh_str);
+ temp_top_similiar_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link));
+ temp_top_similiar_sfh->sfh_link_items->sfh_str=strdup(sfh_str);
+ temp_top_similiar_sfh->sfh_link_items->hash_len=temp_top_similiar_sfh->hash_len;/* record the hashed length so the report does not show 0 for the first SFH */
+ temp_top_similiar_sfh->sfh_link_items->similiar=0;
+ temp_top_similiar_sfh->sfh_link_items->all_similiar=0;
+ temp_top_similiar_sfh->sfh_link_items->next=NULL;
+ ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_top_similiar_sfh);
+ assert(ret>0);
+ }
+ else
+ {
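+ /* known TD: score the new SFH against the current top SFH and all stored ones, promote whichever has the highest running total, then append the new SFH to the list */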
+ temp_similiar=GIE_sfh_similiarity(temp_top_similiar_sfh->sfh_str,(int)strlen(temp_top_similiar_sfh->sfh_str),sfh_str,(int)strlen(sfh_str));
+ temp_top_similiar_sfh->all_similiar+=temp_similiar;
+ temp_sfh_link=temp_top_similiar_sfh->sfh_link_items;
+ for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next)
+ {
+ temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str));
+ temp_sfh_link->all_similiar+=temp_similiar;
+ temp_all_similiar+=temp_similiar;
+ if(temp_sfh_link->all_similiar>temp_top_similiar_sfh->all_similiar)
+ {
+ free(temp_top_similiar_sfh->sfh_str);
+ temp_top_similiar_sfh->sfh_str=strdup(temp_sfh_link->sfh_str);
+ temp_top_similiar_sfh->all_similiar=temp_sfh_link->all_similiar;
+ }
+ if(temp_sfh_link->next==NULL)
+ {
+ break;
+ }
+ }
+ temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link));
+ temp_sfh_link->next->sfh_str=strdup(sfh_str);
+ temp_sfh_link->next->hash_len=get_hashed_len(sfh_str);
+ temp_sfh_link->next->similiar=0;
+ temp_sfh_link->next->all_similiar=temp_all_similiar;
+ temp_sfh_link->next->next=NULL;
+ temp_top_similiar_sfh->all_num+=1;
+ }
+ }
+ fclose(raw_file);
+ MESA_htable_iterate(htable,print_mistake_td,ripe_file);
+ fclose(ripe_file);/* flush the report before exiting */
+ return 0;
+} \ No newline at end of file
diff --git a/src/test/digest_temp b/src/test/digest_temp
new file mode 100644
index 0000000..3cde5ae
--- /dev/null
+++ b/src/test/digest_temp
@@ -0,0 +1,8 @@
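+# pair each "mid..." line in ./temp1 with the value line that follows it and write them as "mid,value" rows to ./temp2 (prefixes of 4 and 7 characters are stripped)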
+with open('./temp1','r') as file_sfh:
+    with open('./temp2','w') as out_file:
+        for line in file_sfh:
+            line=line.replace("\n","").replace("\"","").replace("\t","").replace(",","").strip()
+            if(line.find("mid")>=0):
+                out_file.write(line[4:]+",")
+            else:
+                out_file.write(line[7:]+"\n") \ No newline at end of file