From b2a2f39d89b3bd154da10eb619f8a40c7c6b15d2 Mon Sep 17 00:00:00 2001
From: 陈冠林
Date: Tue, 18 Jun 2019 10:44:20 +0800
Subject: Add inc and src
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeCache.txt | 278 ----
 CMakeFiles/CMakeCCompiler.cmake | 36 -
 CMakeFiles/CMakeCXXCompiler.cmake | 36 -
 CMakeFiles/CMakeDetermineCompilerABI_C.bin | Bin 7045 -> 0 bytes
 CMakeFiles/CMakeDetermineCompilerABI_CXX.bin | Bin 7331 -> 0 bytes
 CMakeFiles/CMakeDirectoryInformation.cmake | 21 -
 CMakeFiles/CMakeOutput.log | 89 --
 CMakeFiles/CMakeSystem.cmake | 15 -
 CMakeFiles/CompilerIdC/CMakeCCompilerId.c | 182 ---
 CMakeFiles/CompilerIdC/a.out | Bin 6518 -> 0 bytes
 CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp | 169 ---
 CMakeFiles/CompilerIdCXX/a.out | Bin 6891 -> 0 bytes
 CMakeFiles/Makefile.cmake | 52 -
 CMakeFiles/Makefile2 | 99 --
 CMakeFiles/Progress/1 | 1 -
 CMakeFiles/Progress/count.txt | 1 -
 CMakeFiles/cmake.check_cache | 1 -
 CMakeFiles/grain.dir/C.includecache | 24 -
 CMakeFiles/grain.dir/DependInfo.cmake | 13 -
 CMakeFiles/grain.dir/build.make | 103 --
 CMakeFiles/grain.dir/cmake_clean.cmake | 10 -
 CMakeFiles/grain.dir/depend.internal | 5 -
 CMakeFiles/grain.dir/depend.make | 5 -
 CMakeFiles/grain.dir/flags.make | 8 -
 CMakeFiles/grain.dir/link.txt | 1 -
 CMakeFiles/grain.dir/progress.make | 2 -
 CMakeFiles/progress.make | 1 -
 Makefile | 163 ---
 ...3\243\347\240\201\350\257\264\346\230\216.docx" | Bin 13258 -> 0 bytes
 cmake_install.cmake | 44 -
 dataset_build/CMakeLists.txt | 11 -
 dataset_build/based_sfh.conf | 3 -
 dataset_build/based_sfh.py | 44 -
 dataset_build/cal_information.conf | 5 -
 dataset_build/cal_information.py | 133 --
 dataset_build/dataset_build.conf | 8 -
 dataset_build/dataset_build.py | 144 ---
 dataset_build/feature_statistics.conf | 8 -
 dataset_build/feature_statistics.py | 164 ---
 dataset_build/file_digest.py | 96 --
 dataset_build/get_lost.c | 116 --
 dataset_build/grain.conf | 5 -
 dataset_build/td_classification.py | 5 -
 dataset_build/vedio_id_build.c | 171 ---
 file_digest.conf | 3 -
 file_digest.py | 104 --
 get_td_mistake_lost/CMakeLists.txt | 11 -
 get_td_mistake_lost/file_digest.conf | 6 -
 get_td_mistake_lost/file_digest.py | 104 --
 get_td_mistake_lost/get_TD_SFH.c | 162 ---
 get_td_mistake_lost/get_lost_rate.c | 210 ---
 get_td_mistake_lost/get_mistake_level.c | 366 ------
 get_td_mistake_lost/get_td_mistake_lost.sh | 5 -
 get_td_mistake_lost/gram_index_engine.c | 1354 --------------------
 get_td_mistake_lost/new_TD.conf | 3 -
 get_td_mistake_lost/new_TD.py | 34 -
 include/gram_index_engine.h | 68 +
 rssb_statistics/all_len_st.py | 17 -
 rssb_statistics/delete_len_st.py | 48 -
 rssb_statistics/find_lost_td.conf | 14 -
 rssb_statistics/find_lost_td.py | 147 ---
 rssb_statistics/harm_len_st.py | 29 -
 rssb_statistics/recv_survey_mid_st.py | 3 -
 rssb_statistics/service_st.py | 29 -
 rssb_statistics/un_recv_st.py | 36 -
 rssb_statistics/un_recv_survey_mid_st.py | 3 -
 sfh_integrate/SFH_function.c | 177 ---
 src/CMakeCache.txt | 278 ++++
 src/CMakeFiles/CMakeCCompiler.cmake | 36 +
 src/CMakeFiles/CMakeCXXCompiler.cmake | 36 +
 src/CMakeFiles/CMakeDetermineCompilerABI_C.bin | Bin 0 -> 7045 bytes
 src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin | Bin 0 -> 7331 bytes
 src/CMakeFiles/CMakeDirectoryInformation.cmake | 21 +
 src/CMakeFiles/CMakeOutput.log | 89 ++
 src/CMakeFiles/CMakeSystem.cmake | 15 +
 src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c | 182 +++
 src/CMakeFiles/CompilerIdC/a.out | Bin 0 -> 6518 bytes
 .../CompilerIdCXX/CMakeCXXCompilerId.cpp | 169 +++
 src/CMakeFiles/CompilerIdCXX/a.out | Bin 0 -> 6891 bytes
 src/CMakeFiles/Makefile.cmake | 52 +
 src/CMakeFiles/Makefile2 | 99 ++
 src/CMakeFiles/Progress/1 | 1 +
 src/CMakeFiles/Progress/count.txt | 1 +
 src/CMakeFiles/cmake.check_cache | 1 +
 src/CMakeFiles/grain.dir/C.includecache | 24 +
 src/CMakeFiles/grain.dir/DependInfo.cmake | 13 +
 src/CMakeFiles/grain.dir/build.make | 103 ++
 src/CMakeFiles/grain.dir/cmake_clean.cmake | 10 +
 src/CMakeFiles/grain.dir/depend.internal | 5 +
 src/CMakeFiles/grain.dir/depend.make | 5 +
 src/CMakeFiles/grain.dir/flags.make | 8 +
 src/CMakeFiles/grain.dir/link.txt | 1 +
 src/CMakeFiles/grain.dir/progress.make | 2 +
 src/CMakeFiles/progress.make | 1 +
 src/Makefile | 163 +++
 ...3\243\347\240\201\350\257\264\346\230\216.docx" | Bin 0 -> 13258 bytes
 src/cmake_install.cmake | 44 +
 src/dataset_build/CMakeLists.txt | 11 +
 src/dataset_build/based_sfh.conf | 3 +
 src/dataset_build/based_sfh.py | 44 +
 src/dataset_build/cal_information.conf | 5 +
 src/dataset_build/cal_information.py | 133 ++
 src/dataset_build/dataset_build.conf | 8 +
 src/dataset_build/dataset_build.py | 144 +++
 src/dataset_build/feature_statistics.conf | 8 +
 src/dataset_build/feature_statistics.py | 164 +++
 src/dataset_build/file_digest.py | 96 ++
 src/dataset_build/get_lost.c | 116 ++
 src/dataset_build/grain.conf | 5 +
 src/dataset_build/td_classification.py | 5 +
 src/dataset_build/vedio_id_build.c | 171 +++
 src/file_digest.conf | 3 +
 src/file_digest.py | 104 ++
 src/get_td_mistake_lost/CMakeLists.txt | 11 +
 src/get_td_mistake_lost/file_digest.conf | 6 +
 src/get_td_mistake_lost/file_digest.py | 104 ++
 src/get_td_mistake_lost/get_TD_SFH.c | 162 +++
 src/get_td_mistake_lost/get_lost_rate.c | 210 +++
 src/get_td_mistake_lost/get_mistake_level.c | 366 ++++++
 src/get_td_mistake_lost/get_td_mistake_lost.sh | 5 +
 src/get_td_mistake_lost/gram_index_engine.c | 1354 ++++++++++++++++++++
 src/get_td_mistake_lost/new_TD.conf | 3 +
 src/get_td_mistake_lost/new_TD.py | 34 +
 src/rssb_statistics/all_len_st.py | 17 +
 src/rssb_statistics/delete_len_st.py | 48 +
 src/rssb_statistics/find_lost_td.conf | 14 +
 src/rssb_statistics/find_lost_td.py | 147 +++
 src/rssb_statistics/harm_len_st.py | 29 +
 src/rssb_statistics/recv_survey_mid_st.py | 3 +
 src/rssb_statistics/service_st.py | 29 +
 src/rssb_statistics/un_recv_st.py | 36 +
 src/rssb_statistics/un_recv_survey_mid_st.py | 3 +
 src/sfh_integrate/SFH_function.c | 177 +++
 src/test/digest_temp | 8 +
 test/digest_temp | 8 -
 135 files changed, 5213 insertions(+), 5145 deletions(-)
 delete mode 100644 CMakeCache.txt
 delete mode 100644 CMakeFiles/CMakeCCompiler.cmake
 delete mode 100644 CMakeFiles/CMakeCXXCompiler.cmake
 delete mode 100644 CMakeFiles/CMakeDetermineCompilerABI_C.bin
 delete mode 100644 CMakeFiles/CMakeDetermineCompilerABI_CXX.bin
 delete mode 100644 CMakeFiles/CMakeDirectoryInformation.cmake
 delete mode 100644 CMakeFiles/CMakeOutput.log
 delete mode 100644 CMakeFiles/CMakeSystem.cmake
 delete mode 100644 CMakeFiles/CompilerIdC/CMakeCCompilerId.c
 delete mode 100644 CMakeFiles/CompilerIdC/a.out
 delete mode 100644 CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp
 delete mode 100644 CMakeFiles/CompilerIdCXX/a.out
 delete mode 100644 CMakeFiles/Makefile.cmake
 delete mode 100644 CMakeFiles/Makefile2
 delete mode 100644 CMakeFiles/Progress/1
 delete mode 100644 CMakeFiles/Progress/count.txt
 delete mode 100644 CMakeFiles/cmake.check_cache
 delete mode 100644 CMakeFiles/grain.dir/C.includecache
 delete mode 100644 CMakeFiles/grain.dir/DependInfo.cmake
 delete mode 100644 CMakeFiles/grain.dir/build.make
 delete mode 100644 CMakeFiles/grain.dir/cmake_clean.cmake
 delete mode 100644 CMakeFiles/grain.dir/depend.internal
 delete mode 100644 CMakeFiles/grain.dir/depend.make
 delete mode 100644 CMakeFiles/grain.dir/flags.make
 delete mode 100644 CMakeFiles/grain.dir/link.txt
 delete mode 100644 CMakeFiles/grain.dir/progress.make
 delete mode 100644 CMakeFiles/progress.make
 delete mode 100644 Makefile
 delete mode 100644 "TD\344\273\243\347\240\201\350\257\264\346\230\216.docx"
 delete mode 100644 cmake_install.cmake
 delete mode 100644 dataset_build/CMakeLists.txt
 delete mode 100644 dataset_build/based_sfh.conf
 delete mode 100644 dataset_build/based_sfh.py
 delete mode 100644 dataset_build/cal_information.conf
 delete mode 100644 dataset_build/cal_information.py
 delete mode 100644 dataset_build/dataset_build.conf
 delete mode 100644 dataset_build/dataset_build.py
 delete mode 100644 dataset_build/feature_statistics.conf
 delete mode 100644 dataset_build/feature_statistics.py
 delete mode 100644 dataset_build/file_digest.py
 delete mode 100644 dataset_build/get_lost.c
 delete mode 100644 dataset_build/grain.conf
 delete mode 100644 dataset_build/td_classification.py
 delete mode 100644 dataset_build/vedio_id_build.c
 delete mode 100644 file_digest.conf
 delete mode 100644 file_digest.py
 delete mode 100644 get_td_mistake_lost/CMakeLists.txt
 delete mode 100644 get_td_mistake_lost/file_digest.conf
 delete mode 100644 get_td_mistake_lost/file_digest.py
 delete mode 100644 get_td_mistake_lost/get_TD_SFH.c
 delete mode 100644 get_td_mistake_lost/get_lost_rate.c
 delete mode 100644 get_td_mistake_lost/get_mistake_level.c
 delete mode 100644 get_td_mistake_lost/get_td_mistake_lost.sh
 delete mode 100644 get_td_mistake_lost/gram_index_engine.c
 delete mode 100644 get_td_mistake_lost/new_TD.conf
 delete mode 100644 get_td_mistake_lost/new_TD.py
 create mode 100644 include/gram_index_engine.h
 delete mode 100644 rssb_statistics/all_len_st.py
 delete mode 100644 rssb_statistics/delete_len_st.py
 delete mode 100644 rssb_statistics/find_lost_td.conf
 delete mode 100644 rssb_statistics/find_lost_td.py
 delete mode 100644 rssb_statistics/harm_len_st.py
 delete mode 100644 rssb_statistics/recv_survey_mid_st.py
 delete mode 100644 rssb_statistics/service_st.py
 delete mode 100644 rssb_statistics/un_recv_st.py
 delete mode 100644 rssb_statistics/un_recv_survey_mid_st.py
 delete mode 100644 sfh_integrate/SFH_function.c
 create mode 100644 src/CMakeCache.txt
 create mode 100644 src/CMakeFiles/CMakeCCompiler.cmake
 create mode 100644 src/CMakeFiles/CMakeCXXCompiler.cmake
 create mode 100644 src/CMakeFiles/CMakeDetermineCompilerABI_C.bin
 create mode 100644 src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin
 create mode 100644 src/CMakeFiles/CMakeDirectoryInformation.cmake
 create mode 100644 src/CMakeFiles/CMakeOutput.log
 create mode 100644 src/CMakeFiles/CMakeSystem.cmake
 create mode 100644 src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c
 create mode 100644 src/CMakeFiles/CompilerIdC/a.out
 create mode 100644 src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp
 create mode 100644 src/CMakeFiles/CompilerIdCXX/a.out
 create mode 100644 src/CMakeFiles/Makefile.cmake
 create mode 100644 src/CMakeFiles/Makefile2
 create mode 100644 src/CMakeFiles/Progress/1
 create mode 100644 src/CMakeFiles/Progress/count.txt
 create mode 100644 src/CMakeFiles/cmake.check_cache
 create mode 100644 src/CMakeFiles/grain.dir/C.includecache
 create mode 100644 src/CMakeFiles/grain.dir/DependInfo.cmake
 create mode 100644 src/CMakeFiles/grain.dir/build.make
 create mode 100644 src/CMakeFiles/grain.dir/cmake_clean.cmake
 create mode 100644 src/CMakeFiles/grain.dir/depend.internal
 create mode 100644 src/CMakeFiles/grain.dir/depend.make
 create mode 100644 src/CMakeFiles/grain.dir/flags.make
 create mode 100644 src/CMakeFiles/grain.dir/link.txt
 create mode 100644 src/CMakeFiles/grain.dir/progress.make
 create mode 100644 src/CMakeFiles/progress.make
 create mode 100644 src/Makefile
 create mode 100644 "src/TD\344\273\243\347\240\201\350\257\264\346\230\216.docx"
 create mode 100644 src/cmake_install.cmake
 create mode 100644 src/dataset_build/CMakeLists.txt
 create mode 100644 src/dataset_build/based_sfh.conf
 create mode 100644 src/dataset_build/based_sfh.py
 create mode 100644 src/dataset_build/cal_information.conf
 create mode 100644 src/dataset_build/cal_information.py
 create mode 100644 src/dataset_build/dataset_build.conf
 create mode 100644 src/dataset_build/dataset_build.py
 create mode 100644 src/dataset_build/feature_statistics.conf
 create mode 100644 src/dataset_build/feature_statistics.py
 create mode 100644 src/dataset_build/file_digest.py
 create mode 100644 src/dataset_build/get_lost.c
 create mode 100644 src/dataset_build/grain.conf
 create mode 100644 src/dataset_build/td_classification.py
 create mode 100644 src/dataset_build/vedio_id_build.c
 create mode 100644 src/file_digest.conf
 create mode 100644 src/file_digest.py
 create mode 100644 src/get_td_mistake_lost/CMakeLists.txt
 create mode 100644 src/get_td_mistake_lost/file_digest.conf
 create mode 100644 src/get_td_mistake_lost/file_digest.py
 create mode 100644 src/get_td_mistake_lost/get_TD_SFH.c
 create mode 100644 src/get_td_mistake_lost/get_lost_rate.c
 create mode 100644 src/get_td_mistake_lost/get_mistake_level.c
 create mode 100644 src/get_td_mistake_lost/get_td_mistake_lost.sh
 create mode 100644 src/get_td_mistake_lost/gram_index_engine.c
 create mode 100644 src/get_td_mistake_lost/new_TD.conf
 create mode 100644 src/get_td_mistake_lost/new_TD.py
 create mode 100644 src/rssb_statistics/all_len_st.py
 create mode 100644 src/rssb_statistics/delete_len_st.py
 create mode 100644 src/rssb_statistics/find_lost_td.conf
 create mode 100644 src/rssb_statistics/find_lost_td.py
 create mode 100644 src/rssb_statistics/harm_len_st.py
 create mode 100644 src/rssb_statistics/recv_survey_mid_st.py
 create mode 100644 src/rssb_statistics/service_st.py
 create mode 100644 src/rssb_statistics/un_recv_st.py
 create mode 100644 src/rssb_statistics/un_recv_survey_mid_st.py
 create mode 100644 src/sfh_integrate/SFH_function.c
 create mode 100644 src/test/digest_temp
 delete mode 100644 test/digest_temp

diff --git a/CMakeCache.txt b/CMakeCache.txt
deleted file mode 100644
index 5aa46ec..0000000
--- a/CMakeCache.txt
+++ /dev/null
@@ -1,278 +0,0 @@
-# This is the CMakeCache file.
-# For build in directory: /home/chenguanlin/TD_evaluation/src
-# It was generated by CMake: /usr/bin/cmake
-# You can edit this file to change values found and used by cmake.
-# If you do not want to change any of the values, simply exit the editor.
-# If you do want to change a value, simply edit, save, and exit the editor.
-# The syntax for the file is as follows:
-# KEY:TYPE=VALUE
-# KEY is the name of a variable in the cache.
-# TYPE is a hint to GUI's for the type of VALUE, DO NOT EDIT TYPE!.
-# VALUE is the current value for the KEY.
- -######################## -# EXTERNAL cache entries -######################## - -//Value Computed by CMake -CALCULATE_BINARY_DIR:STATIC=/home/chenguanlin/TD_evaluation/src - -//Value Computed by CMake -CALCULATE_SOURCE_DIR:STATIC=/home/chenguanlin/TD_evaluation/src - -//Path to a program. -CMAKE_AR:FILEPATH=/usr/bin/ar - -//Choose the type of build, options are: None(CMAKE_CXX_FLAGS or -// CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel. -CMAKE_BUILD_TYPE:STRING= - -//Enable/Disable color output during build. -CMAKE_COLOR_MAKEFILE:BOOL=ON - -//CXX compiler. -CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++ - -//Flags used by the compiler during all build types. -CMAKE_CXX_FLAGS:STRING= - -//Flags used by the compiler during debug builds. -CMAKE_CXX_FLAGS_DEBUG:STRING=-g - -//Flags used by the compiler during release minsize builds. -CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG - -//Flags used by the compiler during release builds (/MD /Ob1 /Oi -// /Ot /Oy /Gs will produce slightly less optimized but smaller -// files). -CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG - -//Flags used by the compiler during Release with Debug Info builds. -CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g - -//C compiler. -CMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc - -//Flags used by the compiler during all build types. -CMAKE_C_FLAGS:STRING= - -//Flags used by the compiler during debug builds. -CMAKE_C_FLAGS_DEBUG:STRING=-g - -//Flags used by the compiler during release minsize builds. -CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG - -//Flags used by the compiler during release builds (/MD /Ob1 /Oi -// /Ot /Oy /Gs will produce slightly less optimized but smaller -// files). -CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG - -//Flags used by the compiler during Release with Debug Info builds. -CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g - -//Flags used by the linker. -CMAKE_EXE_LINKER_FLAGS:STRING= - -//Flags used by the linker during debug builds. -CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING= - -//Flags used by the linker during release minsize builds. -CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING= - -//Flags used by the linker during release builds. -CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING= - -//Flags used by the linker during Release with Debug Info builds. -CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING= - -//Install path prefix, prepended onto install directories. -CMAKE_INSTALL_PREFIX:PATH=/usr/local - -//Path to a program. -CMAKE_LINKER:FILEPATH=/usr/bin/ld - -//Path to a program. -CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake - -//Flags used by the linker during the creation of modules. -CMAKE_MODULE_LINKER_FLAGS:STRING= - -//Flags used by the linker during debug builds. -CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING= - -//Flags used by the linker during release minsize builds. -CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING= - -//Flags used by the linker during release builds. -CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING= - -//Flags used by the linker during Release with Debug Info builds. -CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING= - -//Path to a program. -CMAKE_NM:FILEPATH=/usr/bin/nm - -//Path to a program. -CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy - -//Path to a program. -CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump - -//Path to a program. -CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib - -//Flags used by the linker during the creation of dll's. -CMAKE_SHARED_LINKER_FLAGS:STRING= - -//Flags used by the linker during debug builds. -CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING= - -//Flags used by the linker during release minsize builds. 
-CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING= - -//Flags used by the linker during release builds. -CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING= - -//Flags used by the linker during Release with Debug Info builds. -CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING= - -//If set, runtime paths are not added when using shared libraries. -CMAKE_SKIP_RPATH:BOOL=NO - -//Path to a program. -CMAKE_STRIP:FILEPATH=/usr/bin/strip - -//If true, cmake will use relative paths in makefiles and projects. -CMAKE_USE_RELATIVE_PATHS:BOOL=OFF - -//If this value is on, makefiles will be generated without the -// .SILENT directive, and all commands will be echoed to the console -// during the make. This is useful for debugging only. With Visual -// Studio IDE projects all commands are done without /nologo. -CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE - - -######################## -# INTERNAL cache entries -######################## - -//Advanced flag for variable: CMAKE_AR -CMAKE_AR-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_BUILD_TOOL -CMAKE_BUILD_TOOL-ADVANCED:INTERNAL=1 -//What is the target build tool cmake is generating for. -CMAKE_BUILD_TOOL:INTERNAL=/usr/bin/gmake -//This is the directory where this CMakeCahe.txt was created -CMAKE_CACHEFILE_DIR:INTERNAL=/home/chenguanlin/TD_evaluation/src -//Major version of cmake used to create the current loaded cache -CMAKE_CACHE_MAJOR_VERSION:INTERNAL=2 -//Minor version of cmake used to create the current loaded cache -CMAKE_CACHE_MINOR_VERSION:INTERNAL=6 -//Major version of cmake used to create the current loaded cache -CMAKE_CACHE_RELEASE_VERSION:INTERNAL=patch 4 -//Advanced flag for variable: CMAKE_COLOR_MAKEFILE -CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1 -//Path to CMake executable. -CMAKE_COMMAND:INTERNAL=/usr/bin/cmake -//Path to cpack program executable. -CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack -//Path to ctest program executable. -CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest -//Advanced flag for variable: CMAKE_CXX_COMPILER -CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 -CMAKE_CXX_COMPILER_WORKS:INTERNAL=1 -//Advanced flag for variable: CMAKE_CXX_FLAGS -CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_CXX_FLAGS_DEBUG -CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_CXX_FLAGS_MINSIZEREL -CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_CXX_FLAGS_RELEASE -CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO -CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_C_COMPILER -CMAKE_C_COMPILER-ADVANCED:INTERNAL=1 -CMAKE_C_COMPILER_WORKS:INTERNAL=1 -//Advanced flag for variable: CMAKE_C_FLAGS -CMAKE_C_FLAGS-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_C_FLAGS_DEBUG -CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_C_FLAGS_MINSIZEREL -CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_C_FLAGS_RELEASE -CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_C_FLAGS_RELWITHDEBINFO -CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//Result of TRY_COMPILE -CMAKE_DETERMINE_CXX_ABI_COMPILED:INTERNAL=TRUE -//Result of TRY_COMPILE -CMAKE_DETERMINE_C_ABI_COMPILED:INTERNAL=TRUE -//Path to cache edit program executable. 
-CMAKE_EDIT_COMMAND:INTERNAL=/usr/bin/ccmake -//Executable file format -CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF -//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS -CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG -CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL -CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE -CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO -CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//Name of generator. -CMAKE_GENERATOR:INTERNAL=Unix Makefiles -//Start directory with the top level CMakeLists.txt file for this -// project -CMAKE_HOME_DIRECTORY:INTERNAL=/home/chenguanlin/TD_evaluation/src -//Install .so files without execute permission. -CMAKE_INSTALL_SO_NO_EXE:INTERNAL=0 -//Advanced flag for variable: CMAKE_LINKER -CMAKE_LINKER-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_MAKE_PROGRAM -CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS -CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG -CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL -CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE -CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO -CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_NM -CMAKE_NM-ADVANCED:INTERNAL=1 -//number of local generators -CMAKE_NUMBER_OF_LOCAL_GENERATORS:INTERNAL=1 -//Advanced flag for variable: CMAKE_OBJCOPY -CMAKE_OBJCOPY-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_OBJDUMP -CMAKE_OBJDUMP-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_RANLIB -CMAKE_RANLIB-ADVANCED:INTERNAL=1 -//Path to CMake installation. 
-CMAKE_ROOT:INTERNAL=/usr/share/cmake -//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS -CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG -CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL -CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE -CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO -CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_SKIP_RPATH -CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_STRIP -CMAKE_STRIP-ADVANCED:INTERNAL=1 -//uname command -CMAKE_UNAME:INTERNAL=/bin/uname -//Advanced flag for variable: CMAKE_USE_RELATIVE_PATHS -CMAKE_USE_RELATIVE_PATHS-ADVANCED:INTERNAL=1 -//Advanced flag for variable: CMAKE_VERBOSE_MAKEFILE -CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 - diff --git a/CMakeFiles/CMakeCCompiler.cmake b/CMakeFiles/CMakeCCompiler.cmake deleted file mode 100644 index 1d1bbea..0000000 --- a/CMakeFiles/CMakeCCompiler.cmake +++ /dev/null @@ -1,36 +0,0 @@ -SET(CMAKE_C_COMPILER "/usr/bin/gcc") -SET(CMAKE_C_COMPILER_ARG1 "") -SET(CMAKE_C_COMPILER_ID "GNU") -SET(CMAKE_C_PLATFORM_ID "Linux") -SET(CMAKE_AR "/usr/bin/ar") -SET(CMAKE_RANLIB "/usr/bin/ranlib") -SET(CMAKE_COMPILER_IS_GNUCC 1) -SET(CMAKE_C_COMPILER_LOADED 1) -SET(CMAKE_COMPILER_IS_MINGW ) -SET(CMAKE_COMPILER_IS_CYGWIN ) -IF(CMAKE_COMPILER_IS_CYGWIN) - SET(CYGWIN 1) - SET(UNIX 1) -ENDIF(CMAKE_COMPILER_IS_CYGWIN) - -SET(CMAKE_C_COMPILER_ENV_VAR "CC") - -IF(CMAKE_COMPILER_IS_MINGW) - SET(MINGW 1) -ENDIF(CMAKE_COMPILER_IS_MINGW) -SET(CMAKE_C_COMPILER_ID_RUN 1) -SET(CMAKE_C_SOURCE_FILE_EXTENSIONS c) -SET(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) -SET(CMAKE_C_LINKER_PREFERENCE 10) - -# Save compiler ABI information. -SET(CMAKE_C_SIZEOF_DATA_PTR "8") -SET(CMAKE_C_COMPILER_ABI "ELF") - -IF(CMAKE_C_SIZEOF_DATA_PTR) - SET(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") -ENDIF(CMAKE_C_SIZEOF_DATA_PTR) - -IF(CMAKE_C_COMPILER_ABI) - SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") -ENDIF(CMAKE_C_COMPILER_ABI) diff --git a/CMakeFiles/CMakeCXXCompiler.cmake b/CMakeFiles/CMakeCXXCompiler.cmake deleted file mode 100644 index 64dad77..0000000 --- a/CMakeFiles/CMakeCXXCompiler.cmake +++ /dev/null @@ -1,36 +0,0 @@ -SET(CMAKE_CXX_COMPILER "/usr/bin/c++") -SET(CMAKE_CXX_COMPILER_ARG1 "") -SET(CMAKE_CXX_COMPILER_ID "GNU") -SET(CMAKE_CXX_PLATFORM_ID "Linux") -SET(CMAKE_AR "/usr/bin/ar") -SET(CMAKE_RANLIB "/usr/bin/ranlib") -SET(CMAKE_COMPILER_IS_GNUCXX 1) -SET(CMAKE_CXX_COMPILER_LOADED 1) -SET(CMAKE_COMPILER_IS_MINGW ) -SET(CMAKE_COMPILER_IS_CYGWIN ) -IF(CMAKE_COMPILER_IS_CYGWIN) - SET(CYGWIN 1) - SET(UNIX 1) -ENDIF(CMAKE_COMPILER_IS_CYGWIN) - -SET(CMAKE_CXX_COMPILER_ENV_VAR "CXX") - -IF(CMAKE_COMPILER_IS_MINGW) - SET(MINGW 1) -ENDIF(CMAKE_COMPILER_IS_MINGW) -SET(CMAKE_CXX_COMPILER_ID_RUN 1) -SET(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;H;o;O;obj;OBJ;def;DEF;rc;RC) -SET(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm) -SET(CMAKE_CXX_LINKER_PREFERENCE 30) - -# Save compiler ABI information. 
-SET(CMAKE_CXX_SIZEOF_DATA_PTR "8") -SET(CMAKE_CXX_COMPILER_ABI "ELF") - -IF(CMAKE_CXX_SIZEOF_DATA_PTR) - SET(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") -ENDIF(CMAKE_CXX_SIZEOF_DATA_PTR) - -IF(CMAKE_CXX_COMPILER_ABI) - SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") -ENDIF(CMAKE_CXX_COMPILER_ABI) diff --git a/CMakeFiles/CMakeDetermineCompilerABI_C.bin b/CMakeFiles/CMakeDetermineCompilerABI_C.bin deleted file mode 100644 index 9a91ed2..0000000 Binary files a/CMakeFiles/CMakeDetermineCompilerABI_C.bin and /dev/null differ diff --git a/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin b/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin deleted file mode 100644 index be1e6e2..0000000 Binary files a/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin and /dev/null differ diff --git a/CMakeFiles/CMakeDirectoryInformation.cmake b/CMakeFiles/CMakeDirectoryInformation.cmake deleted file mode 100644 index 66066d5..0000000 --- a/CMakeFiles/CMakeDirectoryInformation.cmake +++ /dev/null @@ -1,21 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -# Relative path conversion top directories. -SET(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/chenguanlin/TD_evaluation/src") -SET(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/chenguanlin/TD_evaluation/src") - -# Force unix paths in dependencies. -SET(CMAKE_FORCE_UNIX_PATHS 1) - -# The C and CXX include file search paths: -SET(CMAKE_C_INCLUDE_PATH - ) -SET(CMAKE_CXX_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH}) -SET(CMAKE_Fortran_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH}) - -# The C and CXX include file regular expressions for this directory. -SET(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") -SET(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") -SET(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) -SET(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) diff --git a/CMakeFiles/CMakeOutput.log b/CMakeFiles/CMakeOutput.log deleted file mode 100644 index 3475153..0000000 --- a/CMakeFiles/CMakeOutput.log +++ /dev/null @@ -1,89 +0,0 @@ -The system is: Linux - 2.6.32-358.el6.x86_64 - x86_64 -Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. -Compiler: /usr/bin/gcc -Build flags: -Id flags: - -The output was: -0 - - -Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out" - -The C compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdC/a.out" - -Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. 
-Compiler: /usr/bin/c++ -Build flags: -Id flags: - -The output was: -0 -CMakeCXXCompilerId.cpp:67: warning: deprecated conversion from string constant to ‘char*’ -CMakeCXXCompilerId.cpp:157: warning: deprecated conversion from string constant to ‘char*’ - - -Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out" - -The CXX compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdCXX/a.out" - -Determining if the C compiler works passed with the following output: -Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp - -Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" -/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build -gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' -/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 -Building C object CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -c /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCCompiler.c -Linking C executable cmTryCompileExec -/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 -/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -o cmTryCompileExec -rdynamic -gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' - - -Detecting C compiler ABI info compiled with the following output: -Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp - -Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" -/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build -gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' -/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 -Building C object CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake/Modules/CMakeCCompilerABI.c -Linking C executable cmTryCompileExec -/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 -/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -o cmTryCompileExec -rdynamic -gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' - - -Determining if the CXX compiler works passed with the following output: -Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp - -Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" -/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build -gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' -/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 -Building CXX object CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -c /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCXXCompiler.cxx -Linking CXX executable cmTryCompileExec -/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 -/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -o cmTryCompileExec -rdynamic -gmake[1]: Leaving directory 
`/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' - - -Detecting CXX compiler ABI info compiled with the following output: -Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp - -Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" -/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build -gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' -/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 -Building CXX object CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp -Linking CXX executable cmTryCompileExec -/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 -/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -o cmTryCompileExec -rdynamic -gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' - - diff --git a/CMakeFiles/CMakeSystem.cmake b/CMakeFiles/CMakeSystem.cmake deleted file mode 100644 index 3b3e967..0000000 --- a/CMakeFiles/CMakeSystem.cmake +++ /dev/null @@ -1,15 +0,0 @@ - - -SET(CMAKE_SYSTEM "Linux-2.6.32-358.el6.x86_64") -SET(CMAKE_SYSTEM_NAME "Linux") -SET(CMAKE_SYSTEM_VERSION "2.6.32-358.el6.x86_64") -SET(CMAKE_SYSTEM_PROCESSOR "x86_64") - -SET(CMAKE_HOST_SYSTEM "Linux-2.6.32-358.el6.x86_64") -SET(CMAKE_HOST_SYSTEM_NAME "Linux") -SET(CMAKE_HOST_SYSTEM_VERSION "2.6.32-358.el6.x86_64") -SET(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") - -SET(CMAKE_CROSSCOMPILING "FALSE") - -SET(CMAKE_SYSTEM_LOADED 1) diff --git a/CMakeFiles/CompilerIdC/CMakeCCompilerId.c b/CMakeFiles/CompilerIdC/CMakeCCompilerId.c deleted file mode 100644 index 7fd0088..0000000 --- a/CMakeFiles/CompilerIdC/CMakeCCompilerId.c +++ /dev/null @@ -1,182 +0,0 @@ -#ifdef __cplusplus -# error "A C++ compiler has been selected for C." -#endif - -#if defined(__18CXX) -# define ID_VOID_MAIN -#endif - -#if defined(__INTEL_COMPILER) || defined(__ICC) -# define COMPILER_ID "Intel" - -#elif defined(__BORLANDC__) -# define COMPILER_ID "Borland" - -#elif defined(__WATCOMC__) -# define COMPILER_ID "Watcom" - -#elif defined(__SUNPRO_C) -# define COMPILER_ID "SunPro" - -#elif defined(__HP_cc) -# define COMPILER_ID "HP" - -#elif defined(__DECC) -# define COMPILER_ID "Compaq" - -#elif defined(__IBMC__) -# define COMPILER_ID "VisualAge" - -#elif defined(__PGI) -# define COMPILER_ID "PGI" - -#elif defined(__GNUC__) -# define COMPILER_ID "GNU" - -#elif defined(_MSC_VER) -# define COMPILER_ID "MSVC" - -#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) -/* Analog Devices C++ compiler for Blackfin, TigerSHARC and - SHARC (21000) DSPs */ -# define COMPILER_ID "ADSP" - -/* IAR Systems compiler for embedded systems. - http://www.iar.com - Not supported yet by CMake -#elif defined(__IAR_SYSTEMS_ICC__) -# define COMPILER_ID "IAR" */ - -/* sdcc, the small devices C compiler for embedded systems, - http://sdcc.sourceforge.net */ -#elif defined(SDCC) -# define COMPILER_ID "SDCC" - -#elif defined(_COMPILER_VERSION) -# define COMPILER_ID "MIPSpro" - -/* This compiler is either not known or is too old to define an - identification macro. Try to identify the platform and guess that - it is the native compiler. 
*/ -#elif defined(__sgi) -# define COMPILER_ID "MIPSpro" - -#elif defined(__hpux) || defined(__hpua) -# define COMPILER_ID "HP" - -#else /* unknown compiler */ -# define COMPILER_ID "" - -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. */ -char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; - -/* Identify known platforms by name. */ -#if defined(__linux) || defined(__linux__) || defined(linux) -# define PLATFORM_ID "Linux" - -#elif defined(__CYGWIN__) -# define PLATFORM_ID "Cygwin" - -#elif defined(__MINGW32__) -# define PLATFORM_ID "MinGW" - -#elif defined(__APPLE__) -# define PLATFORM_ID "Darwin" - -#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) -# define PLATFORM_ID "Windows" - -#elif defined(__FreeBSD__) || defined(__FreeBSD) -# define PLATFORM_ID "FreeBSD" - -#elif defined(__NetBSD__) || defined(__NetBSD) -# define PLATFORM_ID "NetBSD" - -#elif defined(__OpenBSD__) || defined(__OPENBSD) -# define PLATFORM_ID "OpenBSD" - -#elif defined(__sun) || defined(sun) -# define PLATFORM_ID "SunOS" - -#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) -# define PLATFORM_ID "AIX" - -#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) -# define PLATFORM_ID "IRIX" - -#elif defined(__hpux) || defined(__hpux__) -# define PLATFORM_ID "HP-UX" - -#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU) -# define PLATFORM_ID "Haiku" -/* Haiku also defines __BEOS__ so we must - put it prior to the check for __BEOS__ -*/ - -#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) -# define PLATFORM_ID "BeOS" - -#elif defined(__QNX__) || defined(__QNXNTO__) -# define PLATFORM_ID "QNX" - -#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) -# define PLATFORM_ID "Tru64" - -#elif defined(__riscos) || defined(__riscos__) -# define PLATFORM_ID "RISCos" - -#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) -# define PLATFORM_ID "SINIX" - -#elif defined(__UNIX_SV__) -# define PLATFORM_ID "UNIX_SV" - -#elif defined(__bsdos__) -# define PLATFORM_ID "BSDOS" - -#elif defined(_MPRAS) || defined(MPRAS) -# define PLATFORM_ID "MP-RAS" - -#elif defined(__osf) || defined(__osf__) -# define PLATFORM_ID "OSF1" - -#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) -# define PLATFORM_ID "SCO_SV" - -#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) -# define PLATFORM_ID "ULTRIX" - -#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) -# define PLATFORM_ID "Xenix" - -#else /* unknown platform */ -# define PLATFORM_ID "" - -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. 
*/ -char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; - - -/*--------------------------------------------------------------------------*/ - -#ifdef ID_VOID_MAIN -void main() {} -#else -int main(int argc, char* argv[]) -{ - int require = 0; - require += info_compiler[argc]; - require += info_platform[argc]; - (void)argv; - return require; -} -#endif diff --git a/CMakeFiles/CompilerIdC/a.out b/CMakeFiles/CompilerIdC/a.out deleted file mode 100644 index c389161..0000000 Binary files a/CMakeFiles/CompilerIdC/a.out and /dev/null differ diff --git a/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp b/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp deleted file mode 100644 index f8c041f..0000000 --- a/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp +++ /dev/null @@ -1,169 +0,0 @@ -/* This source file must have a .cpp extension so that all C++ compilers - recognize the extension without flags. Borland does not know .cxx for - example. */ -#ifndef __cplusplus -# error "A C compiler has been selected for C++." -#endif - -#if defined(__COMO__) -# define COMPILER_ID "Comeau" - -#elif defined(__INTEL_COMPILER) || defined(__ICC) -# define COMPILER_ID "Intel" - -#elif defined(__BORLANDC__) -# define COMPILER_ID "Borland" - -#elif defined(__WATCOMC__) -# define COMPILER_ID "Watcom" - -#elif defined(__SUNPRO_CC) -# define COMPILER_ID "SunPro" - -#elif defined(__HP_aCC) -# define COMPILER_ID "HP" - -#elif defined(__DECCXX) -# define COMPILER_ID "Compaq" - -#elif defined(__IBMCPP__) -# define COMPILER_ID "VisualAge" - -#elif defined(__PGI) -# define COMPILER_ID "PGI" - -#elif defined(__GNUC__) -# define COMPILER_ID "GNU" - -#elif defined(_MSC_VER) -# define COMPILER_ID "MSVC" - -#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) -/* Analog Devices C++ compiler for Blackfin, TigerSHARC and - SHARC (21000) DSPs */ -# define COMPILER_ID "ADSP" - -#elif defined(_COMPILER_VERSION) -# define COMPILER_ID "MIPSpro" - -/* This compiler is either not known or is too old to define an - identification macro. Try to identify the platform and guess that - it is the native compiler. */ -#elif defined(__sgi) -# define COMPILER_ID "MIPSpro" - -#elif defined(__hpux) || defined(__hpua) -# define COMPILER_ID "HP" - -#else /* unknown compiler */ -# define COMPILER_ID "" - -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. */ -char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; - -/* Identify known platforms by name. 
*/ -#if defined(__linux) || defined(__linux__) || defined(linux) -# define PLATFORM_ID "Linux" - -#elif defined(__CYGWIN__) -# define PLATFORM_ID "Cygwin" - -#elif defined(__MINGW32__) -# define PLATFORM_ID "MinGW" - -#elif defined(__APPLE__) -# define PLATFORM_ID "Darwin" - -#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) -# define PLATFORM_ID "Windows" - -#elif defined(__FreeBSD__) || defined(__FreeBSD) -# define PLATFORM_ID "FreeBSD" - -#elif defined(__NetBSD__) || defined(__NetBSD) -# define PLATFORM_ID "NetBSD" - -#elif defined(__OpenBSD__) || defined(__OPENBSD) -# define PLATFORM_ID "OpenBSD" - -#elif defined(__sun) || defined(sun) -# define PLATFORM_ID "SunOS" - -#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) -# define PLATFORM_ID "AIX" - -#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) -# define PLATFORM_ID "IRIX" - -#elif defined(__hpux) || defined(__hpux__) -# define PLATFORM_ID "HP-UX" - -#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU) -# define PLATFORM_ID "Haiku" -/* Haiku also defines __BEOS__ so we must - put it prior to the check for __BEOS__ -*/ - -#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) -# define PLATFORM_ID "BeOS" - -#elif defined(__QNX__) || defined(__QNXNTO__) -# define PLATFORM_ID "QNX" - -#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) -# define PLATFORM_ID "Tru64" - -#elif defined(__riscos) || defined(__riscos__) -# define PLATFORM_ID "RISCos" - -#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) -# define PLATFORM_ID "SINIX" - -#elif defined(__UNIX_SV__) -# define PLATFORM_ID "UNIX_SV" - -#elif defined(__bsdos__) -# define PLATFORM_ID "BSDOS" - -#elif defined(_MPRAS) || defined(MPRAS) -# define PLATFORM_ID "MP-RAS" - -#elif defined(__osf) || defined(__osf__) -# define PLATFORM_ID "OSF1" - -#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) -# define PLATFORM_ID "SCO_SV" - -#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) -# define PLATFORM_ID "ULTRIX" - -#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) -# define PLATFORM_ID "Xenix" - -#else /* unknown platform */ -# define PLATFORM_ID "" - -#endif - -/* Construct the string literal in pieces to prevent the source from - getting matched. Store it in a pointer rather than an array - because some compilers will just produce instructions to fill the - array rather than assigning a pointer to a static array. */ -char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; - - -/*--------------------------------------------------------------------------*/ - -int main(int argc, char* argv[]) -{ - int require = 0; - require += info_compiler[argc]; - require += info_platform[argc]; - (void)argv; - return require; -} diff --git a/CMakeFiles/CompilerIdCXX/a.out b/CMakeFiles/CompilerIdCXX/a.out deleted file mode 100644 index 65597e7..0000000 Binary files a/CMakeFiles/CompilerIdCXX/a.out and /dev/null differ diff --git a/CMakeFiles/Makefile.cmake b/CMakeFiles/Makefile.cmake deleted file mode 100644 index 8466809..0000000 --- a/CMakeFiles/Makefile.cmake +++ /dev/null @@ -1,52 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! 
-# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -# The generator used is: -SET(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") - -# The top level Makefile was generated from the following files: -SET(CMAKE_MAKEFILE_DEPENDS - "CMakeCache.txt" - "CMakeFiles/CMakeCCompiler.cmake" - "CMakeFiles/CMakeCXXCompiler.cmake" - "CMakeFiles/CMakeSystem.cmake" - "CMakeLists.txt" - "/usr/share/cmake/Modules/CMakeCCompiler.cmake.in" - "/usr/share/cmake/Modules/CMakeCCompilerABI.c" - "/usr/share/cmake/Modules/CMakeCInformation.cmake" - "/usr/share/cmake/Modules/CMakeCXXCompiler.cmake.in" - "/usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp" - "/usr/share/cmake/Modules/CMakeCXXInformation.cmake" - "/usr/share/cmake/Modules/CMakeCommonLanguageInclude.cmake" - "/usr/share/cmake/Modules/CMakeDetermineCCompiler.cmake" - "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake" - "/usr/share/cmake/Modules/CMakeDetermineCompilerABI.cmake" - "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake" - "/usr/share/cmake/Modules/CMakeDetermineSystem.cmake" - "/usr/share/cmake/Modules/CMakeFindBinUtils.cmake" - "/usr/share/cmake/Modules/CMakeGenericSystem.cmake" - "/usr/share/cmake/Modules/CMakeSystem.cmake.in" - "/usr/share/cmake/Modules/CMakeSystemSpecificInformation.cmake" - "/usr/share/cmake/Modules/CMakeTestCCompiler.cmake" - "/usr/share/cmake/Modules/CMakeTestCXXCompiler.cmake" - "/usr/share/cmake/Modules/CMakeUnixFindMake.cmake" - "/usr/share/cmake/Modules/Platform/Linux.cmake" - "/usr/share/cmake/Modules/Platform/UnixPaths.cmake" - "/usr/share/cmake/Modules/Platform/gcc.cmake" - ) - -# The corresponding makefile is: -SET(CMAKE_MAKEFILE_OUTPUTS - "Makefile" - "CMakeFiles/cmake.check_cache" - "CMakeFiles/CMakeDirectoryInformation.cmake" - ) - -# Byproducts of CMake generate step: -SET(CMAKE_MAKEFILE_PRODUCTS - ) - -# Dependency information for all targets: -SET(CMAKE_DEPEND_INFO_FILES - "CMakeFiles/grain.dir/DependInfo.cmake" - ) diff --git a/CMakeFiles/Makefile2 b/CMakeFiles/Makefile2 deleted file mode 100644 index 405383d..0000000 --- a/CMakeFiles/Makefile2 +++ /dev/null @@ -1,99 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -# Default target executed when no arguments are given to make. -default_target: all -.PHONY : default_target - -# The main recursive all target -all: -.PHONY : all - -# The main recursive preinstall target -preinstall: -.PHONY : preinstall - -#============================================================================= -# Special targets provided by cmake. - -# Disable implicit rules so canoncical targets will work. -.SUFFIXES: - -# Remove some rules from gmake that .SUFFIXES does not remove. -SUFFIXES = - -.SUFFIXES: .hpux_make_needs_suffix_list - -# Suppress display of executed commands. -$(VERBOSE).SILENT: - -# A target that is always out of date. -cmake_force: -.PHONY : cmake_force - -#============================================================================= -# Set environment variables for the build. - -# The shell in which to execute make rules. -SHELL = /bin/sh - -# The CMake executable. -CMAKE_COMMAND = /usr/bin/cmake - -# The command to remove a file. -RM = /usr/bin/cmake -E remove -f - -# The program to use to edit the cache. -CMAKE_EDIT_COMMAND = /usr/bin/ccmake - -# The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src - -# The top-level build directory on which CMake was run. 
-CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src - -#============================================================================= -# Target rules for target CMakeFiles/grain.dir - -# All Build rule for target. -CMakeFiles/grain.dir/all: - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/depend - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build - $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles 1 - @echo "Built target grain" -.PHONY : CMakeFiles/grain.dir/all - -# Include target in all. -all: CMakeFiles/grain.dir/all -.PHONY : all - -# Build rule for subdir invocation for target. -CMakeFiles/grain.dir/rule: cmake_check_build_system - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 1 - $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/grain.dir/all - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0 -.PHONY : CMakeFiles/grain.dir/rule - -# Convenience name for target. -grain: CMakeFiles/grain.dir/rule -.PHONY : grain - -# clean rule for target. -CMakeFiles/grain.dir/clean: - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/clean -.PHONY : CMakeFiles/grain.dir/clean - -# clean rule for target. -clean: CMakeFiles/grain.dir/clean -.PHONY : clean - -#============================================================================= -# Special targets to cleanup operation of make. - -# Special rule to run CMake to check the build system integrity. -# No rule that depends on this can have commands that come from listfiles -# because they might be regenerated. -cmake_check_build_system: - $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 -.PHONY : cmake_check_build_system - diff --git a/CMakeFiles/Progress/1 b/CMakeFiles/Progress/1 deleted file mode 100644 index 7b4d68d..0000000 --- a/CMakeFiles/Progress/1 +++ /dev/null @@ -1 +0,0 @@ -empty \ No newline at end of file diff --git a/CMakeFiles/Progress/count.txt b/CMakeFiles/Progress/count.txt deleted file mode 100644 index d00491f..0000000 --- a/CMakeFiles/Progress/count.txt +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/CMakeFiles/cmake.check_cache b/CMakeFiles/cmake.check_cache deleted file mode 100644 index 3dccd73..0000000 --- a/CMakeFiles/cmake.check_cache +++ /dev/null @@ -1 +0,0 @@ -# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/CMakeFiles/grain.dir/C.includecache b/CMakeFiles/grain.dir/C.includecache deleted file mode 100644 index d4110de..0000000 --- a/CMakeFiles/grain.dir/C.includecache +++ /dev/null @@ -1,24 +0,0 @@ -#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) - -#IncludeRegexScan: ^.*$ - -#IncludeRegexComplain: ^$ - -#IncludeRegexTransform: - -/home/chenguanlin/TD_evaluation/src/grain.c -stdio.h -- -string.h -- -stdlib.h -- -gram_index_engine.h -/home/chenguanlin/TD_evaluation/src/gram_index_engine.h -MESA/MESA_htable.h -- -assert.h -- -ctype.h -- - diff --git a/CMakeFiles/grain.dir/DependInfo.cmake b/CMakeFiles/grain.dir/DependInfo.cmake deleted file mode 100644 index 0504394..0000000 --- a/CMakeFiles/grain.dir/DependInfo.cmake +++ /dev/null @@ -1,13 +0,0 @@ -# The set of languages for which implicit dependencies are needed: -SET(CMAKE_DEPENDS_LANGUAGES - "C" - ) -# The set of files for implicit dependencies of each language: -SET(CMAKE_DEPENDS_CHECK_C - "/home/chenguanlin/TD_evaluation/src/grain.c" 
"/home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/grain.c.o" - ) -SET(CMAKE_C_COMPILER_ID "GNU") - -# Targets to which this target links. -SET(CMAKE_TARGET_LINKED_INFO_FILES - ) diff --git a/CMakeFiles/grain.dir/build.make b/CMakeFiles/grain.dir/build.make deleted file mode 100644 index 18e458a..0000000 --- a/CMakeFiles/grain.dir/build.make +++ /dev/null @@ -1,103 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -#============================================================================= -# Special targets provided by cmake. - -# Disable implicit rules so canoncical targets will work. -.SUFFIXES: - -# Remove some rules from gmake that .SUFFIXES does not remove. -SUFFIXES = - -.SUFFIXES: .hpux_make_needs_suffix_list - -# Suppress display of executed commands. -$(VERBOSE).SILENT: - -# A target that is always out of date. -cmake_force: -.PHONY : cmake_force - -#============================================================================= -# Set environment variables for the build. - -# The shell in which to execute make rules. -SHELL = /bin/sh - -# The CMake executable. -CMAKE_COMMAND = /usr/bin/cmake - -# The command to remove a file. -RM = /usr/bin/cmake -E remove -f - -# The program to use to edit the cache. -CMAKE_EDIT_COMMAND = /usr/bin/ccmake - -# The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src - -# The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src - -# Include any dependencies generated for this target. -include CMakeFiles/grain.dir/depend.make - -# Include the progress variables for this target. -include CMakeFiles/grain.dir/progress.make - -# Include the compile flags for this target's objects. 
-include CMakeFiles/grain.dir/flags.make - -CMakeFiles/grain.dir/grain.c.o: CMakeFiles/grain.dir/flags.make -CMakeFiles/grain.dir/grain.c.o: grain.c - $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles $(CMAKE_PROGRESS_1) - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Building C object CMakeFiles/grain.dir/grain.c.o" - /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -o CMakeFiles/grain.dir/grain.c.o -c /home/chenguanlin/TD_evaluation/src/grain.c - -CMakeFiles/grain.dir/grain.c.i: cmake_force - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing C source to CMakeFiles/grain.dir/grain.c.i" - /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -E /home/chenguanlin/TD_evaluation/src/grain.c > CMakeFiles/grain.dir/grain.c.i - -CMakeFiles/grain.dir/grain.c.s: cmake_force - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling C source to assembly CMakeFiles/grain.dir/grain.c.s" - /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -S /home/chenguanlin/TD_evaluation/src/grain.c -o CMakeFiles/grain.dir/grain.c.s - -CMakeFiles/grain.dir/grain.c.o.requires: -.PHONY : CMakeFiles/grain.dir/grain.c.o.requires - -CMakeFiles/grain.dir/grain.c.o.provides: CMakeFiles/grain.dir/grain.c.o.requires - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o.provides.build -.PHONY : CMakeFiles/grain.dir/grain.c.o.provides - -CMakeFiles/grain.dir/grain.c.o.provides.build: CMakeFiles/grain.dir/grain.c.o -.PHONY : CMakeFiles/grain.dir/grain.c.o.provides.build - -# Object files for target grain -grain_OBJECTS = \ -"CMakeFiles/grain.dir/grain.c.o" - -# External object files for target grain -grain_EXTERNAL_OBJECTS = - -grain: CMakeFiles/grain.dir/grain.c.o -grain: CMakeFiles/grain.dir/build.make -grain: CMakeFiles/grain.dir/link.txt - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --red --bold "Linking C executable grain" - $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/grain.dir/link.txt --verbose=$(VERBOSE) - -# Rule to build all files generated by this target. -CMakeFiles/grain.dir/build: grain -.PHONY : CMakeFiles/grain.dir/build - -CMakeFiles/grain.dir/requires: CMakeFiles/grain.dir/grain.c.o.requires -.PHONY : CMakeFiles/grain.dir/requires - -CMakeFiles/grain.dir/clean: - $(CMAKE_COMMAND) -P CMakeFiles/grain.dir/cmake_clean.cmake -.PHONY : CMakeFiles/grain.dir/clean - -CMakeFiles/grain.dir/depend: - cd /home/chenguanlin/TD_evaluation/src && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/DependInfo.cmake --color=$(COLOR) -.PHONY : CMakeFiles/grain.dir/depend - diff --git a/CMakeFiles/grain.dir/cmake_clean.cmake b/CMakeFiles/grain.dir/cmake_clean.cmake deleted file mode 100644 index 54d1698..0000000 --- a/CMakeFiles/grain.dir/cmake_clean.cmake +++ /dev/null @@ -1,10 +0,0 @@ -FILE(REMOVE_RECURSE - "CMakeFiles/grain.dir/grain.c.o" - "grain.pdb" - "grain" -) - -# Per-language clean rules from dependency scanning. -FOREACH(lang C) - INCLUDE(CMakeFiles/grain.dir/cmake_clean_${lang}.cmake OPTIONAL) -ENDFOREACH(lang) diff --git a/CMakeFiles/grain.dir/depend.internal b/CMakeFiles/grain.dir/depend.internal deleted file mode 100644 index f1b3d06..0000000 --- a/CMakeFiles/grain.dir/depend.internal +++ /dev/null @@ -1,5 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! 
-# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -CMakeFiles/grain.dir/grain.c.o - /home/chenguanlin/TD_evaluation/src/grain.c diff --git a/CMakeFiles/grain.dir/depend.make b/CMakeFiles/grain.dir/depend.make deleted file mode 100644 index 85fc728..0000000 --- a/CMakeFiles/grain.dir/depend.make +++ /dev/null @@ -1,5 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -CMakeFiles/grain.dir/grain.c.o: grain.c - diff --git a/CMakeFiles/grain.dir/flags.make b/CMakeFiles/grain.dir/flags.make deleted file mode 100644 index 72791e9..0000000 --- a/CMakeFiles/grain.dir/flags.make +++ /dev/null @@ -1,8 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -# compile C with /usr/bin/gcc -C_FLAGS = -g - -C_DEFINES = - diff --git a/CMakeFiles/grain.dir/link.txt b/CMakeFiles/grain.dir/link.txt deleted file mode 100644 index 0f3e72b..0000000 --- a/CMakeFiles/grain.dir/link.txt +++ /dev/null @@ -1 +0,0 @@ -/usr/bin/gcc -g -fPIC CMakeFiles/grain.dir/grain.c.o -o grain -rdynamic -lmaatframe -lMESA_htable -lpthread -lm diff --git a/CMakeFiles/grain.dir/progress.make b/CMakeFiles/grain.dir/progress.make deleted file mode 100644 index 781c7de..0000000 --- a/CMakeFiles/grain.dir/progress.make +++ /dev/null @@ -1,2 +0,0 @@ -CMAKE_PROGRESS_1 = 1 - diff --git a/CMakeFiles/progress.make b/CMakeFiles/progress.make deleted file mode 100644 index d00491f..0000000 --- a/CMakeFiles/progress.make +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/Makefile b/Makefile deleted file mode 100644 index a3fd6fa..0000000 --- a/Makefile +++ /dev/null @@ -1,163 +0,0 @@ -# CMAKE generated file: DO NOT EDIT! -# Generated by "Unix Makefiles" Generator, CMake Version 2.6 - -# Default target executed when no arguments are given to make. -default_target: all -.PHONY : default_target - -#============================================================================= -# Special targets provided by cmake. - -# Disable implicit rules so canoncical targets will work. -.SUFFIXES: - -# Remove some rules from gmake that .SUFFIXES does not remove. -SUFFIXES = - -.SUFFIXES: .hpux_make_needs_suffix_list - -# Suppress display of executed commands. -$(VERBOSE).SILENT: - -# A target that is always out of date. -cmake_force: -.PHONY : cmake_force - -#============================================================================= -# Set environment variables for the build. - -# The shell in which to execute make rules. -SHELL = /bin/sh - -# The CMake executable. -CMAKE_COMMAND = /usr/bin/cmake - -# The command to remove a file. -RM = /usr/bin/cmake -E remove -f - -# The program to use to edit the cache. -CMAKE_EDIT_COMMAND = /usr/bin/ccmake - -# The top-level source directory on which CMake was run. -CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src - -# The top-level build directory on which CMake was run. -CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src - -#============================================================================= -# Targets provided globally by CMake. - -# Special rule for the target edit_cache -edit_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." 
- /usr/bin/ccmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : edit_cache - -# Special rule for the target edit_cache -edit_cache/fast: edit_cache -.PHONY : edit_cache/fast - -# Special rule for the target rebuild_cache -rebuild_cache: - @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." - /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) -.PHONY : rebuild_cache - -# Special rule for the target rebuild_cache -rebuild_cache/fast: rebuild_cache -.PHONY : rebuild_cache/fast - -# The main all target -all: cmake_check_build_system - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles /home/chenguanlin/TD_evaluation/src/CMakeFiles/progress.make - $(MAKE) -f CMakeFiles/Makefile2 all - $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0 -.PHONY : all - -# The main clean target -clean: - $(MAKE) -f CMakeFiles/Makefile2 clean -.PHONY : clean - -# The main clean target -clean/fast: clean -.PHONY : clean/fast - -# Prepare targets for installation. -preinstall: all - $(MAKE) -f CMakeFiles/Makefile2 preinstall -.PHONY : preinstall - -# Prepare targets for installation. -preinstall/fast: - $(MAKE) -f CMakeFiles/Makefile2 preinstall -.PHONY : preinstall/fast - -# clear depends -depend: - $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 -.PHONY : depend - -#============================================================================= -# Target rules for targets named grain - -# Build rule for target. -grain: cmake_check_build_system - $(MAKE) -f CMakeFiles/Makefile2 grain -.PHONY : grain - -# fast build rule for target. -grain/fast: - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build -.PHONY : grain/fast - -grain.o: grain.c.o -.PHONY : grain.o - -# target to build an object file -grain.c.o: - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o -.PHONY : grain.c.o - -grain.i: grain.c.i -.PHONY : grain.i - -# target to preprocess a source file -grain.c.i: - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.i -.PHONY : grain.c.i - -grain.s: grain.c.s -.PHONY : grain.s - -# target to generate assembly for a file -grain.c.s: - $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.s -.PHONY : grain.c.s - -# Help Target -help: - @echo "The following are some of the valid targets for this Makefile:" - @echo "... all (the default if no target is provided)" - @echo "... clean" - @echo "... depend" - @echo "... edit_cache" - @echo "... grain" - @echo "... rebuild_cache" - @echo "... grain.o" - @echo "... grain.i" - @echo "... grain.s" -.PHONY : help - - - -#============================================================================= -# Special targets to cleanup operation of make. - -# Special rule to run CMake to check the build system integrity. -# No rule that depends on this can have commands that come from listfiles -# because they might be regenerated. 
-cmake_check_build_system: - $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 -.PHONY : cmake_check_build_system - diff --git "a/TD\344\273\243\347\240\201\350\257\264\346\230\216.docx" "b/TD\344\273\243\347\240\201\350\257\264\346\230\216.docx" deleted file mode 100644 index 4d37049..0000000 Binary files "a/TD\344\273\243\347\240\201\350\257\264\346\230\216.docx" and /dev/null differ diff --git a/cmake_install.cmake b/cmake_install.cmake deleted file mode 100644 index 75c1e3c..0000000 --- a/cmake_install.cmake +++ /dev/null @@ -1,44 +0,0 @@ -# Install script for directory: /home/chenguanlin/TD_evaluation/src - -# Set the install prefix -IF(NOT DEFINED CMAKE_INSTALL_PREFIX) - SET(CMAKE_INSTALL_PREFIX "/usr/local") -ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX) -STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") - -# Set the install configuration name. -IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) - IF(BUILD_TYPE) - STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" "" - CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") - ELSE(BUILD_TYPE) - SET(CMAKE_INSTALL_CONFIG_NAME "Debug") - ENDIF(BUILD_TYPE) - MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") -ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) - -# Set the component getting installed. -IF(NOT CMAKE_INSTALL_COMPONENT) - IF(COMPONENT) - MESSAGE(STATUS "Install component: \"${COMPONENT}\"") - SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}") - ELSE(COMPONENT) - SET(CMAKE_INSTALL_COMPONENT) - ENDIF(COMPONENT) -ENDIF(NOT CMAKE_INSTALL_COMPONENT) - -# Install shared libraries without execute permission? -IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) - SET(CMAKE_INSTALL_SO_NO_EXE "0") -ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) - -IF(CMAKE_INSTALL_COMPONENT) - SET(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") -ELSE(CMAKE_INSTALL_COMPONENT) - SET(CMAKE_INSTALL_MANIFEST "install_manifest.txt") -ENDIF(CMAKE_INSTALL_COMPONENT) - -FILE(WRITE "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "") -FOREACH(file ${CMAKE_INSTALL_MANIFEST_FILES}) - FILE(APPEND "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "${file}\n") -ENDFOREACH(file) diff --git a/dataset_build/CMakeLists.txt b/dataset_build/CMakeLists.txt deleted file mode 100644 index 8840a74..0000000 --- a/dataset_build/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -PROJECT (CALCULATE) -SET (SRC_LIST get_lost.c) -SET(CMAKE_BUILD_TYPE "Debug") -SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") -SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") -MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR}) -MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR}) -#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/) -#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/) -ADD_EXECUTABLE(get_lost ${SRC_LIST}) -TARGET_LINK_LIBRARIES(get_lost maatframe libMESA_htable.so pthread m) diff --git a/dataset_build/based_sfh.conf b/dataset_build/based_sfh.conf deleted file mode 100644 index cdcf4cf..0000000 --- a/dataset_build/based_sfh.conf +++ /dev/null @@ -1,3 +0,0 @@ -[file] -raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest_nots -ripe_file_address = ../../data/td_data_set/td_data_20171207/base_sfh_set \ No newline at end of file diff --git a/dataset_build/based_sfh.py b/dataset_build/based_sfh.py deleted file mode 100644 index b3281ce..0000000 --- a/dataset_build/based_sfh.py +++ /dev/null @@ -1,44 +0,0 @@ -import re -import ConfigParser 
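The based_sfh.py script whose diff begins just above (and the file_digest.py variants later in this patch) filter raw digest records by SFH coverage: they pull the [start:end] spans out of the SFH string, average the span lengths, and keep a record only when that average is a large enough fraction of the recorded length carried in field 3 of the ';'-separated line. What follows is a minimal Python 3 re-sketch of that check, not the original code: the names SPAN_RE, hashed_len and keep_record are illustrative, the regex is a simplified form of the pattern used in the scripts, and the sketch uses true division, whereas based_sfh.py's Python 2 integer divisions floor both steps, the file_digest.py copies cast the length to float, and the C helper get_hashed_len in vedio_id_build.c instead halves the summed span lengths.

    import re

    # Spans look like "[1024:2047]"; a simplified form of the pattern in based_sfh.py.
    SPAN_RE = re.compile(r"\[(\d+):(\d+)\]")

    def hashed_len(sfh):
        # Average length of the hashed spans embedded in an SFH string,
        # or -1 when the string carries no spans (mirroring get_hashed_len).
        spans = SPAN_RE.findall(sfh)
        if not spans:
            return -1
        return sum(int(hi) - int(lo) for lo, hi in spans) / len(spans)

    def keep_record(fields, min_coverage=0.8):
        # fields is one ';'-split line; index 3 holds the recorded length and
        # index 19 the SFH, per the index usage in based_sfh.py.
        recorded_len = int(fields[3])
        if recorded_len <= 0 or not fields[19]:
            return False
        return hashed_len(fields[19]) / recorded_len > min_coverage

Applied line by line to the raw digest file named in based_sfh.conf, this is the whole job of that script: surviving SFH strings are written out as the base_sfh_set that dataset_build.conf later points at.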
-import bisect -import random - -term = {'not_null':(lambda x : len(x)!=0)} - -config = ConfigParser.RawConfigParser() -config.read("based_sfh.conf") -raw_file_address = config.get("file","raw_file_address") -ripe_file_address = config.get("file","ripe_file_address") - -class sfh_fingerprint(object): - - def __init__(self,sfh): - self.sfh = sfh - - @staticmethod - def get_hashed_len(sfh): - p = r"\[+\d+?:+\d+?\]" - pattern = re.compile(p) - hashed_len_set = pattern.findall(sfh) - if (term['not_null'](hashed_len_set)): - hashed_len = 0 - for x in xrange(0,len(hashed_len_set)): - hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) - hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) - return hashed_len/len(hashed_len_set) - else : - return -1 - -i=0 -sfh_set = list() -with open(raw_file_address,'r') as infile: - with open(ripe_file_address,'w') as outfile: - for line in infile: - i+=1 - if(i%100000==0): - print i - result = re.split(r';',line) - if(term['not_null'](result[3]) and term['not_null'](result[19])): - hashed_len = sfh_fingerprint.get_hashed_len(result[19]) - if(hashed_len/int(result[3])>0.8): - outfile.write(result[19]+'\n') \ No newline at end of file diff --git a/dataset_build/cal_information.conf b/dataset_build/cal_information.conf deleted file mode 100644 index 1571b8b..0000000 --- a/dataset_build/cal_information.conf +++ /dev/null @@ -1,5 +0,0 @@ -[file] -raw_file_address = ../../data/ripe_data/td_data_20171207/video_id.txt -ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic -[feature] -feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file diff --git a/dataset_build/cal_information.py b/dataset_build/cal_information.py deleted file mode 100644 index 19cd95c..0000000 --- a/dataset_build/cal_information.py +++ /dev/null @@ -1,133 +0,0 @@ -import re -import numpy -import ConfigParser -import binascii -term = {'td_len':(lambda x : len(x)==32), - 'data_num':(lambda x : len(x)==4), - 'url':(lambda x : x.find['NUll']), - 'sfh_len':(lambda x : len(x)>20), - 'not_null':(lambda x : len(x)!=0)} - -class calculation(object): - """docstring for calculation""" - def __init__(self, arg): - super(calculation, self).__init__() - self.arg = arg - - @staticmethod - def cal_ent(x): - x_value_list = set([x[i] for i in range(x.shape[0])]) - ent = 0.0 - num_0 = x[x == 0].shape[0] - for x_value in x_value_list: - if(x_value==0): - continue - p = float(x[x == x_value].shape[0])/(x.shape[0]- num_0) - logp = numpy.log2(p) - ent -=p*logp - return ent - -class data_value(object): - """docstring for data_value""" - def __init__(self, arg): - super(data_value, self).__init__() - self.arg = arg - - @staticmethod - def get_data_values(data): - data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) - #data_set[0]=null,data_set[1]=url - data_value_dic = [long(0)]*6 - for x in xrange(1,len(feature_list)+1): - if(x==1): - if(term['not_null'](data_set[x])==True): - data_value_dic[0] = binascii.crc32(data_set[x]) - else: - data_value_dic[0] = 0 - elif(x==2): - if(term['not_null'](data_set[x])==True): - data_value_dic[1] = binascii.crc32(data_set[x]) - else: - data_value_dic[1] = 0 - elif(x==3): - data_value_dic[2] = long(data_set[x]) - elif(x==4): - data_value_dic[3] = long(data_set[x]) - elif(x==5): - if(term['not_null'](data_set[x])==True): - data_value_dic[4] = binascii.crc32(data_set[x]) - else: - data_value_dic[4] = 0 - elif(x==6): - if(term['not_null'](data_set[x])==True): - 
data_value_dic[5] = binascii.crc32(data_set[x]) - else: - data_value_dic[5] = 0 - return data_value_dic - -config = ConfigParser.RawConfigParser() -config.read("cal_information.conf") - -raw_file_address = config.get("file","raw_file_address") -ripe_file_address = config.get("file","ripe_file_address") -feature_list =[i for i in config.get("feature","feature_name").split(",")] - -i=0 -with open(raw_file_address,'r') as infile: - for line in infile: - i+=1 - if(i%10000==0): - print i - if(i==50000): - break - line_split = re.split(";",line) - data_value_temp = data_value.get_data_values(line_split[5]) - data_value_temp.extend([binascii.crc32(line_split[j]) for j in range(6,19)]) - data_value_temp.append(binascii.crc32(line_split[0])) - if(i==1): - a=numpy.array(data_value_temp) - else: - a=numpy.row_stack((a,numpy.array(data_value_temp))) - -for i in range(20): - if(i==0): - print "URL:"+str(calculation.cal_ent(a[:,i])) - elif(i==1): - print "ServerIP:"+str(calculation.cal_ent(a[:,i])) - elif(i==2): - print "MediaType:"+str(calculation.cal_ent(a[:,i])) - elif(i==3): - print "MediaLen:"+str(calculation.cal_ent(a[:,i])) - elif(i==4): - print "Etag:"+str(calculation.cal_ent(a[:,i])) - elif(i==5): - print "LastModify:"+str(calculation.cal_ent(a[:,i])) - elif(i==6): - print "td_0k:"+str(calculation.cal_ent(a[:,i])) - elif(i==7): - print "td_data_md5_1k:"+str(calculation.cal_ent(a[:,i])) - elif(i==8): - print "td_1k:"+str(calculation.cal_ent(a[:,i])) - elif(i==9): - print "td_data_md5_2k:"+str(calculation.cal_ent(a[:,i])) - elif(i==10): - print "td_2k:"+str(calculation.cal_ent(a[:,i])) - elif(i==11): - print "td_data_md5_4k:"+str(calculation.cal_ent(a[:,i])) - elif(i==12): - print "td_4k:"+str(calculation.cal_ent(a[:,i])) - elif(i==13): - print "td_data_md5_8k:"+str(calculation.cal_ent(a[:,i])) - elif(i==14): - print "td_8k:"+str(calculation.cal_ent(a[:,i])) - elif(i==15): - print "td_data_md5_16k:"+str(calculation.cal_ent(a[:,i])) - elif(i==16): - print "td_16k:"+str(calculation.cal_ent(a[:,i])) - elif(i==17): - print "td_data_md5_32k:"+str(calculation.cal_ent(a[:,i])) - elif(i==18): - print "td_32k:"+str(calculation.cal_ent(a[:,i])) - elif(i==19): - print "id:"+str(calculation.cal_ent(a[:,i])) - diff --git a/dataset_build/dataset_build.conf b/dataset_build/dataset_build.conf deleted file mode 100644 index 400e160..0000000 --- a/dataset_build/dataset_build.conf +++ /dev/null @@ -1,8 +0,0 @@ -[file] -raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level -ripe_file_address = ../../data/td_data_set/td_data_20171207/td_dataset -base_sfh_sets = ../../data/td_data_set/td_data_20171207/base_sfh_set -[output] -breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304 -[feature] -feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file diff --git a/dataset_build/dataset_build.py b/dataset_build/dataset_build.py deleted file mode 100644 index a832072..0000000 --- a/dataset_build/dataset_build.py +++ /dev/null @@ -1,144 +0,0 @@ -import re -import ConfigParser -import bisect -import random -import ctypes -import hashlib -import zlib -import binascii -import json -import datetime -import time - -term = {'td_len':(lambda x : len(x)==32), - 'data_num':(lambda x : len(x)==21), - 'url':(lambda x : x.find['NUll']), - 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), - 'not_null':(lambda x : len(x)!=0), - 'ysp_len':(lambda x : int(x)!=0), - 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} - -config = 
ConfigParser.RawConfigParser() -config.read("dataset_build.conf") -raw_file_address = config.get("file","raw_file_address") -ripe_file_address = config.get("file","ripe_file_address") -base_sfh_sets = config.get("file","base_sfh_sets") -breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] -feature_list =[i for i in config.get("feature","feature_name").split(",")] -ll=ctypes.cdll.LoadLibrary -lib = ll("libmaatframe.so") -lost = dict() - - -class data_value(object): - - @staticmethod - def get_feature(data): - return_data=list() - data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data[5]) - for x in xrange(1,21): - if(x==1): - if(term['not_null'](data_set[6])): - try: - time1=datetime.datetime.strptime(data[1],'%Y-%m-%d %H:%M:%S')+datetime.timedelta(hours=int(8)) - data_set[6]=data_set[6][0:25] - time2=datetime.datetime.strptime(data_set[6],'%a, %d %b %Y %H:%M:%S') - except Exception, e: - return_data.append(-1) - return_data.append(-1) - return_data.append(-1) - return_data.append(-1) - else: - return_data.append(str((time1-time2).seconds)) - return_data.append(((time1-time2).seconds)/60) - return_data.append(((time1-time2).seconds)/3600) - return_data.append((time1-time2).days) - else: - return_data.append(-1) - return_data.append(-1) - return_data.append(-1) - return_data.append(-1) - elif(x==2): - continue - elif(x==3): - continue - elif(x==4): - return_data.append(long(data[4])) - elif(x==5): - if(term['not_null'](data_set[1])): - return_data.append(len(data_set[1])) - else: - return_data.append(-1) - if(term['not_null'](data_set[2])): - ip_set=re.split(r'\.',data_set[2]) - return_data.append(ip_set[0]) - return_data.append(ip_set[1]) - return_data.append(ip_set[2]) - return_data.append(ip_set[3]) - else: - return_data.append(-1) - return_data.append(-1) - return_data.append(-1) - return_data.append(-1) - if(term['not_null'](data_set[3])): - return_data.append(int(data_set[3])) - else: - return_data.append(-1) - if(term['not_null'](data_set[5])): - return_data.append(binascii.crc32(data_set[5])) - else: - return_data.append(-1) - if(term['not_null'](data_set[6])): - return_data.append(binascii.crc32(data_set[6])) - else: - return_data.append(-1) - elif(x==7): - return_data.append(binascii.crc32(data[7])) - elif(x==9): - return_data.append(binascii.crc32(data[9])) - elif(x==11): - return_data.append(binascii.crc32(data[11])) - elif(x==13): - return_data.append(binascii.crc32(data[13])) - elif(x==15): - return_data.append(binascii.crc32(data[15])) - elif(x==17): - return_data.append(binascii.crc32(data[17])) - return return_data - # data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) - # #data_set[0]=null,data_set[1]=url - # data_value_dic = dict() - # for x in xrange(1,len(feature_list)+1): - # if(x==1): - # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) - # elif(x==2): - # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) - # elif(x==3): - # data_value_dic[feature_list[x-1]] = data_set[x] - # elif(x==4): - # data_value_dic[feature_list[x-1]] = data_set[x] - # elif(x==5): - # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) - # elif(x==6): - # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) - # return data_value_dic - - -i=0 -sfh_set = list() -with open(raw_file_address,'r') as infile: - with open(ripe_file_address,'w') as outfile: - for line in infile: - i+=1 - if(i%10000==0): - print i - line_return = re.split(r';',line) - # 
if(int(line_return[0])==0): - # print 'td is right' - outfile.write(str(line_return[0])+',') - return_data=data_value.get_feature(line_return) - for x in range(19): - if(x==18): - outfile.write(str(return_data[18])+'\n') - else: - outfile.write(str(return_data[x])+',') diff --git a/dataset_build/feature_statistics.conf b/dataset_build/feature_statistics.conf deleted file mode 100644 index 12cf089..0000000 --- a/dataset_build/feature_statistics.conf +++ /dev/null @@ -1,8 +0,0 @@ -[file] -raw_file_address = ../../data/td_data_set/td_data_20171207/td.txt -ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic -[output] -breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,44194304 -[feature] -type = data_value_statistics -feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file diff --git a/dataset_build/feature_statistics.py b/dataset_build/feature_statistics.py deleted file mode 100644 index 52ae8e0..0000000 --- a/dataset_build/feature_statistics.py +++ /dev/null @@ -1,164 +0,0 @@ -import re -import ConfigParser -import bisect -import random -import ctypes -import hashlib -import zlib -import binascii - -term = {'td_len':(lambda x : len(x)==32), - 'data_num':(lambda x : len(x)==4), - 'url':(lambda x : x.find['NUll']), - 'sfh_len':(lambda x : len(x)>20), - 'not_null':(lambda x : len(x)!=0)} - -class data_line(object): - """docstring for ClassName""" - def __init__(self): - super(ClassName, self).__init__() - - @staticmethod - def if_error(data_line_str): - data_line_val = re.split(r';',data_line_str) - hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) - if(term['data_num'](data_line_val) and term['sfh_len'](data_line_val[19]) and term['td_len'](data_line_val[9])\ - and term['td_len'](data_line_val[2]) and term['td_len'](data_line_val[13]) and term['td_len'](data_line_val[15])\ - and term['td_len'](data_line_val[17]) and term['not_null'](data_line_val[18]) and term['not_null'](data_line_val[19])\ - and hashed_len/float(data_line_val[3])>0.8): - return data_line_val - else: - return -1 - - -class feature_statistics(object): - """YSP feature_statistics""" - def __init__(self): - super(feature_statistics, self).__init__() - self.meida_len_statistics_set = [0,0,0,0,0,0,0] - self.lost_dict = dict() - - def meida_len_statistics(meida_len): - j = bisect.bisect(breakpoints,meida_len) - self.meida_len_statistics_set[j-1]+=1 - - def data_value_statistics(data_value_dic,data_value): - data_value_str = str() - for x in xrange(0,len(feature_list)): - data_value_str = data_value_str+str(data_value_dic[feature_list[x]])+',' - - if(self.lost_dict.has_key(data_value_str)==False): - self.lost_dict[data_value_str]=[0,1,0.] 
- else: - if (int(result[3])==1): - self.lost_dict[data_value_str][0] += 1 - self.lost_dict[data_value_str][1] += 1 - else: - self.lost_dict[data_value_str][1] += 1 - - -class sfh_fingerprint(object): - - def __init__(self,sfh): - self.sfh = sfh - - @staticmethod - def get_hashed_len(sfh): - p = r"\[+\d+?:+\d+?\]" - pattern = re.compile(p) - hashed_len_set = pattern.findall(sfh) - if (term['not_null'](hashed_len_set)): - hashed_len = 0 - for x in xrange(0,len(hashed_len_set)): - hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) - hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) - return hashed_len/len(hashed_len_set) - else : - return -1 - - @staticmethod - def get_base_sfh(data_set): - base_sfh = list() - for x in xrange(0,10): - base_sfh.append(data_set[x]) - return base_sfh - - - - -class data_value(object): - - @staticmethod - def get_data_values(data): - data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) - #data_set[0]=null,data_set[1]=url - data_value_dic = dict() - for x in xrange(1,len(feature_list)+1): - if(x==1): - data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 - elif(x==2): - data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 - elif(x==3): - data_value_dic[feature_list[x-1]] = data_set[x] - elif(x==4): - data_value_dic[feature_list[x-1]] = bisect.bisect(breakpoints,int(data_set[x])) - elif(x==5): - data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 - elif(x==6): - data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 - return data_value_dic - -config = ConfigParser.RawConfigParser() -config.read("feature_statistics.conf") - -feature_statistics_type = ("feature","type") -raw_file_address = config.get("file","raw_file_address") -ripe_file_address = config.get("file","ripe_file_address") - -if(feature_statistics_type=="meida_len_statistics"): - breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] -elif(feature_statistics_type=="data_value_statistics"): - feature_list =[i for i in config.get("feature","feature_name").split(",")] -# ll=ctypes.cdll.LoadLibrary -# lib = ll("libmaatframe.so") - -i=0 -sfh_set = list() -statistic = feature_statistics() -with open(raw_file_address,'r') as infile: - for line in infile: - i+=1 - - - - - line_return = data_line.if_error(line) - if(line_return != -1): - if(feature_statistics_type=="meida_len_statistics"): - statistic.meida_len_statistics(line_return[3]) - elif(feature_statistics_type=="data_value_statistics"): - lost_list = list() - statistic.meida_len_statistics(line_return) - for i in statistic.lost: - (statistic.lost[i])[2] = float((statistic.lost[i])[0])/(statistic.lost[i])[1] - tmp = (i,int((statistic.lost[i])[0]),int((statistic.lost[i])[1]),float((statistic.lost[i])[2])) - lost_list.append(tmp) - print sorted(lost_list,cmp=lambda x,y:cmp(x[2],y[2])) - # if(x == len(feature_list)-1): - # outfile.write(data_value_dic[feature_list[x]]+'\n') - # else: - # print lost - # outfile.write(str(data_value_dic[feature_list[x]])+',') - # outfile.write(result[3]) - # sfh_dot=list() - # for x in xrange(0,10): - # #transform sfh to dot - # sfh_dot.append(lib.GIE_sfh_similiarity(result[19],len(result[19]),sfh_set[x],len(sfh_set[x]))) - # if(len(data_set)==7): - # outfile.write(str(data_set[0])+','+str(data_set[1])+','+str(data_set[2])\ - # +','+str(data_set[3])+','+str(data_set[4])+','+str(data_set[5])+','+result[5]\ - # 
+','+result[7]+','+result[9]+','+result[11]+','+result[13]+','+result[15]+result[17]\ - # +','+result[19]+'\n') - -# with open(ripe_file_address,'w') as outfile: -# outfile.write(str(lost)) diff --git a/dataset_build/file_digest.py b/dataset_build/file_digest.py deleted file mode 100644 index 590e059..0000000 --- a/dataset_build/file_digest.py +++ /dev/null @@ -1,96 +0,0 @@ -#-*-coding:utf-8-*- -import re -import random -import ConfigParser -import bisect -import commands -import os -import hashlib - -class data_line(object): - """docstring for ClassName""" - def __init__(self): - super(ClassName, self).__init__() - - @staticmethod - def if_error(data_line_str): - data_line_val = re.split(r';',data_line_str) - hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) - if(term['data_num'](data_line_val) and \ - term['not_null'](data_line_val[0]) and \ - term['ysp_len'](data_line_val[3]) and \ - term['not_null'](data_line_val[4]) and \ - term['td_len'](data_line_val[6]) and \ - term['td_len'](data_line_val[8]) and \ - term['td_len'](data_line_val[10]) and \ - term['td_len'](data_line_val[12]) and \ - term['td_len'](data_line_val[14]) and \ - term['td_len'](data_line_val[16]) and \ - term['not_null'](data_line_val[18]) and \ - term['sfh_len'](data_line_val[19]) and \ - term['not_null'](data_line_val[20]) and \ - hashed_len/float(data_line_val[3])>=0.8): - return data_line_val - else: - return -1 - -class TD_fingerprint(object): - def __init__(): - self.td = td - self.td_string = td_string - @staticmethod - def td_generate(td_string): - td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() - -class sfh_fingerprint(object): - - def __init__(self,sfh): - self.sfh = sfh - - @staticmethod - def get_hashed_len(sfh): - p = r"\[+\d+?:+\d+?\]" - pattern = re.compile(p) - hashed_len_set = pattern.findall(sfh) - if (term['not_null'](hashed_len_set)): - hashed_len = 0 - for x in xrange(0,len(hashed_len_set)): - hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) - hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) - return hashed_len/len(hashed_len_set) - else : - return -1 - -term = {'td_len':(lambda x : len(x)==32), - 'data_num':(lambda x : len(x)==21), - 'url':(lambda x : x.find['NUll']), - 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), - 'not_null':(lambda x : len(x)!=0), - 'ysp_len':(lambda x : int(x)!=0), - 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} - -grain="./get_lost" -ripe_files=[] -config = ConfigParser.RawConfigParser() -config.read("grain.conf") -raw_file_address=config.get("file","raw_file_address") -ripe_files_address=config.get("file","ripe_files_address") -print ("%s %s" %(raw_file_address,ripe_files_address)) -num = [0,0,0,0,0,0,0] -breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] -# i=0 -# for i in xrange(0,ripe_file_num): -# outfile=open(ripe_files_address+str(i)+'.txt','w') -# ripe_files.append(outfile) - -i=0 -with open(raw_file_address,'r') as infile: -# with open('./ripe_data/mistake_td_sfh1_sfh2_sim_rate_len_url_unequal','r')as infile: - with open(ripe_files_address,'w')as outfile: - for line in infile: - i+=1 - if(i%10000==0): - print i - line_return = data_line.if_error(line) - if(line_return != -1): - outfile.write(str(line)) \ No newline at end of file diff --git a/dataset_build/get_lost.c b/dataset_build/get_lost.c deleted file mode 100644 index 0e6c452..0000000 --- a/dataset_build/get_lost.c +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include -#include -#include 
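The file_digest.py scripts in this patch act as gatekeepers for the raw all_av_digest dump: a line is kept only if it splits into the full 21 ';'-separated fields, the per-offset digest fields look like 32-character MD5s, the SFH length stays inside its bounds, and the SFH spans cover enough of the recorded length. Below is a condensed Python 3 sketch of that predicate, following the index checks in the dataset_build copy of file_digest.py (the other copies check every digest field and tighten the coverage threshold to 0.999); valid_record and its helper names are illustrative rather than taken from the original.

    import re

    SPAN_RE = re.compile(r"\[(\d+):(\d+)\]")

    def valid_record(line, min_coverage=0.8):
        # Condensed mirror of data_line.if_error: reject a record as soon as any
        # field fails its sanity check.
        f = line.rstrip("\n").split(";")
        if len(f) != 21:                                  # full field count required
            return False
        if not (f[0] and f[4] and f[18] and f[20]):       # required non-empty fields
            return False
        try:
            recorded_len = int(f[3])                      # the 'ysp_len' check: must parse and be non-zero
        except ValueError:
            return False
        if recorded_len == 0:
            return False
        if not all(len(f[i]) == 32 for i in (6, 8, 10, 12, 14, 16)):  # per-offset MD5 digests
            return False
        if not 20 < len(f[19]) < 10 * 1024 - 100:         # SFH length bounds
            return False
        spans = SPAN_RE.findall(f[19])                    # SFH coverage of the recorded length
        if not spans:
            return False
        avg_span = sum(int(hi) - int(lo) for lo, hi in spans) / len(spans)
        return avg_span / recorded_len >= min_coverage

Reading the raw file named in grain.conf line by line, printing a progress counter every 10000 lines, and writing out the lines for which this predicate holds is all the surrounding loop in that script does.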
-#include -#define HTABLE_SIZE 8*64*1024*1024 -#define SFH_PASS_RATE 0.8 -#define SIMILIAR 80 - -typedef struct td -{ - char * tdstr; - unsigned int lost; -}td; - -typedef struct file_sfh_data -{ - long id; - char * sfh; - td * td_value; - char * td_ori; -}file_sfh_data; - -int main(int argc,char *argv[]) -{ - FILE *fpread;//文件 - FILE *fpwrite;//write file handle - int array_size = 1024; - file_sfh_data **file_data=(file_sfh_data **)malloc(sizeof(file_sfh_data)*array_size); - char* dirstr = "../../data/td_data_set/td_data_20171207/td_sfh_lost"; - //char* dirstr = *++argv; - char* writestr = "../../data/td_data_set/td_data_20171207/td.txt"; - int total_len = 0; - char TD_tmp[256], SFH_tmp[1024*300], TD_ORI[1024*10]; - char buffer[1024*300+1]; - int ret = 0; - int line = 0; - int thread_safe = 0; - int i; - int id; - int similiarity; - MESA_htable_handle htable = NULL; - fpread=fopen(dirstr,"rb"); - fpwrite=fopen(writestr,"w"); - printf("file str is %s\n",dirstr); - if(fpread==NULL) - { - printf("open file error\n"); - return -1; - } - buffer[sizeof(buffer)]='\0'; - while(feof(fpread)==0) - { - fgets(buffer,sizeof(buffer)-1,fpread); - ret=sscanf(buffer,"%d;%[^;];%[^;];%s",&total_len,TD_ORI,TD_tmp,SFH_tmp); - if(ret!=4) - { - continue; - } - file_data[line]=(file_sfh_data*)calloc(1,sizeof(file_sfh_data)); - file_data[line]->id=line; - file_data[line]->sfh=strdup(SFH_tmp); - file_data[line]->td_value=(td*)calloc(1,sizeof(td)); - file_data[line]->td_value->tdstr=strdup(TD_tmp); - file_data[line]->td_value->lost=0; - file_data[line]->td_ori=strdup(TD_ORI); - line++; - if(line==array_size) - { - array_size*=2; - file_data=realloc(file_data,sizeof(file_sfh_data)*array_size); - } - } - printf("read file success!\n"); - htable = NULL; - htable=MESA_htable_born(); - thread_safe = 0; - MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); - unsigned int slot_size=1024*1024*16; - MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(slot_size)); - MESA_htable_mature(htable); - for(i=0;itd_value->tdstr),32,(void *)file_data[i]->id)<0) - { - id=(long)MESA_htable_search(htable,(char*)file_data[i]->td_value->tdstr,32); - similiarity=GIE_sfh_similiarity(file_data[id]->sfh,(int)strlen(file_data[id]->sfh),file_data[i]->sfh,(int)strlen(file_data[i]->sfh)); - if(similiaritytd_value->lost = 1; - file_data[i]->td_value->lost = 1; - } - } - } - for(i=0;itd_value->tdstr,file_data[i]->sfh,file_data[i]->td_ori,file_data[i]->td_value->lost); - } - for(i=0;isfh); - file_data[i]->sfh=NULL; - free(file_data[i]->td_value->tdstr); - file_data[i]->td_value->tdstr=NULL; - free(file_data[i]->td_value); - file_data[i]->td_value=NULL; - free(file_data[i]->td_ori); - file_data[i]->td_ori=NULL; - free(file_data[i]); - file_data[i]=NULL; - } - fclose(fpread); - fclose(fpwrite); - return 0; -} \ No newline at end of file diff --git a/dataset_build/grain.conf b/dataset_build/grain.conf deleted file mode 100644 index 944b337..0000000 --- a/dataset_build/grain.conf +++ /dev/null @@ -1,5 +0,0 @@ -[file] -ripe_files_address = ../../data/td_data_set/td_data_20171207/get_lost_raw_data -raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest -[output] -breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304 \ No newline at end of file diff --git a/dataset_build/td_classification.py b/dataset_build/td_classification.py deleted file mode 100644 index 8d4b97c..0000000 --- a/dataset_build/td_classification.py +++ /dev/null @@ -1,5 +0,0 @@ -from sklearn.datasets import 
load_iris -from sklearn import tree - -with open() as infile: - \ No newline at end of file diff --git a/dataset_build/vedio_id_build.c b/dataset_build/vedio_id_build.c deleted file mode 100644 index 9faaa64..0000000 --- a/dataset_build/vedio_id_build.c +++ /dev/null @@ -1,171 +0,0 @@ -/* -gcc -g vedio_id_build.c -o vedio_id_build -lmaatframe -I../../inc -*/ -#include -#include -#include -#include "gram_index_engine.h" -#include -#include -#include -#define BUFFER_LEN (10*1024) -#define SFH_PASS_RATE 0.9 -#define SFH_LEN (10*1024) -#define URL_LEN (10*1024) - -typedef struct video_id -{ - long id; - char *sfh; -}video_id; - -typedef struct cache -{ - GIE_digest_t ** GIE_cache; - long cache_size; - long len; -}cache; - -long get_hashed_len(const char* sfh) -{ - char *data=(char*)malloc(strlen(sfh)+1); - memcpy(data,sfh, strlen(sfh)); - data[strlen(sfh)]='\0'; - char *token=NULL,*sub_token=NULL,*saveptr; - long left_offset=0,right_offset=0,hashed_length=0; - int ret=0,first=0; - for (token = data; ;token= NULL) - { - sub_token= strtok_r(token,"[", &saveptr); - if (sub_token == NULL) - { - break; - } - if(first==0)//jump over the first sub string. - { - first=1; - continue; - } - ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); - if(ret!=2) - { - return 0; - } - assert(ret==2); - hashed_length+=right_offset-left_offset+1; - } - //printf("hashed length=%ld\n",hashed_length); - free(data); - return hashed_length/2; -} - -int main(int argc,char *argv[]) -{ - FILE *video_id_sets_file; - FILE *new_sfh_file; - const char *video_id_sets_file_dir="../../data/td_data_set/td_data_20171207/video_id_raw_data"; - const char *new_sfh_file_dir="../../data/ripe_data/td_data_20171207/video_id.txt"; - char *buffer=NULL; - int ret = 0,hashed_len = 0,total_len = 0,resultnum = 0,i = 0; - int update = 0,video_id = 0,j = 0; - int* temp_int = NULL; - float temp_sfh_pass = 0; - char *sfh_str,*url_str; - GIE_digest_t *sfh_video_id = NULL; - GIE_result_t *query_result = NULL; - cache *GIE_digest_cache = NULL; - video_id_sets_file = fopen(video_id_sets_file_dir,"r+"); - new_sfh_file = fopen(new_sfh_file_dir,"w"); - if(video_id_sets_file == NULL) - { - printf("open video_id_sets_file error\n"); - return -1; - } - if(new_sfh_file == NULL) - { - printf("open new_sfh_file error\n"); - return -1; - } - buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); - GIE_create_para_t *query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t)); - query_result = (GIE_result_t*)calloc(1,sizeof(GIE_result_t)); - GIE_handle_t *query_handle; - query_para->gram_value = 7; - query_para->position_accuracy = 5; - query_handle=GIE_create((const GIE_create_para_t *)query_para); - free(query_para); - if(query_handle==NULL) - { - printf("create GIE handle error\n"); - return -1; - } - sfh_video_id = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); - sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); - url_str = (char*)calloc(URL_LEN,sizeof(char)); - i=0; - GIE_digest_cache =(cache*)calloc(1,sizeof(cache)); - GIE_digest_cache->cache_size = 1000; - GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*)); - GIE_digest_cache->len = 0; - while(feof(video_id_sets_file)==0) - { - i++; - if(i%10000==0) - { - printf("%d\n",i); - } - fgets(buffer,BUFFER_LEN-1,video_id_sets_file); - ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - %*[^;];%*[^;];%*[^;];%[^;];%[^;]",sfh_str,url_str); - if(ret!=2) - { - continue; - 
} - hashed_len = get_hashed_len((const char*)sfh_str); - temp_sfh_pass = (float)hashed_len/total_len; - if(temp_sfh_passid=i; - sfh_video_id->sfh_length=strlen(sfh_str); - sfh_video_id->operation=GIE_INSERT_OPT; - sfh_video_id->cfds_lvl=5; - sfh_video_id->sfh=strdup(sfh_str); - sfh_video_id->tag=temp_int; - GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_video_id; - GIE_digest_cache->len++; - if(GIE_digest_cache->len==GIE_digest_cache->cache_size) - { - update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size); - GIE_digest_cache->len=0; - for(j=0;jcache_size;j++) - { - free(GIE_digest_cache->GIE_cache[j]->sfh); - GIE_digest_cache->GIE_cache[j]->sfh=NULL; - free(GIE_digest_cache->GIE_cache[j]); - GIE_digest_cache->GIE_cache[j]=NULL; - } - } - fprintf(new_sfh_file,"%d,%s",i,buffer); - } - else - { - fprintf(new_sfh_file,"%d,%s",*((int*)query_result->tag),buffer); - } - } - free(buffer); - free(query_result); - free(sfh_video_id); - free(url_str); - free(sfh_str); - free(GIE_digest_cache); - return 0; -} \ No newline at end of file diff --git a/file_digest.conf b/file_digest.conf deleted file mode 100644 index a02cae2..0000000 --- a/file_digest.conf +++ /dev/null @@ -1,3 +0,0 @@ -[file] -ripe_files_address = ../data/ripe_data/td_data_20171207/all_av_digest -raw_file_address = ../data/td_data_20171207/td_data/all_av_digest diff --git a/file_digest.py b/file_digest.py deleted file mode 100644 index 3703794..0000000 --- a/file_digest.py +++ /dev/null @@ -1,104 +0,0 @@ -#-*-coding:utf-8-*- -import re -import random -import ConfigParser -import bisect -import commands -import os -import hashlib - -class data_line(object): - """docstring for ClassName""" - def __init__(self): - super(ClassName, self).__init__() - - @staticmethod - def if_error(data_line_str): - data_line_val = re.split(r';',data_line_str) - hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) - if(term['data_num'](data_line_val) and \ - term['not_null'](data_line_val[0]) and \ - term['not_null'](data_line_val[1]) and \ - term['not_null'](data_line_val[2]) and \ - term['ysp_len'](data_line_val[3]) and \ - term['not_null'](data_line_val[4]) and \ - term['not_null'](data_line_val[5]) and \ - term['td_len'](data_line_val[6]) and \ - term['td_len'](data_line_val[7]) and \ - term['td_len'](data_line_val[8]) and \ - term['td_len'](data_line_val[9]) and \ - term['td_len'](data_line_val[10]) and \ - term['td_len'](data_line_val[11]) and \ - term['td_len'](data_line_val[12]) and \ - term['td_len'](data_line_val[13]) and \ - term['td_len'](data_line_val[14]) and \ - term['td_len'](data_line_val[15]) and \ - term['td_len'](data_line_val[16]) and \ - term['td_len'](data_line_val[17]) and \ - term['not_null'](data_line_val[18]) and \ - term['sfh_len'](data_line_val[19]) and \ - term['not_null'](data_line_val[20]) and \ - hashed_len/float(data_line_val[3])>0.999): - return data_line_val - else: - return -1 - -class TD_fingerprint(object): - def __init__(): - self.td = td - self.td_string = td_string - @staticmethod - def td_generate(td_string): - td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() - -class sfh_fingerprint(object): - - def __init__(self,sfh): - self.sfh = sfh - - @staticmethod - def get_hashed_len(sfh): - p = r"\[+\d+?:+\d+?\]" - pattern = re.compile(p) - hashed_len_set = pattern.findall(sfh) - if (term['not_null'](hashed_len_set)): - hashed_len = 0 - for x in xrange(0,len(hashed_len_set)): - hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) - hashed_len = hashed_len 
+ int(hashed_len_num[2]) - int(hashed_len_num[1]) - return hashed_len/len(hashed_len_set) - else : - return -1 - -term = {'td_len':(lambda x : len(x)==32), - 'data_num':(lambda x : len(x)==21), - 'url':(lambda x : x.find['NUll']), - 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), - 'not_null':(lambda x : len(x)!=0), - 'ysp_len':(lambda x : int(x)!=0), - 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} - -c_func="./" -ripe_files=[] -config = ConfigParser.RawConfigParser() -config.read("file_digest.conf") -raw_file_address=config.get("file","raw_file_address") -ripe_files_address=config.get("file","ripe_files_address") -print ("%s %s" %(raw_file_address,ripe_files_address)) -# num = [0,0,0,0,0,0,0] -# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] -# i=0 -# for i in xrange(0,ripe_file_num): -# outfile=open(ripe_files_address+str(i)+'.txt','w') -# ripe_files.append(outfile) - -i=0 -with open(raw_file_address,'r') as infile: - with open(ripe_files_address,'w')as outfile: - for line in infile: - i+=1 - if(i%10000==0): - print i - line_return = data_line.if_error(line) - if(line_return != -1): - outfile.write(str(line)) \ No newline at end of file diff --git a/get_td_mistake_lost/CMakeLists.txt b/get_td_mistake_lost/CMakeLists.txt deleted file mode 100644 index 87f4b6b..0000000 --- a/get_td_mistake_lost/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -PROJECT (CALCULATE) -SET (SRC_LIST get_lost_rate.c) -SET(CMAKE_BUILD_TYPE "Debug") -SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") -SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") -MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR}) -MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR}) -#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/) -#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/) -ADD_EXECUTABLE(get_lost_rate ${SRC_LIST} gram_index_engine.c) -TARGET_LINK_LIBRARIES(get_lost_rate maatframe libMESA_htable.so pthread m) diff --git a/get_td_mistake_lost/file_digest.conf b/get_td_mistake_lost/file_digest.conf deleted file mode 100644 index 6d1c06b..0000000 --- a/get_td_mistake_lost/file_digest.conf +++ /dev/null @@ -1,6 +0,0 @@ -[file_digest] -ripe_files_address = ../../data/ripe_data/td_data_20171207/all_av_digest -raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest -[new_td] -ripe_files_address = ../../data/ripe_data/td_data_20171207/new_TD.txt -raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest diff --git a/get_td_mistake_lost/file_digest.py b/get_td_mistake_lost/file_digest.py deleted file mode 100644 index 62786ef..0000000 --- a/get_td_mistake_lost/file_digest.py +++ /dev/null @@ -1,104 +0,0 @@ -#-*-coding:utf-8-*- -import re -import random -import ConfigParser -import bisect -import commands -import os -import hashlib - -class data_line(object): - """docstring for ClassName""" - def __init__(self): - super(ClassName, self).__init__() - - @staticmethod - def if_error(data_line_str): - data_line_val = re.split(r';',data_line_str) - hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) - if(term['data_num'](data_line_val) and \ - term['not_null'](data_line_val[0]) and \ - term['not_null'](data_line_val[1]) and \ - term['not_null'](data_line_val[2]) and \ - term['ysp_len'](data_line_val[3]) and \ - term['not_null'](data_line_val[4]) and \ - term['not_null'](data_line_val[5]) and \ - term['td_len'](data_line_val[6]) and \ - term['td_len'](data_line_val[7]) and \ - term['td_len'](data_line_val[8]) and \ 
- term['td_len'](data_line_val[9]) and \ - term['td_len'](data_line_val[10]) and \ - term['td_len'](data_line_val[11]) and \ - term['td_len'](data_line_val[12]) and \ - term['td_len'](data_line_val[13]) and \ - term['td_len'](data_line_val[14]) and \ - term['td_len'](data_line_val[15]) and \ - term['td_len'](data_line_val[16]) and \ - term['td_len'](data_line_val[17]) and \ - term['not_null'](data_line_val[18]) and \ - term['sfh_len'](data_line_val[19]) and \ - term['not_null'](data_line_val[20]) and \ - hashed_len/float(data_line_val[3])>0.999): - return data_line_val - else: - return -1 - -class TD_fingerprint(object): - def __init__(): - self.td = td - self.td_string = td_string - @staticmethod - def td_generate(td_string): - td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() - -class sfh_fingerprint(object): - - def __init__(self,sfh): - self.sfh = sfh - - @staticmethod - def get_hashed_len(sfh): - p = r"\[+\d+?:+\d+?\]" - pattern = re.compile(p) - hashed_len_set = pattern.findall(sfh) - if (term['not_null'](hashed_len_set)): - hashed_len = 0 - for x in xrange(0,len(hashed_len_set)): - hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) - hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) - return hashed_len/len(hashed_len_set) - else : - return -1 - -term = {'td_len':(lambda x : len(x)==32), - 'data_num':(lambda x : len(x)==21), - 'url':(lambda x : x.find['NUll']), - 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), - 'not_null':(lambda x : len(x)!=0), - 'ysp_len':(lambda x : int(x)!=0), - 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} - -c_func="./" -ripe_files=[] -config = ConfigParser.RawConfigParser() -config.read("file_digest.conf") -raw_file_address=config.get("file_digest","raw_file_address") -ripe_files_address=config.get("file_digest","ripe_files_address") -print ("%s %s" %(raw_file_address,ripe_files_address)) -# num = [0,0,0,0,0,0,0] -# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] -# i=0 -# for i in xrange(0,ripe_file_num): -# outfile=open(ripe_files_address+str(i)+'.txt','w') -# ripe_files.append(outfile) - -i=0 -with open(raw_file_address,'r') as infile: - with open(ripe_files_address,'w')as outfile: - for line in infile: - i+=1 - if(i%10000==0): - print i - line_return = data_line.if_error(line) - if(line_return != -1): - outfile.write(str(line)) \ No newline at end of file diff --git a/get_td_mistake_lost/get_TD_SFH.c b/get_td_mistake_lost/get_TD_SFH.c deleted file mode 100644 index 2ed3ecd..0000000 --- a/get_td_mistake_lost/get_TD_SFH.c +++ /dev/null @@ -1,162 +0,0 @@ -/* -gcc -g get_TD_SFH.c -o get_TD_SFH -lmaatframe -lMESA_htable -I../include -./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level -*/ - -#include -#include -#include -#include "gram_index_engine.h" -#include -#include -#include -#define BUFFER_LEN (15*1024) -#define SFH_LEN (10*1024) -#define TD_LEN 33 -#define THREAD_SAFE 0 -#define SLOT_SIZE (1024*1024*16) -#define TD_STR_LEN (10*1024) -#define TIME_STR_LEN 128 - -typedef struct sfh_link -{ - // char *time_str; - char *sfh_str; - char *td_ori; - // char *md5_32k; - int similiar; - int all_similiar; - // long hash_len; - struct sfh_link *next; -}sfh_link; - -typedef struct sfh -{ - int all_num; - int all_similiar; - char *sfh_str; - // long hash_len; - sfh_link *sfh_link_items; -}sfh; - -void print_td_sfh(const uchar *key,uint size,void *data,void *arg) -{ - FILE *ripe_file=(FILE*)arg; - sfh *temp_sfh=(sfh*)data; - 
fprintf(ripe_file,"%s;%s;%s",key,temp_sfh->sfh_link_items->td_ori,temp_sfh->sfh_str); -} - -int main() -{ - FILE *raw_file; - FILE *ripe_file; - char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; - char *ripe_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_3"; - raw_file = fopen(raw_file_dir,"r+"); - ripe_file = fopen(ripe_file_dir,"w+"); - if(raw_file==NULL) - { - printf("open all_av_digest error\n"); - return -1; - } - if(ripe_file==NULL) - { - printf("open all_av_digest_mistake_level error"); - return -1; - } - MESA_htable_handle htable=NULL; - char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL; - int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0; - unsigned int slot_size=SLOT_SIZE; - sfh *temp_sfh=NULL; - sfh_link *temp_sfh_link=NULL; - buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); - sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); - td = (char*)calloc(TD_LEN,sizeof(char)); - td[32]='\0'; - td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); - // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char)); - // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); - // time_str[TIME_STR_LEN-1]='\0'; - // md5_32k_str[32]='\0'; - htable=MESA_htable_born(); - MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); - MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); - MESA_htable_mature(htable); - while(feof(raw_file)==0) - { - i++; - if(i%100000==0) - { - printf("%d\n",i); - } - fgets(buffer,BUFFER_LEN-1,raw_file); - buffer[BUFFER_LEN-1]='\0'; - // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ - // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); - // assert(ret==5); - ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); - assert(ret==3); - td[32]='\0'; - // md5_32k_str[32]='\0'; - if((temp_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) - { - temp_sfh=(sfh*)calloc(1,sizeof(sfh)); - temp_sfh->all_num=1; - temp_sfh->all_similiar=0; - temp_sfh->sfh_str=strdup(sfh_str); - temp_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); - temp_sfh->sfh_link_items->sfh_str=strdup(sfh_str); - temp_sfh->sfh_link_items->td_ori=strdup(td_str); - // temp_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); - // temp_sfh->sfh_link_items->time_str=strdup(time_str); - temp_sfh->sfh_link_items->similiar=0; - temp_sfh->sfh_link_items->all_similiar=0; - temp_sfh->sfh_link_items->next=NULL; - ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_sfh); - assert(ret>0); - } - else - { - temp_similiar=GIE_sfh_similiarity(temp_sfh->sfh_str,(int)strlen(temp_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); - temp_sfh->all_similiar+=temp_similiar; - temp_sfh_link=temp_sfh->sfh_link_items; - for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) - { - temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); - temp_sfh_link->all_similiar+=temp_similiar; - temp_all_similiar+=temp_similiar; - if(temp_sfh_link->all_similiar>temp_sfh->all_similiar) - { - free(temp_sfh->sfh_str); - temp_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); - temp_sfh->all_similiar=temp_sfh_link->all_similiar; - } - if(temp_sfh_link->next==NULL) - { - break; - } - } - temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); - temp_sfh_link->next->sfh_str=strdup(sfh_str); - temp_sfh_link->next->td_ori=strdup(td_str); - 
// temp_sfh_link->next->md5_32k=strdup(md5_32k_str); - // temp_sfh_link->next->time_str=strdup(time_str); - temp_sfh_link->next->similiar=0; - temp_sfh_link->next->all_similiar=temp_all_similiar; - temp_sfh_link->next->next=NULL; - temp_sfh->all_num+=1; - } - } - fclose(raw_file); - MESA_htable_iterate(htable,print_td_sfh,ripe_file); - free(sfh_str); - free(td); - free(td_str); - // free(md5_32k_str); - MESA_htable_destroy(htable,NULL); - // fclose(raw_file); - fclose(ripe_file); - return 0; -} \ No newline at end of file diff --git a/get_td_mistake_lost/get_lost_rate.c b/get_td_mistake_lost/get_lost_rate.c deleted file mode 100644 index d983a00..0000000 --- a/get_td_mistake_lost/get_lost_rate.c +++ /dev/null @@ -1,210 +0,0 @@ -/* -gcc -g get_lost_rate.c -o get_lost_rate -lmaatframe -I../include -*/ -#include -#include -#include -#include "gram_index_engine.h" -#include -#include -#define BUFFER_LEN (10*1024) -#define CACHE_SIZE 2000000 -#define SFH_LEN (10*1024) -#define TD_LEN 33 -#define RESULT_NUM 10000 -#define TIME_STR_LEN 128 -#define TD_STR_LEN (10*1024) - -typedef struct cache -{ - GIE_digest_t ** GIE_cache; - long cache_size; - long len; -}cache; - -typedef struct GIE_tag -{ - char *td; - char *td_str; - char *sfh_str; -}GIE_tag; - -int main() -{ - FILE *td_sfh_file; - FILE *raw_file; - FILE *ripe_file; - const char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; - const char *td_sfh_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_1"; - const char *ripe_file_dir="../../data/ripe_data/td_data_20171207/get_lost_ripe_data_1"; - td_sfh_file = fopen(td_sfh_file_dir,"r+"); - raw_file = fopen(raw_file_dir,"r+"); - ripe_file = fopen(ripe_file_dir,"w+"); - char *buffer=NULL,*sfh_str=NULL,*td=NULL,*time_str=NULL,*td_str=NULL; - GIE_create_para_t *query_para=NULL; - GIE_handle_t *query_handle=NULL; - GIE_result_t *query_result = NULL; - cache *GIE_digest_cache = NULL; - GIE_digest_t *sfh_td = NULL; - int i=0,w=0,ret=0,lost=0,j=0,update=0,resultnum=0,temp_len=0; - GIE_tag *temp_tag =NULL; - if(td_sfh_file == NULL) - { - printf("open td_sfh_file_dir error\n"); - return -1; - } - if(raw_file == NULL) - { - printf("open raw_file_dir error\n"); - return -1; - } - if(ripe_file == NULL) - { - printf("open ripe_file_dir error\n"); - return -1; - } - sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); - sfh_str[SFH_LEN-1]='\0'; - td = (char*)calloc(TD_LEN,sizeof(char)); - td[32]='\0'; - time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); - time_str[TIME_STR_LEN-1]='\0'; - buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); - td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); - query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t)); - query_para->gram_value = 7; - query_para->position_accuracy = 5; - query_para->ED_reexamine=1; - query_para->format=GIE_INPUT_FORMAT_SFH; - query_handle=GIE_create((const GIE_create_para_t *)query_para); - free(query_para); - query_result = (GIE_result_t*)calloc(RESULT_NUM,sizeof(GIE_result_t)); - GIE_digest_cache =(cache*)calloc(1,sizeof(cache)); - GIE_digest_cache->cache_size = CACHE_SIZE; - GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*)); - GIE_digest_cache->len = 0; - if(query_handle==NULL) - { - printf("create GIE handle error\n"); - return -1; - } - while(feof(td_sfh_file)==0) - { - i++; - if(i%100000==0) - { - printf("%d\n",i); - } - fgets(buffer,BUFFER_LEN-1,td_sfh_file); - ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td,td_str,sfh_str); - assert(ret==3); - td[32]='\0'; - sfh_td = 
(GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); - sfh_td->id=i; - temp_len=strlen(sfh_str); - sfh_td->sfh_length=temp_len; - sfh_str[temp_len-1]='\0'; - sfh_td->operation=GIE_INSERT_OPT; - sfh_td->cfds_lvl=5; - sfh_td->sfh=strdup(sfh_str); - temp_tag=(GIE_tag*)calloc(1,sizeof(GIE_tag)); - temp_tag->td=strdup(td); - temp_tag->td_str=strdup(td_str); - temp_tag->sfh_str=strdup(sfh_str); - sfh_td->tag=(void*)temp_tag; - GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td; - GIE_digest_cache->len++; - // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); - // if(resultnum==0) - // { - // sfh_td = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); - // sfh_td->id=i; - // sfh_td->sfh_length=strlen(sfh_str); - // sfh_td->operation=GIE_INSERT_OPT; - // sfh_td->cfds_lvl=5; - // sfh_td->sfh=strdup(sfh_str); - // sfh_td->tag=(void*)strdup(td); - // GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td; - // GIE_digest_cache->len++; - // } - // else - // { - // for(j=0;jtag),td)!=0) - // { - // lost++; - // fprintf(ripe_file,"%s,%s,%s\n",(char*)((query_result+j)->tag),td,sfh_str); - // } - // } - // continue; - // } - // if(GIE_digest_cache->len==GIE_digest_cache->cache_size) - // { - // update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size); - // assert(update==GIE_digest_cache->len); - // GIE_digest_cache->len=0; - // for(j=0;jcache_size;j++) - // { - // free(GIE_digest_cache->GIE_cache[j]->sfh); - // GIE_digest_cache->GIE_cache[j]->sfh=NULL; - // free(GIE_digest_cache->GIE_cache[j]); - // GIE_digest_cache->GIE_cache[j]=NULL; - // } - // } - // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); - // for(i=0;itag,td)!=0) - // { - // lost++; - // } - // } - } - fclose(td_sfh_file); - update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->len); - for(j=0;jlen;j++) - { - free(GIE_digest_cache->GIE_cache[j]->sfh); - GIE_digest_cache->GIE_cache[j]->sfh=NULL; - free(GIE_digest_cache->GIE_cache[j]); - GIE_digest_cache->GIE_cache[j]=NULL; - } - i=0; - while(feof(raw_file)==0) - { - i++; - if(i%100000==0) - { - printf("%d\n",i); - } - fgets(buffer,BUFFER_LEN-1,raw_file); - // ret=sscanf(buffer,"%[^;];%[^;]",td,sfh_str); - // assert(ret==2); - // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ - // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - // %*[^;];%[^;];%*[^;];%[^;];%*[^;]",td_str,td,sfh_str); - ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); - assert(ret==3); - resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); - if(resultnum>1) - { - for(j=0;jtag)->td,td)!=0) - { - w=1; - fprintf(ripe_file,"%u,%s,%s,%s,%s,%s,%s\n",(query_result+j)->id,((GIE_tag*)((query_result+j)->tag))->td_str,((GIE_tag*)((query_result+j)->tag))->td,((GIE_tag*)((query_result+j)->tag))->sfh_str,td_str,td,sfh_str); - } - } - lost+=w; - w=0; - } - - } - printf("%d;%d\n",lost,i); - free(sfh_str); - free(td); - free(time_str); - free(td_str); -} \ No newline at end of file diff --git a/get_td_mistake_lost/get_mistake_level.c b/get_td_mistake_lost/get_mistake_level.c deleted file mode 100644 index 5f03974..0000000 --- a/get_td_mistake_lost/get_mistake_level.c +++ /dev/null @@ -1,366 +0,0 @@ -/* -gcc -g get_mistake_level.c -o get_mistake_level -lMESA_htable -lmaatframe -I../../include -./get_mistake_level 
../data/ripe_data/td_data_20171207/all_av_digest_mistake_level -*/ -#include -#include -#include -#include "gram_index_engine.h" -#include -#include -#include -#define THREAD_SAFE 0 -#define SLOT_SIZE (1024*1024*16) -#define SIMILIAR_RATE 90 -#define TD_STR_LEN (10*1024) -#define TIME_STR_LEN 128 -#define RAODONG_RATE 0.1 -#define BUFFER_LEN (15*1024) -#define SFH_LEN (10*1024) -#define TD_LEN 33 - -typedef struct sfh_link -{ - // char *time_str; - char *sfh_str; - char *td_ori; - // char *md5_32k; - int similiar; - int all_similiar; - // long hash_len; - struct sfh_link *next; -}sfh_link; - -typedef struct mistake_sfh -{ - int mistake_num; - int all_num; - int all_similiar; - char *sfh_str; - // long hash_len; - sfh_link *sfh_link_items; -}mistake_sfh; - -typedef struct temp_parameter -{ - int mistake_num; - FILE *ripe_file; -}temp_parameter; - -long get_hashed_len(const char* sfh) -{ - char *data=(char*)malloc(strlen(sfh)+1); - memcpy(data,sfh, strlen(sfh)); - data[strlen(sfh)]='\0'; - char *token=NULL,*sub_token=NULL,*saveptr; - long left_offset=0,right_offset=0,hashed_length=0; - int ret=0,first=0; - for (token = data; ; token= NULL) - { - sub_token= strtok_r(token,"[", &saveptr); - if (sub_token == NULL) - { - break; - } - if(first==0)//jump over the first sub string. - { - first=1; - continue; - } - ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); - if(ret!=2) - { - return 0; - } - assert(ret==2); - hashed_length+=right_offset-left_offset+1; - } - //printf("hashed length=%ld\n",hashed_length); - free(data); - return hashed_length/2; -} - -void print_mistake_td(const uchar *key,uint size,void *data,void *arg) -{ - temp_parameter *parameter = (temp_parameter*)arg; - mistake_sfh *temp_mistake_sfh=(mistake_sfh*)data; - float temp_rate=0; - temp_rate=(float)temp_mistake_sfh->mistake_num/(float)temp_mistake_sfh->all_num; - if(temp_rate>RAODONG_RATE) - { - parameter->mistake_num+=temp_mistake_sfh->mistake_num; - fprintf(parameter->ripe_file,"%d;%s\n",temp_mistake_sfh->mistake_num,temp_mistake_sfh->sfh_str); - sfh_link *temp_sfh_link=temp_mistake_sfh->sfh_link_items; - for(;;temp_sfh_link=temp_sfh_link->next) - { - if(temp_sfh_link==NULL) - { - break; - } - temp_sfh_link->similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str)); - // fprintf(parameter->ripe_file,"%s,%d;%s;%s;%s\n",temp_sfh_link->time_str,temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori,temp_sfh_link->md5_32k); - fprintf(parameter->ripe_file,"%d;%s;%s\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori); - } - fprintf(parameter->ripe_file,"\n"); - } -} - -int main(int argc,char *argv[]) -{ - FILE *raw_file; - FILE *ripe_file; - char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; - char *ripe_file_dir="../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_3"; - char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL; - raw_file = fopen(raw_file_dir,"r+"); - ripe_file = fopen(ripe_file_dir,"w+"); - int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0; - long temp_hash_len=0; - unsigned int slot_size=SLOT_SIZE; - mistake_sfh *temp_mistake_sfh=NULL; - sfh_link *temp_sfh_link=NULL; - MESA_htable_handle htable=NULL; - temp_parameter *parameter=NULL; - if(raw_file==NULL) - { - printf("open all_av_digest error\n"); - return -1; - } - - - if(ripe_file==NULL) - { - printf("open 
all_av_digest_mistake_level error"); - return -1; - } - buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); - sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); - td = (char*)calloc(TD_LEN,sizeof(char)); - td[32]='\0'; - td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); - // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char)); - // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); - // time_str[TIME_STR_LEN-1]='\0'; - // md5_32k_str[32]='\0'; - htable=MESA_htable_born(); - MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); - MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); - MESA_htable_mature(htable); - parameter=(temp_parameter*)calloc(1,sizeof(temp_parameter)); - parameter->mistake_num=0; - parameter->ripe_file=ripe_file; - while(feof(raw_file)==0) - { - i++; - if(i%100000==0) - { - printf("%d\n",i); - } - fgets(buffer,BUFFER_LEN-1,raw_file); - buffer[BUFFER_LEN-1]='\0'; - // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ - // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); - ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); - assert(ret==3); - td[32]='\0'; - // md5_32k_str[32]='\0'; - if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) - { - temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh)); - temp_mistake_sfh->mistake_num=0; - temp_mistake_sfh->all_num=1; - temp_mistake_sfh->all_similiar=0; - // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str); - temp_mistake_sfh->sfh_str=strdup(sfh_str); - temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); - temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str); - temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str); - // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); - // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str); - temp_mistake_sfh->sfh_link_items->similiar=0; - temp_mistake_sfh->sfh_link_items->all_similiar=0; - temp_mistake_sfh->sfh_link_items->next=NULL; - ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh); - assert(ret>0); - } - else - { - temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); - temp_mistake_sfh->all_similiar+=temp_similiar; - temp_sfh_link=temp_mistake_sfh->sfh_link_items; - for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) - { - // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); - temp_sfh_link->all_similiar+=temp_similiar; - temp_all_similiar+=temp_similiar; - if(temp_sfh_link->all_similiar>temp_mistake_sfh->all_similiar) - { - free(temp_mistake_sfh->sfh_str); - temp_mistake_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); - temp_mistake_sfh->all_similiar=temp_sfh_link->all_similiar; - } - if(temp_sfh_link->next==NULL) - { - break; - } - } - // if(temp_hash_len>temp_mistake_sfh->hash_len) - // { - // temp_mistake_sfh->hash_len=temp_hash_len; - // free(temp_mistake_sfh->sfh_str); - // temp_mistake_sfh->sfh_str=strdup(sfh_str); - // } - temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); - temp_sfh_link->next->sfh_str=strdup(sfh_str); - temp_sfh_link->next->td_ori=strdup(td_str); - // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); - // temp_sfh_link->next->time_str=strdup(time_str); - temp_sfh_link->next->similiar=0; - 
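/* Editor's sketch (not part of the original patch): the streaming loop above keeps,
 * per TD, a running all_similiar total for every stored SFH and promotes whichever
 * SFH has the largest total as that TD's representative digest. Below is the same
 * election written as a standalone batch step, assuming only GIE_sfh_similiarity()
 * as declared in include/gram_index_engine.h (returns a 0..100 score);
 * elect_representative_sfh and its arguments are illustrative names. */
#include <string.h>
#include "gram_index_engine.h"

/* Return the index of the SFH whose summed similarity to all the others is largest. */
static int elect_representative_sfh(char **sfh, int n)
{
    int best = -1, best_sum = -1;
    for (int i = 0; i < n; i++) {
        int sum = 0;
        for (int j = 0; j < n; j++) {
            if (j != i)
                sum += GIE_sfh_similiarity(sfh[i], (int)strlen(sfh[i]),
                                           sfh[j], (int)strlen(sfh[j]));
        }
        if (sum > best_sum) {
            best_sum = sum;
            best = i;
        }
    }
    return best;   /* -1 only when n <= 0 */
}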
temp_sfh_link->next->all_similiar=temp_all_similiar; - temp_sfh_link->next->next=NULL; - temp_mistake_sfh->all_num+=1; - } - } - fclose(raw_file); - raw_file = fopen(raw_file_dir,"r+"); - if(raw_file==NULL) - { - printf("open all_av_digest error\n"); - return -1; - } - i=0; - while(feof(raw_file)==0) - { - i++; - if(i%10000==0) - { - printf("%d\n",i); - } - fgets(buffer,BUFFER_LEN-1,raw_file); - buffer[BUFFER_LEN-1]='\0'; - // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ - // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); - ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); - assert(ret==3); - td[32]='\0'; - // md5_32k_str[32]='\0'; - temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN); - assert(temp_mistake_sfh!=NULL); - // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) - // { - // temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh)); - // temp_mistake_sfh->num=0; - // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str); - // temp_mistake_sfh->sfh_str=strdup(sfh_str); - // temp_sfh_link=(sfh_link*)calloc(1,sizeof(sfh_link)); - // temp_sfh_link->sfh_str=strdup(sfh_str); - // temp_sfh_link->td_ori=strdup(td_str); - // temp_sfh_link->md5_32k=strdup(md5_32k_str); - // temp_sfh_link->time_str=strdup(time_str); - // temp_sfh_link->next=NULL; - // temp_mistake_sfh->sfh_link_items=temp_sfh_link; - // ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh); - // assert(ret>0); - // } - // else - // { - // temp_hash_len=get_hashed_len(sfh_str); - // if(temp_hash_len>temp_mistake_sfh->hash_len) - // { - // temp_sfh_link->hash_len=get_hashed_len(); - // free(temp_sfh_link->sfh_str); - // temp_sfh_link->sfh_str=strdup(sfh_str); - // } - temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); - if(temp_similiarmistake_num+=1; - } - // if(temp_mistake_sfh->sfh_link_items!=NULL) - // { - // temp_sfh_link=temp_mistake_sfh->sfh_link_items; - // for(;;temp_sfh_link=temp_sfh_link->next) - // { - // // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))next==NULL) - // { - // break; - // } - // } - // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); - // temp_sfh_link->next->sfh_str=strdup(sfh_str); - // temp_sfh_link->next->td_ori=strdup(td_str); - // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); - // temp_sfh_link->next->time_str=strdup(time_str); - // temp_sfh_link->next->similiar=temp_similiar; - // temp_sfh_link->next->next=NULL; - // } - // else - // { - // temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); - // temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str); - // temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str); - // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); - // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str); - // temp_mistake_sfh->sfh_link_items->similiar=temp_similiar; - // temp_mistake_sfh->sfh_link_items->next=NULL; - // } - // if(temp_mistake==1) - // { - // temp_mistake_sfh->num+=temp_mistake; - // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); - // temp_sfh_link->next->sfh_str=strdup(sfh_str); - // temp_sfh_link->next->td_ori=strdup(td_str); - // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); - // temp_sfh_link->next->next=NULL; - // temp_mistake=0; - // } - } - fclose(raw_file); - // raw_file=NULL; - // 
raw_file = fopen(raw_file_dir,"r+"); - // if(raw_file==NULL) - // { - // printf("open all_av_digest error\n"); - // return -1; - // } - // i=0; - // while(feof(raw_file)==0) - // { - // i++; - // if(i%10000==0) - // { - // printf("%d\n",i); - // } - // fgets(buffer,BUFFER_LEN-1,raw_file); - // buffer[BUFFER_LEN-1]='\0'; - // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ - // %*[^;];%[^;];%*[^;];%*[^;];%*[^;]",td); - // assert(ret==1); - // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))!=NULL) - // { - // fprintf(ripe_file,"%d;%s",temp_mistake_sfh->num,buffer); - // } - // } - MESA_htable_iterate(htable,print_mistake_td,(void*)parameter); - printf("%d,%d\n",parameter->mistake_num,i); - free(buffer); - free(sfh_str); - free(td); - free(td_str); - // free(md5_32k_str); - MESA_htable_destroy(htable,NULL); - // fclose(raw_file); - fclose(ripe_file); - return 0; -} \ No newline at end of file diff --git a/get_td_mistake_lost/get_td_mistake_lost.sh b/get_td_mistake_lost/get_td_mistake_lost.sh deleted file mode 100644 index 7c851b8..0000000 --- a/get_td_mistake_lost/get_td_mistake_lost.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -python new_TD.py -./get_mistake_level -./get_TD_SFH -./get_lost_rate diff --git a/get_td_mistake_lost/gram_index_engine.c b/get_td_mistake_lost/gram_index_engine.c deleted file mode 100644 index 0f503db..0000000 --- a/get_td_mistake_lost/gram_index_engine.c +++ /dev/null @@ -1,1354 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "gram_index_engine.h" -#include "queue.h" - -#define HTABLE_SIZE 1024 *1024 -#define GRAM_CNT_MAX 2 -#define GRAM_MAX 128 -#define TOLERENCE_SIZE 0 -#define UNION_INIT_SIZE 1000 -#define BLOCKSIZE_MIN 3 -#define MEM_OCCUPY 1 -#define CNT_MAX 10 -#define GRAM_CNT_THRESHOLD 10 -#define QUERY_LEN_ACCURACY 0.1 -#define HTABLE_NUM 8 -//#define GIE_INPUT_FORMAT_SFH 1 -//#define GIE_INPUT_FORMAT_PLAIN 0 -#define MAX_LENGTH 10000 -#define KEY_MAX_LENGTH 10 -#define EDIT_DISTN_INSERT_COST 1 -#define EDIT_DISTN_REMOVE_COST 1 -#define EDIT_DISTN_REPLACE_COST 2 -#define MIN(x,y) ((x)<(y)?(x):(y)) - -int before(unsigned int off1, unsigned int off2) -{ - return (signed int)(off1-off2)<0; -} -#define after(off2,off1) before(off1,off2) - -typedef struct -{ - unsigned int user_gram_value; - unsigned int user_position_accuracy; - short ED_reexamine; - short input_format; - MESA_htable_handle id_table; - MESA_htable_handle index_table[HTABLE_NUM]; - unsigned long long mem_occupy; - unsigned long long hash_cnt; -}GIE_handle_inner_t; - - -struct linklist_node -{ - short * position; - struct id_table_data * basicinfo; - short size; - short index; - unsigned long long blocksize; - TAILQ_ENTRY(linklist_node) listentry; -}; - - -struct index_table_data -{ - struct TQ * listhead; - int cnt; -}; - - -struct id_table_data -{ - unsigned int id; - short sfh_length; - short gram_cnt; - unsigned long long blocksize; - char * sfh; - void * tag; - char cfds_lvl; -}; - - -struct htable_handle -{ - MESA_htable_handle runtime_table; - MESA_htable_handle para; -}; - -struct key_list_node -{ - char * key; - int digest_id; - int pos; - unsigned long long blocksize; - TAILQ_ENTRY(key_list_node) keylistentry; -}; - - -unsigned long long hash_cnt; -unsigned long long cnt_sum; - -TAILQ_HEAD(TQ, linklist_node); -TAILQ_HEAD(KL, key_list_node); - -void idtable_free(void * data); -void indextable_free(void * data); -int key_compare(const uchar * 
key1, uint size1, const uchar * key2, uint size2); -int GIE_insert_indextable(MESA_htable_handle handle, struct id_table_data * info, char * key, unsigned int index,unsigned long long blocksize); - -int GIE_delete_from_indextable_by_key(MESA_htable_handle handle, char * key, unsigned int id); -int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t * digest); -int GIE_cmp(const void * a, const void * b); -inline unsigned int get_real_length(const char * string, unsigned int length); -void print_item_iterate(const uchar * key, unsigned int size, void * data, void * user); -inline unsigned long long calc_fh_blocksize(unsigned long long orilen); -inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len); - -MESA_htable_handle copy_htable(void * htable_para,void (* func)(const uchar * key, uint size, void * data, void *user),void (*free_fuc)(void * data)); -void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user); -void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user); - -GIE_handle_t * GIE_create(const GIE_create_para_t * para) -{ - int i = 0; - GIE_handle_inner_t * handle = (GIE_handle_inner_t *)calloc(1, sizeof(GIE_handle_inner_t)); - handle->mem_occupy = 0; - handle->mem_occupy += sizeof(GIE_handle_inner_t); - - handle->user_gram_value = para->gram_value; - handle->user_position_accuracy = para->position_accuracy; - handle->input_format = para->format; - //handle->user_cmp = GIE_INPUT_FORMAT_PLAIN; - handle->ED_reexamine = para->ED_reexamine; - handle->hash_cnt = 0; - - - MESA_htable_create_args_t idtable_args,indextable_args[HTABLE_NUM]; - memset(&idtable_args, 0, sizeof(idtable_args)); - idtable_args.thread_safe = 0; - idtable_args.hash_slot_size = HTABLE_SIZE; - idtable_args.max_elem_num = 0; - idtable_args.expire_time = 0; - idtable_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO; - idtable_args.key_comp = NULL; - idtable_args.key2index = NULL; - idtable_args.data_free = idtable_free; - idtable_args.data_expire_with_condition = NULL; - idtable_args.recursive = 0; - handle->id_table = MESA_htable_create(&idtable_args, sizeof(idtable_args)); - - for(i = 0;i < HTABLE_NUM;i++) - { - memset(&indextable_args[i], 0, sizeof(indextable_args[i])); - indextable_args[i].thread_safe = 0; - indextable_args[i].hash_slot_size = HTABLE_SIZE; - indextable_args[i].max_elem_num = 0; - indextable_args[i].expire_time = 0; - indextable_args[i].eliminate_type = HASH_ELIMINATE_ALGO_FIFO; - indextable_args[i].key_comp = key_compare; - indextable_args[i].key2index = NULL; - indextable_args[i].data_free = indextable_free; - indextable_args[i].data_expire_with_condition = NULL; - indextable_args[i].recursive = 0; - handle->index_table[i] = MESA_htable_create(&indextable_args[i], sizeof(indextable_args[i])); - } - - return (GIE_handle_t *)(handle); -} - -int key_compare(const uchar * key1, uint size1, const uchar * key2, uint size2) -{ - return ( (*(long*)key1) - (*(long*)key2)); -} - - -void idtable_free(void * data) -{ - struct id_table_data * tmp = (struct id_table_data *)data; - free(tmp->sfh); - tmp->sfh = NULL; - tmp->tag = NULL; - free(tmp); - tmp = NULL; - - return; -} - -void indextable_delete_with_threshold(MESA_htable_handle * htable_handle, struct index_table_data * tmp, char * key) -{ - int key_length = strnlen(key,KEY_MAX_LENGTH); - struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); - while(tmp_node != NULL) - { - struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node,listentry); - 
if(tmp_node->basicinfo->gram_cnt <= GRAM_CNT_THRESHOLD) - { - tmp_node = linklist_tmp; - continue; - } - TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); - tmp_node->basicinfo->gram_cnt--; - tmp->cnt--; - if(TAILQ_EMPTY(tmp->listhead) == 1) - { - //_handle->hash_cnt--; - //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ)); - if(MESA_htable_del(htable_handle, (const uchar *)(key), key_length, indextable_free) < 0) - { - printf("indextable backtrack delete error!\n"); - assert(0); - return; - } - } - //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp_node->size)); - free(tmp_node->position); - tmp_node->position = NULL; - free(tmp_node); - tmp_node = NULL; - tmp_node = linklist_tmp; - - } - return; -} - - -void indextable_free(void * data) -{ - struct index_table_data * tmp = (struct index_table_data *)data; - struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); - while(tmp_node != NULL) - { - struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry); - TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); - tmp->cnt--; - free(tmp_node->position); - tmp_node->position = NULL; - free(tmp_node); - tmp_node = NULL; - tmp_node = linklist_tmp; - } - free(tmp->listhead); - tmp->listhead = NULL; - free(tmp); - tmp = NULL; - return; -} - - -void indextable_free_cnt(void * data) -{ - struct index_table_data * tmp = (struct index_table_data *)data; - hash_cnt++; - cnt_sum += tmp->cnt; - struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); - while(tmp_node != NULL) - { - struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry); - TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); - tmp->cnt--; - free(tmp_node->position); - tmp_node->position = NULL; - free(tmp_node); - tmp_node = NULL; - tmp_node = linklist_tmp; - } - free(tmp->listhead); - tmp->listhead = NULL; - free(tmp); - tmp = NULL; - return; -} - -void print_item_iterate_idtable(const uchar * key, uint size, void * data, void * user) -{ - struct id_table_data * id_data = (struct id_table_data *)data; - printf("id:%u\n",id_data->id); -} - - - -void print_item_iterate(const uchar * key, uint size, void * data, void * user) -{ - struct index_table_data * index_data = (struct index_table_data *)data; - printf("%s %d\n", (char *)key, index_data->cnt); - struct linklist_node * tmp_node = NULL; - int i = 0; - TAILQ_FOREACH(tmp_node, index_data->listhead, listentry) - { - printf("id = %u\n",tmp_node->basicinfo->id); - printf("position is :\n"); - for(i = 0;i < tmp_node->index;i++) - { - printf("%d ",tmp_node->position[i]); - } - printf("\n"); - } - printf("\n"); -} - -int edit_distn(const char *s1, int s1len, const char *s2, int s2len) -{ - long int max_len = 0; - if(s1len >= s2len) - { - max_len = s1len; - } - else - { - max_len = s2len; - } - int **t = (int **)malloc(2*sizeof(int *)); - t[0] = (int *)malloc((max_len +1)*sizeof(int)); - t[1] = (int *)malloc((max_len +1)*sizeof(int)); - //int t[2][EDIT_DISTN_MAXLEN+1]; - int *t1 = t[0]; - int *t2 = t[1]; - int *t3; - size_t i1, i2; - for (i2 = 0; i2 <= s2len; i2++) - t[0][i2] = i2 * EDIT_DISTN_REMOVE_COST; - for (i1 = 0; i1 < s1len; i1++) { - t2[0] = (i1 + 1) * EDIT_DISTN_INSERT_COST; - for (i2 = 0; i2 < s2len; i2++) { - int cost_a = t1[i2+1] + EDIT_DISTN_INSERT_COST; - int cost_d = t2[i2] + EDIT_DISTN_REMOVE_COST; - int cost_r = t1[i2] + (s1[i1] == s2[i2] ? 
0 : EDIT_DISTN_REPLACE_COST); - t2[i2+1] = MIN(MIN(cost_a, cost_d), cost_r); - } - t3 = t1; - t1 = t2; - t2 = t3; - } - long int ret = t1[s2len]; - free(t[0]); - free(t[1]); - free(t); - return ret; - //return t1[s2len]; -} - - -void GIE_destory(GIE_handle_t * handle) -{ - GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle); - //printf("hash_cnt:%llu\n",_handle->hash_cnt); - //printf("mem_occupy:%llu\n",_handle->mem_occupy); - int i = 0; - for(i = 0;i < HTABLE_NUM;i++) - { - MESA_htable_destroy(_handle->index_table[i], indextable_free_cnt); - } - MESA_htable_destroy(_handle->id_table, idtable_free); - //printf("index_free hash_cnt :%llu\n", hash_cnt); - //printf("cnt sum :%llu\n",cnt_sum); - free(_handle); - _handle = NULL; -} - - -int grab_key_set(char * str_begin,short str_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list, unsigned long long blocksize) -{ - int k = 0,j = 0; - char * tmp_gram = str_begin; - char key[gram_value+1]; - int sum = 0,htable_index = 0; - if(str_length < gram_value) - { - return 0; - } - str_length = MIN(str_length,strnlen(str_begin,str_length)); - *gram_cnt = str_length - gram_value + 1; - //printf("str_length:%d\n",str_length); - for(k = 0; k < str_length - gram_value + 1; k++) - { - sum = 0; - memset(key,'\0', gram_value+1); - memcpy(key, tmp_gram++, gram_value); - //printf("k:%d key:%s\n",k,key); - for(j = 0; j < gram_value; j++) - { - sum += key[j]; - } - htable_index = sum%HTABLE_NUM; - struct key_list_node *tmp_node = (struct key_list_node *)calloc(1,sizeof(struct key_list_node)); - tmp_node->key = (char *)calloc(gram_value+1,sizeof(char)); - memcpy(tmp_node->key,key,gram_value); - tmp_node->digest_id = i; - tmp_node->pos = k; - tmp_node->blocksize = blocksize; - TAILQ_INSERT_TAIL(to_process_list[htable_index], tmp_node, keylistentry); - } - return 1; -} -int sfh_grab_key_set(char *sfh,short sfh_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list) -{ - int t = 0; - char * tmp_gram = sfh; - unsigned long long blocksize = 0; - for(t = 0; t < 2;t++) - { - blocksize = get_blocksize_from_head(tmp_gram, sfh_length); - while(*tmp_gram != '\0') - { - if(*tmp_gram == ':') - { - tmp_gram++; - break; - } - tmp_gram++; - } - unsigned int real_length = get_real_length(tmp_gram, sfh_length); - if(real_length < gram_value) - { - if(t==0) - { - return 0; - } - else - { - continue; - } - } - grab_key_set(tmp_gram, real_length, i, gram_value, gram_cnt, to_process_list, blocksize); - while(*tmp_gram != '\0') - { - if(*tmp_gram == '#') - { - tmp_gram++; - break; - } - tmp_gram++; - } - } - return 1; -} - -void free_key_set(struct KL ** to_process_list,int size) -{ - int i = 0; - for(i = 0;i < size;i++) - { - struct key_list_node *tmp_node = TAILQ_FIRST(to_process_list[i]); - while(tmp_node != NULL) - { - struct key_list_node *key_list_tmp = TAILQ_NEXT(tmp_node, keylistentry); - TAILQ_REMOVE(to_process_list[i], tmp_node, keylistentry); - free(tmp_node->key); - tmp_node->key = NULL; - free(tmp_node); - tmp_node = NULL; - tmp_node = key_list_tmp; - } - free(to_process_list[i]); - to_process_list[i]= NULL; - } -} - -int GIE_update(GIE_handle_t * handle,GIE_digest_t * * digests,int size) -{ - GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle); - struct id_table_data * info = NULL; - int success_cnt = 0; - int m = 0, i = 0, grab_ret = 0; - short gram_cnt = 0; - unsigned int input_fh_len = 0; - unsigned int gram_value = _handle->user_gram_value; - struct KL* to_process_list[HTABLE_NUM]; - - 
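/* Editor's sketch (not part of the original patch): grab_key_set() above slides a
 * gram_value-wide window over the hashed part of the digest and shards every extracted
 * key across the HTABLE_NUM index tables using the byte sum of the key modulo
 * HTABLE_NUM; GIE_part_query() applies the same rule at query time so a key always
 * probes the table it was inserted into. A minimal in-file restatement of that
 * extraction/bucketing rule; for_each_gram and emit_key are illustrative names, and
 * as in the original the byte sum assumes printable SFH characters. */
static void for_each_gram(const char *str, unsigned int len, unsigned int gram_value,
                          void (*emit_key)(const char *key, unsigned int gram_value,
                                           int bucket, int pos))
{
    if (len < gram_value)
        return;                              /* too short to yield a single gram */
    for (unsigned int k = 0; k + gram_value <= len; k++) {
        int sum = 0;
        for (unsigned int j = 0; j < gram_value; j++)
            sum += str[k + j];               /* byte sum of this gram, as in grab_key_set() */
        emit_key(str + k, gram_value, sum % HTABLE_NUM, (int)k);
    }
}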
MESA_htable_handle htable_index_copy; - MESA_htable_handle htable_id_copy; - MESA_htable_handle htable_tmp_index=NULL,htable_tmp_id=NULL; - struct htable_handle * htable_copied_id_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle)); - struct htable_handle * htable_copied_index_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle)); - - htable_copied_id_para->runtime_table = _handle->id_table; - htable_copied_id_para->para = NULL; - htable_id_copy = copy_htable((void *)htable_copied_id_para, copy_idtable_item_iterate,idtable_free); - - MESA_htable_handle garbage_htable[HTABLE_NUM]; - /*if(MESA_htable_iterate(htable_id_copy, print_item_iterate_idtable, NULL) == -1) - { - printf("iterate error!\n"); - } - printf("size:%u\n",id_size);*/ - - for(m = 0;m < HTABLE_NUM;m++) - { - to_process_list[m]=(struct KL*)calloc(1,sizeof(struct KL)); - TAILQ_INIT(to_process_list[m]); - } - - for(i = 0; i < size; i++) - { - switch(digests[i]->operation) - { - case GIE_INSERT_OPT: - { - assert(digests[i]->tag!=NULL); - if(_handle->input_format == GIE_INPUT_FORMAT_SFH) - { - grab_ret = sfh_grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list); - } - else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) - { - - grab_ret = grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list,0); - } - if(grab_ret == 0) - { - continue; - } - else - { - info = (struct id_table_data *)calloc(1,sizeof(struct id_table_data)); - input_fh_len = digests[i]->sfh_length; - info->sfh = (char *)calloc(input_fh_len + 1,sizeof(char)); - memcpy(info->sfh, digests[i]->sfh, input_fh_len); - _handle->mem_occupy += sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1); - info->sfh_length = digests[i]->sfh_length; - info->gram_cnt = gram_cnt; - - /*int tag_len = strnlen(digests[i]->tag,MAX_LENGTH); - info->tag = (char *)calloc(tag_len+1,sizeof(char)); - memcpy(info->tag,digests[i]->tag,tag_len);*/ - info->tag = digests[i]->tag; - - info->id = digests[i]->id; - info->cfds_lvl = digests[i]->cfds_lvl; - if(_handle->input_format == GIE_INPUT_FORMAT_SFH) - { - info->blocksize = get_blocksize_from_head(digests[i]->sfh, digests[i]->sfh_length); - } - else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) - { - info->blocksize = 0; - } - - if(MESA_htable_add(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), (const void *)info) < 0) - { - _handle->mem_occupy -= (sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1)); - free(info->sfh); - info->sfh = NULL; - free(info); - info = NULL; - continue; - } - } - success_cnt ++; - break; - } - - case GIE_DELETE_OPT: - { - - struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(htable_id_copy, \ - (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id)); - if(ret!= NULL) - { - if(_handle->input_format == GIE_INPUT_FORMAT_SFH) - { - success_cnt += sfh_grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list); - } - else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) - { - - success_cnt += grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list,0); - } - } - else - { - break; - } - if(MESA_htable_del(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free) < 0) - { - printf("delete id failed!"); - assert(0); - } - //success_cnt += GIE_delete(_handle, digests[i]); - break; - } - - default: - break; - } - - } - unsigned int digest_id = 0; - struct id_table_data * 
tmp_info= NULL; - - for(i = 0;i < HTABLE_NUM;i++) - { - htable_copied_index_para->runtime_table = _handle->index_table[i]; - htable_copied_index_para->para = htable_id_copy; - htable_index_copy = copy_htable((void *)htable_copied_index_para,copy_indextable_item_iterate,indextable_free); - struct key_list_node * tmp_node; - TAILQ_FOREACH(tmp_node, to_process_list[i], keylistentry) - { - digest_id = tmp_node->digest_id; - if(digests[digest_id]->operation == GIE_INSERT_OPT) - { - tmp_info =(struct id_table_data *)MESA_htable_search(htable_id_copy, (const uchar *)(&(digests[digest_id])->id), \ - sizeof((digests[digest_id])->id)); - if(tmp_info == NULL) - { - printf("id %u not insert\n",digests[digest_id]->id); - } - if(GIE_insert_indextable(htable_index_copy, tmp_info, tmp_node->key, tmp_node->pos,tmp_node->blocksize) < 0) - { - printf("insert %d indextable failed!\n",digests[digest_id]->id); - continue; - } - } - else if(digests[digest_id]->operation == GIE_DELETE_OPT) - { - if(GIE_delete_from_indextable_by_key(htable_index_copy, tmp_node->key, (digests[digest_id])->id) < 0) - { - printf("delete %d indextable failed!\n",digests[digest_id]->id); - continue; - } - } - } - htable_tmp_index= _handle->index_table[i]; - _handle->index_table[i] = htable_index_copy; - garbage_htable[i]=htable_tmp_index; - } - - htable_tmp_id = _handle->id_table; - _handle->id_table = htable_id_copy; - usleep(200); - MESA_htable_destroy(htable_tmp_id, idtable_free); - /*if(MESA_htable_iterate(_handle->index_table, print_item_iterate, NULL) == -1) - { - printf("iterate error!\n"); - }*/ - for(i=0;iruntime_table = copy_htable_handle; - htable_iterate_para->para = htable_copied_para->para; - - if(MESA_htable_iterate(htable_copied_para->runtime_table, func, htable_iterate_para) == -1) - { - printf("iterate error!\n"); - } - free(htable_iterate_para); - htable_copied_para=NULL; - return copy_htable_handle; -} - -void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user) -{ - struct index_table_data * index_data = (struct index_table_data *)data; - struct htable_handle * htable_copied_para = (struct htable_handle *)user; - - struct index_table_data * index_data_copy = (struct index_table_data *)calloc(1, sizeof(struct index_table_data)); - struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ)); - index_data_copy->listhead = head; - index_data_copy->cnt = index_data->cnt; - - TAILQ_INIT(head); - struct linklist_node * tmp_node = NULL; - struct id_table_data * ret = NULL; - int i = 0; - - TAILQ_FOREACH(tmp_node, index_data->listhead, listentry) - { - struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node)); - node_data->size = tmp_node->size; - node_data->position = (short *)calloc(node_data->size, sizeof(short)); - for(i = 0;i < tmp_node->index;i++) - { - node_data->position[i] = tmp_node->position[i]; - } - ret = (struct id_table_data *)MESA_htable_search(htable_copied_para->para, (const uchar *)(&(tmp_node->basicinfo->id)), sizeof(tmp_node->basicinfo->id)); - if(ret == NULL) - { - //printf("copy id %u not exist\n",tmp_node->basicinfo->id); - free(node_data->position); - node_data->position = NULL; - free(node_data); - node_data = NULL; - continue; - } - node_data->basicinfo = ret; - node_data->index = tmp_node->index; - node_data->blocksize = tmp_node->blocksize; - TAILQ_INSERT_TAIL(head, node_data, listentry); - } - MESA_htable_add(htable_copied_para->runtime_table, key, size, (const void *)index_data_copy); -} -//TODO: Using the orginal value 
instead of make a duplication to be faster. -void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user) -{ - struct id_table_data * id_data = (struct id_table_data *)data; - struct htable_handle * htable_para = (struct htable_handle *)user; - struct id_table_data * id_data_copy = (struct id_table_data *)calloc(1, sizeof(struct id_table_data)); - assert(id_data->tag!=NULL); - memcpy(id_data_copy,id_data,sizeof(struct id_table_data)); - id_data_copy->sfh = (char *)calloc(id_data_copy->sfh_length,sizeof(char)); - memcpy(id_data_copy->sfh,id_data->sfh,id_data_copy->sfh_length); - - MESA_htable_add(htable_para->runtime_table, (const uchar *)(&(id_data_copy->id)), sizeof(id_data_copy->id), (const void *)id_data_copy); -} - - - - -int GIE_insert_indextable(MESA_htable_handle htable_copy, struct id_table_data * info, char * key, unsigned int index, unsigned long long blocksize) -{ - int key_length = strnlen(key,KEY_MAX_LENGTH); - struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node)); - node_data->size = GRAM_CNT_MAX; - node_data->position = (short *)calloc(node_data->size, sizeof(short)); - node_data->basicinfo = info; - node_data->index = 0; - node_data->position[(node_data->index)++] = index; - node_data->blocksize = blocksize; - - //_handle->mem_occupy += sizeof(struct linklist_node) + sizeof(short)*(node_data->size); - - struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable_copy, \ - (const uchar *)(key), key_length)); - - - if(ret != NULL) - { - struct linklist_node * tmp = NULL; - TAILQ_FOREACH(tmp, ret->listhead, listentry) - { - if(tmp->basicinfo->id > node_data->basicinfo->id) - { - TAILQ_INSERT_BEFORE(tmp, node_data, listentry); - ret->cnt ++; - if(ret->cnt >= CNT_MAX) - { - indextable_delete_with_threshold(htable_copy,ret,key); - } - return 0; - } - if(tmp->basicinfo->id == node_data->basicinfo->id && tmp->blocksize == blocksize) - { - if(tmp->index >= tmp->size) - { - tmp->size *= 2; - tmp->position = realloc(tmp->position, (tmp->size)*sizeof(short)); - } - tmp->position[(tmp->index)++] = index; - //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(node_data->size)); - free(node_data->position); - node_data->position = NULL; - free(node_data); - node_data = NULL; - return 0; - } - } - TAILQ_INSERT_TAIL(ret->listhead, node_data, listentry); - ret->cnt ++; - if(ret->cnt >= CNT_MAX) - { - indextable_delete_with_threshold(htable_copy,ret,key); - } - } - - else - { - struct index_table_data * index_data = (struct index_table_data *)calloc(1, sizeof(struct index_table_data)); - struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ)); - //_handle->mem_occupy += sizeof(struct index_table_data) + sizeof(struct TQ); - - index_data->listhead = head; - index_data->cnt = 0; - - TAILQ_INIT(head); - TAILQ_INSERT_TAIL(head, node_data, listentry); - index_data->cnt++; - //_handle->hash_cnt++; - if(MESA_htable_add(htable_copy, (const uchar *)(key), key_length, (const void *)index_data) < 0) - { - printf("add index_table failed!\n"); - assert(0); - return -1; - } - } - return 0; - -} - - - -int GIE_delete(GIE_handle_inner_t * _handle, GIE_digest_t * digest) -{ - int success_cnt = 0; - struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(_handle->id_table, \ - (const uchar *)(&(digest->id)), sizeof(digest->id)); - if(ret == NULL) - { - printf("del %d doesn't exist!\n",digest->id); - return -1; - } - else - { - int gram_value = _handle->user_gram_value; - char 
key[gram_value+1]; - char * tmp_gram = ret->sfh; - while(*tmp_gram != '\0') - { - if(*tmp_gram == ':') - { - tmp_gram++; - break; - } - tmp_gram++; - } - unsigned int real_length = get_real_length(tmp_gram, ret->sfh_length); - int gram_cnt = real_length - gram_value + 1; - int k = 0; - for(k = 0; k < gram_cnt; k++) - { - memset(key, '\0', gram_value+1); - memcpy(key, tmp_gram++, gram_value); - if(GIE_delete_from_indextable_by_key(_handle, key, digest->id) < 0) - { - printf("delete %d indextable failed!\n",digest->id); - continue; - } - } - success_cnt++; - } - - return success_cnt; -} - - - -int GIE_delete_from_indextable_by_key(MESA_htable_handle htable, char * key, unsigned int id) -{ - int key_length = strnlen(key,KEY_MAX_LENGTH); - struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable, \ - (const uchar *)(key), key_length)); - if(ret == NULL) - { - return 0; - } - - - struct linklist_node * tmp = TAILQ_FIRST(ret->listhead); - while(tmp != NULL) - { - struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp, listentry); - if(tmp->basicinfo->id != id) - { - tmp=linklist_tmp; - continue; - } - TAILQ_REMOVE(ret->listhead, tmp, listentry); - ret->cnt--; - //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp->size)); - free(tmp->position); - tmp->position = NULL; - free(tmp); - tmp = NULL; - if(TAILQ_EMPTY(ret->listhead) == 1) - { - //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ)); - int ret = MESA_htable_del(htable, (const uchar *)(key), key_length, indextable_free); - if(ret < 0) - { - printf("indextable backtrack delete error!\n"); - assert(0); - return -1; - } - - } - } - return 0; -} - - - - -int GIE_cmp(const void * a, const void * b) -{ - unsigned int tmp_a = *(unsigned int *)a; - unsigned int tmp_b = *(unsigned int *)b; - if(before(tmp_a, tmp_b)) - { - return -1; - } - else if(after(tmp_a, tmp_b)) - { - return 1; - } - else - { - return 0; - } -} - - -inline unsigned int get_real_length(const char * string, unsigned int length) -{ - unsigned int ret = 0; - const char * tmp_str = string; - while(*tmp_str != '\0') - { - if(*tmp_str == '[') - { - break; - } - tmp_str++; - ret ++; - } - return ret; -} - - -inline int GIE_part_query(GIE_handle_inner_t * _handle, const char * query_string, int index_begin, int part_query_len,unsigned int ** id_union, unsigned int * union_index, unsigned int * union_size, unsigned long long blocksize) -{ - unsigned int gram_value = _handle->user_gram_value; - - unsigned int real_length = part_query_len; - unsigned int chunk_count_max = 0; - if(real_length < gram_value) - { - return 0; - } - else - { - chunk_count_max = real_length/gram_value; - } - char key[gram_value+1]; - struct index_table_data * ret = NULL; - struct linklist_node * tmp_node_t = NULL; - - unsigned int position_accuracy = _handle->user_position_accuracy; - - int i=0,j=0,k=0; - unsigned int tmp_min = 0; - int sum = 0, htable_index = 0; - for(i = index_begin; i < chunk_count_max + index_begin; i++) - { - sum = 0; - memset(key,'\0',gram_value+1); - memcpy(key, query_string, gram_value); - for(k = 0; k < gram_value; k++) - { - sum += key[k]; - } - htable_index = sum%HTABLE_NUM; - ret = (struct index_table_data *) MESA_htable_search(_handle->index_table[htable_index], \ - (const uchar *)(key), strnlen(key,gram_value)); - query_string = query_string + gram_value; - - if(ret ==NULL) - { - break; - } - - tmp_node_t = NULL; - TAILQ_FOREACH(tmp_node_t, ret->listhead, listentry) - { - tmp_min = 0; - if(i*gram_value >= 
position_accuracy) - { - tmp_min = i*gram_value - position_accuracy; - } - for(j = 0; j < tmp_node_t->index; j++) - { - if((blocksize == tmp_node_t->basicinfo->blocksize) && (tmp_node_t->position[j] >= tmp_min) && (tmp_node_t->position[j] <= i*gram_value + position_accuracy)) - //if(blocksize == tmp_node_t->basicinfo->blocksize) - { - if((*union_index) >= (*union_size)) - { - *union_size = (*union_size) * 2; - *id_union = (unsigned int *)realloc(*id_union, (*union_size)*sizeof(unsigned int)); - } - (*id_union)[(*union_index)] = tmp_node_t->basicinfo->id; - (*union_index)++; - break; - } - } - } - } - return chunk_count_max; -} - -inline int GIE_gram_with_position(GIE_handle_inner_t * _handle, unsigned long long query_blocksize, const char * fuzzy_string, unsigned int ** id_union, - unsigned int * union_index,unsigned int * union_size, unsigned int * chunk_cnt) -{ - const char * tmpstr = fuzzy_string; - const char * query_string_begin; - unsigned long long blocksize = query_blocksize; - int part_query_len = 0; - int query_actual_len = 0; - while(*tmpstr != ':'&& *tmpstr != '\0') - { - tmpstr ++; - } - if(*tmpstr == ':') - { - tmpstr ++; - } - else - { - return 0; - } - query_string_begin = tmpstr; - char *p = NULL; - - while((*query_string_begin) != '\0') - { - int left = 0; - int right = 0; - p=strchr(query_string_begin,'['); - if(p!=NULL) - { - part_query_len = p-query_string_begin; - int ret = sscanf(p,"[%d:%d]",&left,&right); - if(ret != 2) - { - break; - } - p=strchr(p,']'); - if(p != NULL && (*p) != '\0') - { - int index_begin = (left/blocksize - TOLERENCE_SIZE > 0 ? (left/blocksize - TOLERENCE_SIZE) : 0); - (*chunk_cnt) += GIE_part_query(_handle,query_string_begin,index_begin, part_query_len, - id_union, union_index, union_size, blocksize); - query_actual_len += part_query_len; - query_string_begin = p+1; - } - else - { - break; - } - } - else - { - break; - } - } - return query_actual_len; -} - -inline unsigned long long calc_fh_blocksize(unsigned long long orilen) -{ - double tmp = orilen/(64 * BLOCKSIZE_MIN); - double index = floor(log(tmp)/log(2)); - double tmp_t = pow(2,index); - unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN); - return blocksize; -} - -inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len) -{ - const char * tmp_str = fuzzy_string; - char blk[100]; - memset(blk,'\0',sizeof(blk)); - unsigned long long blocksize = 0; - int i = 0; - while(*tmp_str != '\0' && *tmp_str != ':' && str_len != 0 && i < 100) - { - blk[i++] = *tmp_str; - tmp_str++; - str_len--; - } - blocksize = (unsigned long long)atoi(blk); - return blocksize; -} -int GIE_string_similiarity(const char *str1, int len1, const char *str2, int len2) -{ - int edit_distance=0; - int conf=0; - edit_distance = edit_distn(str1, len1,str2,len2); - conf = 100-(edit_distance*100)/(len1 + len2); - return conf; -} - -int GIE_sfh_similiarity(const char *sfh1, int len1, const char *sfh2, int len2) -{ - int j = 0, t = 0; - unsigned long long query_blocksize = 0, index_blocksize = 0; - unsigned int query_real_length = 0, index_real_length = 0; - const char *query_gram_begin = sfh1; - const char *index_gram_begin = sfh2; - char *splice_str = (char *)malloc(sizeof(char)*len1); - memset(splice_str,'\0',len1); - char *spli_str_begin = splice_str; - int edit_distance = 0; - int ret = 0; - char *p = NULL; - int splice_len = 0; - - for(j = 0; j < 2; j++) - { - index_blocksize = get_blocksize_from_head(index_gram_begin, len2); - while((*index_gram_begin) != 
'\0') - { - if((*index_gram_begin) == ':') - { - index_gram_begin++; - break; - } - index_gram_begin++; - } - index_real_length = get_real_length(index_gram_begin, len2); - query_gram_begin = sfh1; - for(t = 0; t < 2; t++) - { - query_blocksize = get_blocksize_from_head(query_gram_begin, len1); - //printf("gram_begin:%c\n",*index_gram_begin); - //printf("gram_str:%s\n",index_gram_begin); - while((*query_gram_begin) != '\0') - { - if((*query_gram_begin) == ':') - { - query_gram_begin++; - break; - } - query_gram_begin++; - } - //printf("query_blocksize:%lld, index_blocksize:%lld\n",query_blocksize,index_blocksize); - //index_real_length = get_real_length(index_gram_begin, len1); - if(query_blocksize == index_blocksize) - { - while((*query_gram_begin) != '#' && (*query_gram_begin) != '\0') - { - p=strchr(query_gram_begin,'['); - if(p!=NULL) - { - query_real_length = p-query_gram_begin; - p=strchr(p,']'); - if(p != NULL && (*p) != '\0') - { - - memcpy(spli_str_begin,query_gram_begin,query_real_length); - spli_str_begin += query_real_length; - //edit_distance += edit_distn(query_gram_begin, query_real_length, index_gram_begin, index_real_length); - query_gram_begin = p+1; - } - else - { - break; - } - } - else - { - break; - } - } - splice_len = strnlen(splice_str,len1); - edit_distance = edit_distn(index_gram_begin, index_real_length, splice_str, splice_len); - //printf("query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance); - ret = 100-(edit_distance*100)/(index_real_length + splice_len); - //ret = (100*ret)/SPAM_LENGTH; - //ret = 100-ret; - //ret = 100 - (100*edit_distance)/(query_real_length); - free(splice_str); - return ret; - } - while(*query_gram_begin != '\0') - { - if(*query_gram_begin == '#') - { - query_gram_begin++; - break; - } - query_gram_begin++; - } - - } - while(*index_gram_begin != '\0') - { - if(*index_gram_begin == '#') - { - index_gram_begin++; - break; - } - index_gram_begin++; - } - } - //printf("no blocksize:query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance); - free(splice_str); - return 0; -} - - - - -int GIE_query(GIE_handle_t * handle, const char * data, int data_len, GIE_result_t * results, int result_size) -{ - GIE_handle_inner_t * _handle = (GIE_handle_inner_t *) handle; - int i = 0, j = 0; - unsigned int union_index = 0; - unsigned int gram_value = _handle->user_gram_value; - unsigned int query_actual_len = 0; - unsigned int union_size = UNION_INIT_SIZE; - unsigned int chunk_cnt = 0; - const char *fuzzy_string_begin = data; - unsigned int * id_union =(unsigned int *)calloc(union_size, sizeof(unsigned int)); - unsigned long long query_blocksize = 0; - unsigned int fuzzy_string_len = (unsigned int)data_len; - - if(_handle->input_format == GIE_INPUT_FORMAT_SFH) - { - for(j = 0;j < 2;j++) - { - query_blocksize = get_blocksize_from_head(fuzzy_string_begin, fuzzy_string_len); - if(query_blocksize == 0) - { - return 0; - } - query_actual_len += GIE_gram_with_position(_handle, query_blocksize, fuzzy_string_begin, &id_union, &union_index, &union_size, &chunk_cnt); - while(*fuzzy_string_begin != '#' && *fuzzy_string_begin != '\0') - { - fuzzy_string_begin++; - } - if(*fuzzy_string_begin == '#') - { - fuzzy_string_begin++; - } - } - } - else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) - { - query_actual_len = fuzzy_string_len; - chunk_cnt = GIE_part_query(_handle, fuzzy_string_begin, 0, query_actual_len, &id_union, &union_index, &union_size, 0); - } - - 
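/* Editor's sketch (not part of the original patch): how a caller in this patch
 * (e.g. get_lost_rate.c) drives the engine end to end -- create a handle, stage
 * GIE_digest_t records, push them in one GIE_update() batch, then probe with
 * GIE_query(). Only the GIE_* API declared in include/gram_index_engine.h is
 * assumed; the digest below is a placeholder, not data from the patch, and
 * gie_usage_sketch is an illustrative name. */
#include <stdlib.h>
#include <string.h>
#include "gram_index_engine.h"

static int gie_usage_sketch(void)
{
    GIE_create_para_t para;
    memset(&para, 0, sizeof(para));
    para.gram_value = 7;                 /* parameters used by get_lost_rate.c */
    para.position_accuracy = 5;
    para.ED_reexamine = 1;
    para.format = GIE_INPUT_FORMAT_SFH;

    GIE_handle_t *h = GIE_create(&para);
    if (h == NULL)
        return -1;

    char sfh[] = "3:ABCDEFGHIJ[0:191]#6:KLMNOPQRST[0:383]";  /* placeholder SFH string */
    GIE_digest_t d;
    memset(&d, 0, sizeof(d));
    d.id = 1;
    d.operation = GIE_INSERT_OPT;
    d.cfds_lvl = 5;
    d.sfh = sfh;                         /* engine copies the string on insert */
    d.sfh_length = (unsigned int)strlen(sfh);
    d.tag = (void *)"example-tag";       /* tag must be non-NULL for inserts */

    GIE_digest_t *batch[1] = { &d };
    int inserted = GIE_update(h, batch, 1);   /* number of digests accepted */

    GIE_result_t results[16];
    int hits = GIE_query(h, sfh, (int)strlen(sfh), results, 16);

    GIE_destory(h);
    return (inserted > 0 && hits > 0) ? 0 : -1;
}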
if(union_index == 0) - { - free(id_union); - id_union = NULL; - return 0; - } - - qsort(id_union, union_index, sizeof(id_union[0]), GIE_cmp); - - unsigned int current_id = id_union[0]; - unsigned int * tmp_id = id_union; - unsigned int count = 0; - struct id_table_data * ret_tmp = NULL; - short conf = 0; - int ret_size = 0; - for(i = 0; i <= union_index; i++) - { - if( i == union_index || *tmp_id != current_id ) - { - ret_tmp = (struct id_table_data *) MESA_htable_search(_handle->id_table, \ - (const uchar *)(&(current_id)), sizeof(current_id)); - - if(ret_tmp == NULL) - { - break; - } - char * tmp_gram = ret_tmp->sfh; - int length = ret_tmp->sfh_length; - if(ret_tmp->gram_cnt == 0||chunk_cnt == 0) - { - conf = 0; - } - else - { - conf = (count*(query_actual_len-gram_value+1)*10)/(chunk_cnt*(ret_tmp->gram_cnt)); - } - - if(_handle->ED_reexamine == 1) - { - if(_handle->input_format == GIE_INPUT_FORMAT_SFH) - { - conf = GIE_sfh_similiarity(data, fuzzy_string_len, tmp_gram, length); - } - else - { - conf=GIE_string_similiarity(data, fuzzy_string_len, tmp_gram, length); - } - } - - if(conf >= ret_tmp->cfds_lvl) - { - results[ret_size].cfds_lvl = conf; - results[ret_size].id = current_id; - /*results[ret_size].tag = (char *)malloc((ret_tmp->sfh_length + 1)*sizeof(char)); - memset(results[ret_size].tag,'\0',(ret_tmp->sfh_length+1)); - memcpy(results[ret_size].tag, ret_tmp->sfh,ret_tmp->sfh_length);*/ - results[ret_size].tag = ret_tmp->tag; - ret_size++; - } - - if(ret_size == result_size) - { - break; - } - - current_id = *tmp_id; - count = 1; - - } - else - { - count++; - } - - tmp_id ++; - } - - free(id_union); - id_union = NULL; - return ret_size; -} - - -unsigned long long GIE_status(GIE_handle_t * handle, int type) -{ - unsigned long long length; - GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)handle; - switch(type) - { - case MEM_OCCUPY: - length = _handle->mem_occupy; - break; - default: - return 0; - } - return length; -} - diff --git a/get_td_mistake_lost/new_TD.conf b/get_td_mistake_lost/new_TD.conf deleted file mode 100644 index be9301e..0000000 --- a/get_td_mistake_lost/new_TD.conf +++ /dev/null @@ -1,3 +0,0 @@ -[file] -ripe_files_address = ../data/ripe_data/td_data_20171207/new_TD.txt -raw_file_address = ../data/ripe_data/td_data_20171207/all_av_digest diff --git a/get_td_mistake_lost/new_TD.py b/get_td_mistake_lost/new_TD.py deleted file mode 100644 index 5b7269f..0000000 --- a/get_td_mistake_lost/new_TD.py +++ /dev/null @@ -1,34 +0,0 @@ -#-*-coding:utf-8-*- -import re -import random -import ConfigParser -import bisect -import commands -import os -import hashlib - -config = ConfigParser.RawConfigParser() -config.read("file_digest.conf") -raw_file_address=config.get("new_td","raw_file_address") -ripe_files_address=config.get("new_td","ripe_files_address") -print ("%s %s" %(raw_file_address,ripe_files_address)) - -def get_md5_value(td_string): - my_md5 = hashlib.md5() - my_md5.update(td_string) - my_md5_string=str(my_md5.hexdigest()) - return my_md5_string - -i=0 -with open(raw_file_address,'r') as infile: - with open(ripe_files_address,'w')as outfile: - for line in infile: - i+=1 - if(i%100000==0): - print i; - data_line_val = re.split(r';',line) - data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data_line_val[4]) - td_string=str("url"+data_set[1]+"MediaType:"+data_set[3]+"MediaLen:"+data_set[4] \ - +"Etag:"+data_set[5]+"LastModify:"+data_set[6]+"td_data_md5_32k:"+data_line_val[16]) - new_td=get_md5_value(td_string) - 
outfile.write(td_string+";"+new_td+";"+data_line_val[19]+"\n") \ No newline at end of file diff --git a/include/gram_index_engine.h b/include/gram_index_engine.h new file mode 100644 index 0000000..a69e924 --- /dev/null +++ b/include/gram_index_engine.h @@ -0,0 +1,68 @@ +#ifndef _GRAM_INDEX_ENGINE_ +#define _GRAM_INDEX_ENGINE_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define GIE_INSERT_OPT 0 +#define GIE_DELETE_OPT 1 +#define GIE_INPUT_FORMAT_SFH 1 +#define GIE_INPUT_FORMAT_PLAIN 0 + + +typedef struct +{ + /* data */ +}GIE_handle_t; + + +typedef struct +{ + unsigned int id; + unsigned int sfh_length;//size of fuzzy_hash + short operation;//GIE_INSERT_OPT or GIE_DELETE_OPT.if operation is GIE_DELETE_OPT, only id is needed; + short cfds_lvl; + char * sfh; + void * tag; +}GIE_digest_t; + + +typedef struct +{ + unsigned int id; + short cfds_lvl; + void * tag; +}GIE_result_t; + + +typedef struct +{ + unsigned int gram_value; + //unsigned int htable_num; + unsigned int position_accuracy; + short format; //if format==GIE_INPUT_FORMAT_SFH, means the input string is a GIE_INPUT_FORMAT_SFH string + //else id format==PALIN, means the input string is common string + short ED_reexamine;//if ED_reexamine==1, calculate edit distance to verify the final result +}GIE_create_para_t; + + +GIE_handle_t * GIE_create(const GIE_create_para_t * para); + + +int GIE_update(GIE_handle_t * handle, GIE_digest_t ** digests, int size); + + +//return actual matched result count +//return 0 when matched nothing; +//return -1 when error occurs; +int GIE_query(GIE_handle_t * handle, const char * data, int data_len, GIE_result_t * results, int result_size); + +void GIE_destory(GIE_handle_t * handle); +int GIE_string_similiarity(const char *str1, int len1, const char *str2, int len2); +int GIE_sfh_similiarity(const char *sfh1, int len1, const char *sfh2, int len2); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/rssb_statistics/all_len_st.py b/rssb_statistics/all_len_st.py deleted file mode 100644 index 6e32518..0000000 --- a/rssb_statistics/all_len_st.py +++ /dev/null @@ -1,17 +0,0 @@ -import os -import re -import csv -import bisect -# cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" -# cmd2 = "cat media_expire.log.2018* > media_expire.log" -# os.system(cmd1) -# os.system(cmd2) -breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] -st_num = [0,0,0,0,0,0,0,0] - -with open("media_expire.log",'r') as infile: - for line in infile: - line_result = re.split(r",|MID:|media_len:",line) - j = bisect.bisect(breakpoints,int(line_result[6])) - st_num[j]+=1 -print st_num \ No newline at end of file diff --git a/rssb_statistics/delete_len_st.py b/rssb_statistics/delete_len_st.py deleted file mode 100644 index 010dc84..0000000 --- a/rssb_statistics/delete_len_st.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -import re -import csv -import bisect -cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" -cmd2 = "cat ../12.log/media_expire.log.2018* > ../12.log/media_expire.log" -cmd3 = "cat media_expire.log.2018* > media_expire.log" -os.system(cmd1) -os.system(cmd2) -os.system(cmd3) -breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] -st_num = [0,0,0,0,0,0,0,0] -mid_13_set=set() -mid_12_set=set() -mid_12_file = "../12.log/media_expire.log" -mid_13_file = "media_expire.log" - -i=0 -with open(mid_13_file,'r') as infile: - for line in infile: - i+=1 - if(i%100000==0): - print i - line_result = re.split(r",|MID:|media_len:",line) - 
mid_13_set.add(int(line_result[3])) - -i=0 -with open(mid_12_file,'r') as infile: - for line in infile: - i+=1 - if(i%100000==0): - print i - line_result = re.split(r",|MID:|media_len:",line) - mid_12_set.add(int(line_result[3])) - -different = mid_12_set.difference(mid_13_set) - -i=0 -with open(mid_12_file,'r') as infile: - for line in infile: - i+=1 - if(i%100000==0): - print i - line_result = re.split(r",|MID:|media_len:",line) - if((int(line_result[3]) in different)==True): - j = bisect.bisect(breakpoints,int(line_result[6])) - st_num[j]+=1 -print st_num \ No newline at end of file diff --git a/rssb_statistics/find_lost_td.conf b/rssb_statistics/find_lost_td.conf deleted file mode 100644 index 6ab40d9..0000000 --- a/rssb_statistics/find_lost_td.conf +++ /dev/null @@ -1,14 +0,0 @@ -[file] -raw_survey_file_13 = ../data/data_20180423/14.log/survey.recv.log -raw_deup_file = ../data/data_20180423/13.log/dedup.log -raw_survey_file_12 = ../data/data_20180423/12.log/survey.recv.log -run_time_file = ../data/data_20180423/runtime_log -lost_td_line = ../data/data_20180423/ripe_file/lost_td_line -mid_12_file = ../data/data_20180423/ripe_file/mid_12_file -mid_13_file = ../data/data_20180423/ripe_file/mid_14_file -list_12_file = ../data/data_20180423/ripe_file/list_12_file -list_13_file = ../data/data_20180423/ripe_file/list_14_file -different_mid_file_13 = ../data/data_20180423/ripe_file/different_mid_file_14 -different_mid_file_12 = ../data/data_20180423/ripe_file/different_mid_file_12 -different_list_file_13 = ../data/data_20180423/ripe_file/different_list_file_14 -different_list_file_12 = ../data/data_20180423/ripe_file/different_list_file_12 \ No newline at end of file diff --git a/rssb_statistics/find_lost_td.py b/rssb_statistics/find_lost_td.py deleted file mode 100644 index 50f3fab..0000000 --- a/rssb_statistics/find_lost_td.py +++ /dev/null @@ -1,147 +0,0 @@ -import ConfigParser -import re - -config = ConfigParser.RawConfigParser() -config.read("find_lost_td.conf") -raw_survey_file_13 = config.get("file","raw_survey_file_13") -raw_deup_file = config.get("file","raw_deup_file") -# run_time_file = config.get("file","run_time_file") -raw_survey_file_12 = config.get("file","raw_survey_file_12") -# lost_td_line = config.get("file","lost_td_line") -mid_12_file = config.get("file","mid_12_file") -mid_13_file = config.get("file","mid_13_file") -list_12_file = config.get("file","list_12_file") -list_13_file = config.get("file","list_13_file") -different_mid_file_13 = config.get("file","different_mid_file_13") -different_mid_file_12 = config.get("file","different_mid_file_12") -different_list_file_13 = config.get("file","different_list_file_13") -different_list_file_12 = config.get("file","different_list_file_12") - -term = {'td_len':(lambda x : len(x)==32), - 'data_num':(lambda x : len(x)>7), - 'url':(lambda x : x.find['NUll']), - 'sfh_len':(lambda x : len(x)>20), - 'not_null':(lambda x : len(x)!=0)} - -mid_13=dict() -with open(raw_survey_file_13,'r') as infile: - for line in infile: - data_line_val = re.split(r',',line) - if(len(data_line_val)==8): - mid_string = (re.split(r"MID:",data_line_val[2]))[1] - mid_13[mid_string]=list() - - -with open(mid_13_file,'w') as outfile: - for key in mid_13: - outfile.write(key+'\n') - -mid_12=dict() -with open(raw_survey_file_12,'r') as infile: - for line in infile: - data_line_val = re.split(r',',line) - if(len(data_line_val)==8): - mid_string = (re.split(r"MID:",data_line_val[2]))[1] - mid_12[mid_string]=list() - -with open(mid_12_file,'w') as outfile: - for 
key in mid_12: - outfile.write(key+'\n') - -different_mid_13 = list() -with open(different_mid_file_13,'w') as outfile: - for key in mid_13: - if(mid_12.has_key(key)==False): - different_mid_13.append(key) - outfile.write(key+'\n') - -different_mid_12 = list() -with open(different_mid_file_12,'w') as outfile: - for key in mid_12: - if(mid_13.has_key(key)==False): - different_mid_12.append(key) - outfile.write(key+'\n') - -i=0 -with open(raw_deup_file,'r') as infile: - for line in infile: - i+=1 - if(i%100000==0): - print i - data_line_val = re.split(r",|MID:|TD:",line) - if(term['data_num'](data_line_val) and \ - mid_13.has_key(str(data_line_val[4])) == True): - mid_13[data_line_val[4]].append(data_line_val[6]) - if(term['data_num'](data_line_val) and \ - mid_12.has_key(str(data_line_val[4])) == True): - mid_12[data_line_val[4]].append(data_line_val[6]) - -td_list_13 =list() -with open(list_13_file,'w') as outfile: - for key in mid_13.keys(): - for td in mid_13[key]: - if(term['not_null'](td) and td_list_13.count(td)==0): - td_list_13.append(td) - outfile.write(td+'\n') - -td_list_12 =list() -with open(list_12_file,'w') as outfile: - for key in mid_12.keys(): - for td in mid_12[key]: - if(term['not_null'](td) and td_list_12.count(td)==0): - td_list_12.append(td) - outfile.write(td+'\n') - -different_list_12 = list() -with open(different_list_file_12,'w') as outfile: - for x in td_list_12: - if(td_list_13.count(x)==0): - different_list_12.append(x) - outfile.write(x+'\n') - -different_list_13 = list() -with open(different_list_file_13,'w') as outfile: - for x in td_list_13: - if(td_list_12.count(x)==0): - different_list_13.append(x) - outfile.write(x+'\n') - -td_dict=dict() -for i in different_list_12: - td_dict[i]=list() - -# i=0 -# with open(run_time_file,'r') as infile: -# for line in infile: -# i+=1 -# if(i%100000==0): -# print i -# if(line.find("NCHK_QUREY__KNOW")!=-1): -# data_line_val = re.split(r',|TD:',line) -# if(td_dict.has_key(data_line_val[6]) == True): -# td_dict[data_line_val[6]].insert(0,"NCHK_QUREY__KNOW"+'\n') -# td_dict[data_line_val[6]].append(line) -# elif(line.find("NCHK_QUREY__UNKNOW")!=-1): -# data_line_val = re.split(r',|TD:',line) -# if(td_dict.has_key(data_line_val[6]) == True): -# td_dict[data_line_val[6]].append(line) -# elif(line.find("NCHK_REPORT__SUCC")!=-1): -# data_line_val = re.split(r',|TD:',line) -# if(td_dict.has_key(data_line_val[6]) == True): -# td_dict[data_line_val[6]].append(line) - -# else: -# continue - - -print len(different_list_12),len(different_list_13),\ -len(td_list_12),len(td_list_13),\ -len(mid_12),len(mid_13),len(different_mid_13),len(different_mid_12) - -# with open(lost_td_line,'w') as outfile: -# for key in td_dict.keys(): -# if(len(td_dict[key])>2 and td_dict[key][0]=="NCHK_QUREY__KNOW"): -# outfile.write(key+':\n') -# for i in td_dict[key]: -# outfile.write(i) - diff --git a/rssb_statistics/harm_len_st.py b/rssb_statistics/harm_len_st.py deleted file mode 100644 index 8a372b3..0000000 --- a/rssb_statistics/harm_len_st.py +++ /dev/null @@ -1,29 +0,0 @@ -import os -import re -import csv -import bisect -cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" -cmd2 = "cat media_expire.log.2018* > media_expire.log" -os.system(cmd1) -os.system(cmd2) -breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] -st_num = [0,0,0,0,0,0,0,0] -harm_mid_dic=dict() -with open("survey.recv_survey.log",'r') as infile: - for line in infile: - data_line_val = re.split(r',',line) - if(len(data_line_val)==8): - 
mid_string = (re.split(r"MID:",data_line_val[2]))[1] - harm_mid_dic[mid_string]=list() - -with open("media_expire.log",'r') as infile: - for line in infile: - line_result = re.split(r",|MID:|media_len:",line) - if(harm_mid_dic.has_key(str(line_result[3]))==True): - # print int(line_result[6]) - j = bisect.bisect(breakpoints,int(line_result[6])) - st_num[j]+=1 -print st_num -# with open("un_recv_list.csv",'w') as csvfile: -# writer = csv.writer(csvfile) -# writer.writerow(un_recv_list) \ No newline at end of file diff --git a/rssb_statistics/recv_survey_mid_st.py b/rssb_statistics/recv_survey_mid_st.py deleted file mode 100644 index f7faaeb..0000000 --- a/rssb_statistics/recv_survey_mid_st.py +++ /dev/null @@ -1,3 +0,0 @@ -import os -cmd = "cat survey.log.2018* | grep \"recv survey\" | wc -l" -os.system(cmd) \ No newline at end of file diff --git a/rssb_statistics/service_st.py b/rssb_statistics/service_st.py deleted file mode 100644 index 7fecd33..0000000 --- a/rssb_statistics/service_st.py +++ /dev/null @@ -1,29 +0,0 @@ -import re - -different_mid_12=set() -different_mid_14=set() -service_dic=dict() - -with open("different_mid_file_12",'r') as infile: - for line in infile: - different_mid_12.add(long(line)) -with open("different_mid_file_14",'r') as infile: - for line in infile: - different_mid_14.add(long(line)) -with open("../12.log/survey.recv_survey.log",'r') as infile: - for line in infile: - line_result = re.split(r",|MID:|service:",line) - if((long(line_result[3]) in different_mid_12)==True): - if(service_dic.has_key(line_result[5])==True): - service_dic[line_result[5]]+=1 - else: - service_dic[line_result[5]]=0 -with open("../14.log/survey.recv.log",'r') as infile: - for line in infile: - line_result = re.split(r",|MID:|service:",line) - if((long(line_result[3]) in different_mid_14)==True): - if(service_dic.has_key(line_result[5])==True): - service_dic[line_result[5]]+=1 - else: - service_dic[line_result[5]]=0 -print service_dic \ No newline at end of file diff --git a/rssb_statistics/un_recv_st.py b/rssb_statistics/un_recv_st.py deleted file mode 100644 index 9d3d234..0000000 --- a/rssb_statistics/un_recv_st.py +++ /dev/null @@ -1,36 +0,0 @@ -import os -import re -import csv -cmd = "cat rssb_stat.log.2018-04-16 | grep \"/home/audiorecognition/aufilter/un_recv\" > rssb_stat.log.un_recv" -os.system(cmd) -i=0 -last_len = 0 -add_len = 0 -num = 0 -un_recv_list = list() -with open("rssb_stat.log.un_recv",'r') as infile: - for line in infile: - line_result = re.split(r'\t',line) - if(i==0): - last_len = long(line_result[2]) - break - -with open("rssb_stat.log.un_recv",'r') as infile: - for line in infile: - line_result = re.split(r'\t',line) - if(last_len <= long(line_result[2])): - add_len = add_len+long(line_result[2])-last_len - last_len = long(line_result[2]) - else: - add_len = add_len+long(line_result[2]) - last_len = long(line_result[2]) - i+=1 - if(i>=120): - i=0 - un_recv_list.append(str(add_len)) - add_len=0 - -with open("un_recv_list.csv",'w') as csvfile: - writer = csv.writer(csvfile) - writer.writerow(un_recv_list) - diff --git a/rssb_statistics/un_recv_survey_mid_st.py b/rssb_statistics/un_recv_survey_mid_st.py deleted file mode 100644 index e1e970f..0000000 --- a/rssb_statistics/un_recv_survey_mid_st.py +++ /dev/null @@ -1,3 +0,0 @@ -import os -cmd = "cat survey.log.2018* | grep \"sync_audio\" | wc -l" -os.system(cmd) \ No newline at end of file diff --git a/sfh_integrate/SFH_function.c b/sfh_integrate/SFH_function.c deleted file mode 100644 index a311f9c..0000000 --- 
a/sfh_integrate/SFH_function.c +++ /dev/null @@ -1,177 +0,0 @@ -/* -gcc -g SFH_function.c -o SFH_function -lmaatframe -lMESA_htable -I../include -*/ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include "gram_index_engine.h" -#include <MESA/MESA_htable.h> -#include <assert.h> -#include <ctype.h> -#define SLOT_SIZE (1024*1024*16) -#define THREAD_SAFE 0 -#define BUFFER_LEN (10*1024) -#define SFH_LEN (10*1024) -#define TD_LEN 33 - -typedef struct sfh_link -{ - char *sfh_str; - int similiar; - int all_similiar; - long hash_len; - struct sfh_link *next; -}sfh_link; - -typedef struct top_similiar_sfh -{ - int all_num; - int all_similiar; - char *sfh_str; - long hash_len; - sfh_link *sfh_link_items; -}top_similiar_sfh; - -long get_hashed_len(const char* sfh) -{ - char *data=(char*)malloc(strlen(sfh)+1); - memcpy(data,sfh, strlen(sfh)); - data[strlen(sfh)]='\0'; - char *token=NULL,*sub_token=NULL,*saveptr; - long left_offset=0,right_offset=0,hashed_length=0; - int ret=0,first=0; - for (token = data; ; token= NULL) - { - sub_token= strtok_r(token,"[", &saveptr); - if (sub_token == NULL) - { - break; - } - if(first==0)//jump over the first sub string. - { - first=1; - continue; - } - ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); - if(ret!=2) - { - return 0; - } - assert(ret==2); - hashed_length+=right_offset-left_offset+1; - } - //printf("hashed length=%ld\n",hashed_length); - free(data); - return hashed_length/2; -} - -void print_mistake_td(const uchar *key,uint size,void *data,void *arg) -{ - FILE *ripe_file = (FILE*)arg; - top_similiar_sfh *temp_top_similiar_sfh=(top_similiar_sfh*)data; - fprintf(ripe_file,"%s,%s\n",key,temp_top_similiar_sfh->sfh_str); - sfh_link *temp_sfh_link=temp_top_similiar_sfh->sfh_link_items; - for(;;temp_sfh_link=temp_sfh_link->next) - { - if(temp_sfh_link==NULL) - { - break; - } - fprintf(ripe_file,"%d;%s;%d\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->hash_len); - } - fprintf(ripe_file,"\n"); -} -int main() -{ - FILE *raw_file; - FILE *ripe_file; - char *raw_file_dir="../data/td_data_set/td_data_20171207/video_id_raw_data"; - char *ripe_file_dir="../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_2"; - char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL; - raw_file = fopen(raw_file_dir,"r+"); - ripe_file = fopen(ripe_file_dir,"w+"); - long temp_hash_len=0; - unsigned int slot_size=SLOT_SIZE; - int i=0,thread_safe=THREAD_SAFE,ret=0,temp_similiar=0,temp_all_similiar=0; - top_similiar_sfh *temp_top_similiar_sfh=NULL; - sfh_link *temp_sfh_link=NULL; - MESA_htable_handle htable=NULL; - if(raw_file==NULL) - { - printf("open all_av_digest error\n"); - return -1; - } - - if(ripe_file==NULL) - { - printf("open all_av_digest_mistake_level error"); - return -1; - } - buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); - sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); - td = (char*)calloc(TD_LEN,sizeof(char)); - td[32]='\0'; - htable=MESA_htable_born(); - MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); - MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); - MESA_htable_mature(htable); - while(feof(raw_file)==0) - { - i++; - if(i%10000==0) - { - printf("%d\n",i); - } - fgets(buffer,BUFFER_LEN-1,raw_file); - buffer[BUFFER_LEN-1]='\0'; - ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); - assert(ret==2); - td[32]='\0'; - if((temp_top_similiar_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) - { - temp_top_similiar_sfh=(top_similiar_sfh*)calloc(1,sizeof(top_similiar_sfh)); - temp_top_similiar_sfh->all_num=1; - 
temp_top_similiar_sfh->all_similiar=0; - temp_top_similiar_sfh->hash_len=get_hashed_len(sfh_str); - temp_top_similiar_sfh->sfh_str=strdup(sfh_str); - temp_top_similiar_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); - temp_top_similiar_sfh->sfh_link_items->sfh_str=strdup(sfh_str); - temp_top_similiar_sfh->sfh_link_items->similiar=0; - temp_top_similiar_sfh->sfh_link_items->all_similiar=0; - temp_top_similiar_sfh->sfh_link_items->next=NULL; - ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_top_similiar_sfh); - assert(ret>0); - } - else - { - temp_similiar=GIE_sfh_similiarity(temp_top_similiar_sfh->sfh_str,(int)strlen(temp_top_similiar_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); - temp_top_similiar_sfh->all_similiar+=temp_similiar; - temp_sfh_link=temp_top_similiar_sfh->sfh_link_items; - for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) - { - temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); - temp_sfh_link->all_similiar+=temp_similiar; - temp_all_similiar+=temp_similiar; - if(temp_sfh_link->all_similiar>temp_top_similiar_sfh->all_similiar) - { - free(temp_top_similiar_sfh->sfh_str); - temp_top_similiar_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); - temp_top_similiar_sfh->all_similiar=temp_sfh_link->all_similiar; - } - if(temp_sfh_link->next==NULL) - { - break; - } - } - temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); - temp_sfh_link->next->sfh_str=strdup(sfh_str); - temp_sfh_link->next->hash_len=get_hashed_len(sfh_str); - temp_sfh_link->next->similiar=0; - temp_sfh_link->next->all_similiar=temp_all_similiar; - temp_sfh_link->next->next=NULL; - temp_top_similiar_sfh->all_num+=1; - } - } - fclose(raw_file); - MESA_htable_iterate(htable,print_mistake_td,ripe_file); -} \ No newline at end of file diff --git a/src/CMakeCache.txt b/src/CMakeCache.txt new file mode 100644 index 0000000..5aa46ec --- /dev/null +++ b/src/CMakeCache.txt @@ -0,0 +1,278 @@ +# This is the CMakeCache file. +# For build in directory: /home/chenguanlin/TD_evaluation/src +# It was generated by CMake: /usr/bin/cmake +# You can edit this file to change values found and used by cmake. +# If you do not want to change any of the values, simply exit the editor. +# If you do want to change a value, simply edit, save, and exit the editor. +# The syntax for the file is as follows: +# KEY:TYPE=VALUE +# KEY is the name of a variable in the cache. +# TYPE is a hint to GUI's for the type of VALUE, DO NOT EDIT TYPE!. +# VALUE is the current value for the KEY. + +######################## +# EXTERNAL cache entries +######################## + +//Value Computed by CMake +CALCULATE_BINARY_DIR:STATIC=/home/chenguanlin/TD_evaluation/src + +//Value Computed by CMake +CALCULATE_SOURCE_DIR:STATIC=/home/chenguanlin/TD_evaluation/src + +//Path to a program. +CMAKE_AR:FILEPATH=/usr/bin/ar + +//Choose the type of build, options are: None(CMAKE_CXX_FLAGS or +// CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel. +CMAKE_BUILD_TYPE:STRING= + +//Enable/Disable color output during build. +CMAKE_COLOR_MAKEFILE:BOOL=ON + +//CXX compiler. +CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++ + +//Flags used by the compiler during all build types. +CMAKE_CXX_FLAGS:STRING= + +//Flags used by the compiler during debug builds. +CMAKE_CXX_FLAGS_DEBUG:STRING=-g + +//Flags used by the compiler during release minsize builds. 
+CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the compiler during release builds (/MD /Ob1 /Oi +// /Ot /Oy /Gs will produce slightly less optimized but smaller +// files). +CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the compiler during Release with Debug Info builds. +CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g + +//C compiler. +CMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc + +//Flags used by the compiler during all build types. +CMAKE_C_FLAGS:STRING= + +//Flags used by the compiler during debug builds. +CMAKE_C_FLAGS_DEBUG:STRING=-g + +//Flags used by the compiler during release minsize builds. +CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the compiler during release builds (/MD /Ob1 /Oi +// /Ot /Oy /Gs will produce slightly less optimized but smaller +// files). +CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the compiler during Release with Debug Info builds. +CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g + +//Flags used by the linker. +CMAKE_EXE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Install path prefix, prepended onto install directories. +CMAKE_INSTALL_PREFIX:PATH=/usr/local + +//Path to a program. +CMAKE_LINKER:FILEPATH=/usr/bin/ld + +//Path to a program. +CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake + +//Flags used by the linker during the creation of modules. +CMAKE_MODULE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_NM:FILEPATH=/usr/bin/nm + +//Path to a program. +CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy + +//Path to a program. +CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump + +//Path to a program. +CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib + +//Flags used by the linker during the creation of dll's. +CMAKE_SHARED_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//If set, runtime paths are not added when using shared libraries. +CMAKE_SKIP_RPATH:BOOL=NO + +//Path to a program. +CMAKE_STRIP:FILEPATH=/usr/bin/strip + +//If true, cmake will use relative paths in makefiles and projects. +CMAKE_USE_RELATIVE_PATHS:BOOL=OFF + +//If this value is on, makefiles will be generated without the +// .SILENT directive, and all commands will be echoed to the console +// during the make. This is useful for debugging only. With Visual +// Studio IDE projects all commands are done without /nologo. 
+CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE + + +######################## +# INTERNAL cache entries +######################## + +//Advanced flag for variable: CMAKE_AR +CMAKE_AR-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_BUILD_TOOL +CMAKE_BUILD_TOOL-ADVANCED:INTERNAL=1 +//What is the target build tool cmake is generating for. +CMAKE_BUILD_TOOL:INTERNAL=/usr/bin/gmake +//This is the directory where this CMakeCahe.txt was created +CMAKE_CACHEFILE_DIR:INTERNAL=/home/chenguanlin/TD_evaluation/src +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_MAJOR_VERSION:INTERNAL=2 +//Minor version of cmake used to create the current loaded cache +CMAKE_CACHE_MINOR_VERSION:INTERNAL=6 +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_RELEASE_VERSION:INTERNAL=patch 4 +//Advanced flag for variable: CMAKE_COLOR_MAKEFILE +CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1 +//Path to CMake executable. +CMAKE_COMMAND:INTERNAL=/usr/bin/cmake +//Path to cpack program executable. +CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack +//Path to ctest program executable. +CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest +//Advanced flag for variable: CMAKE_CXX_COMPILER +CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 +CMAKE_CXX_COMPILER_WORKS:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS +CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_DEBUG +CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_MINSIZEREL +CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_RELEASE +CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO +CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_COMPILER +CMAKE_C_COMPILER-ADVANCED:INTERNAL=1 +CMAKE_C_COMPILER_WORKS:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS +CMAKE_C_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_DEBUG +CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_MINSIZEREL +CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_RELEASE +CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_RELWITHDEBINFO +CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Result of TRY_COMPILE +CMAKE_DETERMINE_CXX_ABI_COMPILED:INTERNAL=TRUE +//Result of TRY_COMPILE +CMAKE_DETERMINE_C_ABI_COMPILED:INTERNAL=TRUE +//Path to cache edit program executable. +CMAKE_EDIT_COMMAND:INTERNAL=/usr/bin/ccmake +//Executable file format +CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS +CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG +CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE +CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Name of generator. +CMAKE_GENERATOR:INTERNAL=Unix Makefiles +//Start directory with the top level CMakeLists.txt file for this +// project +CMAKE_HOME_DIRECTORY:INTERNAL=/home/chenguanlin/TD_evaluation/src +//Install .so files without execute permission. 
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=0 +//Advanced flag for variable: CMAKE_LINKER +CMAKE_LINKER-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MAKE_PROGRAM +CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS +CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG +CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE +CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_NM +CMAKE_NM-ADVANCED:INTERNAL=1 +//number of local generators +CMAKE_NUMBER_OF_LOCAL_GENERATORS:INTERNAL=1 +//Advanced flag for variable: CMAKE_OBJCOPY +CMAKE_OBJCOPY-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_OBJDUMP +CMAKE_OBJDUMP-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_RANLIB +CMAKE_RANLIB-ADVANCED:INTERNAL=1 +//Path to CMake installation. +CMAKE_ROOT:INTERNAL=/usr/share/cmake +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS +CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG +CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE +CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SKIP_RPATH +CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_STRIP +CMAKE_STRIP-ADVANCED:INTERNAL=1 +//uname command +CMAKE_UNAME:INTERNAL=/bin/uname +//Advanced flag for variable: CMAKE_USE_RELATIVE_PATHS +CMAKE_USE_RELATIVE_PATHS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_VERBOSE_MAKEFILE +CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 + diff --git a/src/CMakeFiles/CMakeCCompiler.cmake b/src/CMakeFiles/CMakeCCompiler.cmake new file mode 100644 index 0000000..1d1bbea --- /dev/null +++ b/src/CMakeFiles/CMakeCCompiler.cmake @@ -0,0 +1,36 @@ +SET(CMAKE_C_COMPILER "/usr/bin/gcc") +SET(CMAKE_C_COMPILER_ARG1 "") +SET(CMAKE_C_COMPILER_ID "GNU") +SET(CMAKE_C_PLATFORM_ID "Linux") +SET(CMAKE_AR "/usr/bin/ar") +SET(CMAKE_RANLIB "/usr/bin/ranlib") +SET(CMAKE_COMPILER_IS_GNUCC 1) +SET(CMAKE_C_COMPILER_LOADED 1) +SET(CMAKE_COMPILER_IS_MINGW ) +SET(CMAKE_COMPILER_IS_CYGWIN ) +IF(CMAKE_COMPILER_IS_CYGWIN) + SET(CYGWIN 1) + SET(UNIX 1) +ENDIF(CMAKE_COMPILER_IS_CYGWIN) + +SET(CMAKE_C_COMPILER_ENV_VAR "CC") + +IF(CMAKE_COMPILER_IS_MINGW) + SET(MINGW 1) +ENDIF(CMAKE_COMPILER_IS_MINGW) +SET(CMAKE_C_COMPILER_ID_RUN 1) +SET(CMAKE_C_SOURCE_FILE_EXTENSIONS c) +SET(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) +SET(CMAKE_C_LINKER_PREFERENCE 10) + +# Save compiler ABI information. 
+SET(CMAKE_C_SIZEOF_DATA_PTR "8") +SET(CMAKE_C_COMPILER_ABI "ELF") + +IF(CMAKE_C_SIZEOF_DATA_PTR) + SET(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") +ENDIF(CMAKE_C_SIZEOF_DATA_PTR) + +IF(CMAKE_C_COMPILER_ABI) + SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") +ENDIF(CMAKE_C_COMPILER_ABI) diff --git a/src/CMakeFiles/CMakeCXXCompiler.cmake b/src/CMakeFiles/CMakeCXXCompiler.cmake new file mode 100644 index 0000000..64dad77 --- /dev/null +++ b/src/CMakeFiles/CMakeCXXCompiler.cmake @@ -0,0 +1,36 @@ +SET(CMAKE_CXX_COMPILER "/usr/bin/c++") +SET(CMAKE_CXX_COMPILER_ARG1 "") +SET(CMAKE_CXX_COMPILER_ID "GNU") +SET(CMAKE_CXX_PLATFORM_ID "Linux") +SET(CMAKE_AR "/usr/bin/ar") +SET(CMAKE_RANLIB "/usr/bin/ranlib") +SET(CMAKE_COMPILER_IS_GNUCXX 1) +SET(CMAKE_CXX_COMPILER_LOADED 1) +SET(CMAKE_COMPILER_IS_MINGW ) +SET(CMAKE_COMPILER_IS_CYGWIN ) +IF(CMAKE_COMPILER_IS_CYGWIN) + SET(CYGWIN 1) + SET(UNIX 1) +ENDIF(CMAKE_COMPILER_IS_CYGWIN) + +SET(CMAKE_CXX_COMPILER_ENV_VAR "CXX") + +IF(CMAKE_COMPILER_IS_MINGW) + SET(MINGW 1) +ENDIF(CMAKE_COMPILER_IS_MINGW) +SET(CMAKE_CXX_COMPILER_ID_RUN 1) +SET(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;H;o;O;obj;OBJ;def;DEF;rc;RC) +SET(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm) +SET(CMAKE_CXX_LINKER_PREFERENCE 30) + +# Save compiler ABI information. +SET(CMAKE_CXX_SIZEOF_DATA_PTR "8") +SET(CMAKE_CXX_COMPILER_ABI "ELF") + +IF(CMAKE_CXX_SIZEOF_DATA_PTR) + SET(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") +ENDIF(CMAKE_CXX_SIZEOF_DATA_PTR) + +IF(CMAKE_CXX_COMPILER_ABI) + SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") +ENDIF(CMAKE_CXX_COMPILER_ABI) diff --git a/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin b/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin new file mode 100644 index 0000000..9a91ed2 Binary files /dev/null and b/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin differ diff --git a/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin b/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin new file mode 100644 index 0000000..be1e6e2 Binary files /dev/null and b/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin differ diff --git a/src/CMakeFiles/CMakeDirectoryInformation.cmake b/src/CMakeFiles/CMakeDirectoryInformation.cmake new file mode 100644 index 0000000..66066d5 --- /dev/null +++ b/src/CMakeFiles/CMakeDirectoryInformation.cmake @@ -0,0 +1,21 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# Relative path conversion top directories. +SET(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/chenguanlin/TD_evaluation/src") +SET(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/chenguanlin/TD_evaluation/src") + +# Force unix paths in dependencies. +SET(CMAKE_FORCE_UNIX_PATHS 1) + +# The C and CXX include file search paths: +SET(CMAKE_C_INCLUDE_PATH + ) +SET(CMAKE_CXX_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH}) +SET(CMAKE_Fortran_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH}) + +# The C and CXX include file regular expressions for this directory. +SET(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") +SET(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") +SET(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) +SET(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) diff --git a/src/CMakeFiles/CMakeOutput.log b/src/CMakeFiles/CMakeOutput.log new file mode 100644 index 0000000..3475153 --- /dev/null +++ b/src/CMakeFiles/CMakeOutput.log @@ -0,0 +1,89 @@ +The system is: Linux - 2.6.32-358.el6.x86_64 - x86_64 +Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. 
+Compiler: /usr/bin/gcc +Build flags: +Id flags: + +The output was: +0 + + +Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out" + +The C compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdC/a.out" + +Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. +Compiler: /usr/bin/c++ +Build flags: +Id flags: + +The output was: +0 +CMakeCXXCompilerId.cpp:67: warning: deprecated conversion from string constant to ‘char*’ +CMakeCXXCompilerId.cpp:157: warning: deprecated conversion from string constant to ‘char*’ + + +Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out" + +The CXX compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdCXX/a.out" + +Determining if the C compiler works passed with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building C object CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o +/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -c /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCCompiler.c +Linking C executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + +Detecting C compiler ABI info compiled with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building C object CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o +/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake/Modules/CMakeCCompilerABI.c +Linking C executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + +Determining if the CXX compiler works passed with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building CXX object CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o +/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -c 
/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCXXCompiler.cxx +Linking CXX executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + +Detecting CXX compiler ABI info compiled with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building CXX object CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o +/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp +Linking CXX executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + diff --git a/src/CMakeFiles/CMakeSystem.cmake b/src/CMakeFiles/CMakeSystem.cmake new file mode 100644 index 0000000..3b3e967 --- /dev/null +++ b/src/CMakeFiles/CMakeSystem.cmake @@ -0,0 +1,15 @@ + + +SET(CMAKE_SYSTEM "Linux-2.6.32-358.el6.x86_64") +SET(CMAKE_SYSTEM_NAME "Linux") +SET(CMAKE_SYSTEM_VERSION "2.6.32-358.el6.x86_64") +SET(CMAKE_SYSTEM_PROCESSOR "x86_64") + +SET(CMAKE_HOST_SYSTEM "Linux-2.6.32-358.el6.x86_64") +SET(CMAKE_HOST_SYSTEM_NAME "Linux") +SET(CMAKE_HOST_SYSTEM_VERSION "2.6.32-358.el6.x86_64") +SET(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") + +SET(CMAKE_CROSSCOMPILING "FALSE") + +SET(CMAKE_SYSTEM_LOADED 1) diff --git a/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c b/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c new file mode 100644 index 0000000..7fd0088 --- /dev/null +++ b/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c @@ -0,0 +1,182 @@ +#ifdef __cplusplus +# error "A C++ compiler has been selected for C." +#endif + +#if defined(__18CXX) +# define ID_VOID_MAIN +#endif + +#if defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + +#elif defined(__WATCOMC__) +# define COMPILER_ID "Watcom" + +#elif defined(__SUNPRO_C) +# define COMPILER_ID "SunPro" + +#elif defined(__HP_cc) +# define COMPILER_ID "HP" + +#elif defined(__DECC) +# define COMPILER_ID "Compaq" + +#elif defined(__IBMC__) +# define COMPILER_ID "VisualAge" + +#elif defined(__PGI) +# define COMPILER_ID "PGI" + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + +#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +/* Analog Devices C++ compiler for Blackfin, TigerSHARC and + SHARC (21000) DSPs */ +# define COMPILER_ID "ADSP" + +/* IAR Systems compiler for embedded systems. 
+ http://www.iar.com + Not supported yet by CMake +#elif defined(__IAR_SYSTEMS_ICC__) +# define COMPILER_ID "IAR" */ + +/* sdcc, the small devices C compiler for embedded systems, + http://sdcc.sourceforge.net */ +#elif defined(SDCC) +# define COMPILER_ID "SDCC" + +#elif defined(_COMPILER_VERSION) +# define COMPILER_ID "MIPSpro" + +/* This compiler is either not known or is too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__sgi) +# define COMPILER_ID "MIPSpro" + +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) +# define PLATFORM_ID "IRIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU) +# define PLATFORM_ID "Haiku" +/* Haiku also defines __BEOS__ so we must + put it prior to the check for __BEOS__ +*/ + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#else /* unknown platform */ +# define PLATFORM_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. 
Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; + + +/*--------------------------------------------------------------------------*/ + +#ifdef ID_VOID_MAIN +void main() {} +#else +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + (void)argv; + return require; +} +#endif diff --git a/src/CMakeFiles/CompilerIdC/a.out b/src/CMakeFiles/CompilerIdC/a.out new file mode 100644 index 0000000..c389161 Binary files /dev/null and b/src/CMakeFiles/CompilerIdC/a.out differ diff --git a/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp b/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp new file mode 100644 index 0000000..f8c041f --- /dev/null +++ b/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp @@ -0,0 +1,169 @@ +/* This source file must have a .cpp extension so that all C++ compilers + recognize the extension without flags. Borland does not know .cxx for + example. */ +#ifndef __cplusplus +# error "A C compiler has been selected for C++." +#endif + +#if defined(__COMO__) +# define COMPILER_ID "Comeau" + +#elif defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + +#elif defined(__WATCOMC__) +# define COMPILER_ID "Watcom" + +#elif defined(__SUNPRO_CC) +# define COMPILER_ID "SunPro" + +#elif defined(__HP_aCC) +# define COMPILER_ID "HP" + +#elif defined(__DECCXX) +# define COMPILER_ID "Compaq" + +#elif defined(__IBMCPP__) +# define COMPILER_ID "VisualAge" + +#elif defined(__PGI) +# define COMPILER_ID "PGI" + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + +#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +/* Analog Devices C++ compiler for Blackfin, TigerSHARC and + SHARC (21000) DSPs */ +# define COMPILER_ID "ADSP" + +#elif defined(_COMPILER_VERSION) +# define COMPILER_ID "MIPSpro" + +/* This compiler is either not known or is too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__sgi) +# define COMPILER_ID "MIPSpro" + +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; + +/* Identify known platforms by name. 
*/ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) +# define PLATFORM_ID "IRIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU) +# define PLATFORM_ID "Haiku" +/* Haiku also defines __BEOS__ so we must + put it prior to the check for __BEOS__ +*/ + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#else /* unknown platform */ +# define PLATFORM_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; + + +/*--------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + (void)argv; + return require; +} diff --git a/src/CMakeFiles/CompilerIdCXX/a.out b/src/CMakeFiles/CompilerIdCXX/a.out new file mode 100644 index 0000000..65597e7 Binary files /dev/null and b/src/CMakeFiles/CompilerIdCXX/a.out differ diff --git a/src/CMakeFiles/Makefile.cmake b/src/CMakeFiles/Makefile.cmake new file mode 100644 index 0000000..8466809 --- /dev/null +++ b/src/CMakeFiles/Makefile.cmake @@ -0,0 +1,52 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# The generator used is: +SET(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") + +# The top level Makefile was generated from the following files: +SET(CMAKE_MAKEFILE_DEPENDS + "CMakeCache.txt" + "CMakeFiles/CMakeCCompiler.cmake" + "CMakeFiles/CMakeCXXCompiler.cmake" + "CMakeFiles/CMakeSystem.cmake" + "CMakeLists.txt" + "/usr/share/cmake/Modules/CMakeCCompiler.cmake.in" + "/usr/share/cmake/Modules/CMakeCCompilerABI.c" + "/usr/share/cmake/Modules/CMakeCInformation.cmake" + "/usr/share/cmake/Modules/CMakeCXXCompiler.cmake.in" + "/usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp" + "/usr/share/cmake/Modules/CMakeCXXInformation.cmake" + "/usr/share/cmake/Modules/CMakeCommonLanguageInclude.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCCompiler.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompilerABI.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake" + "/usr/share/cmake/Modules/CMakeDetermineSystem.cmake" + "/usr/share/cmake/Modules/CMakeFindBinUtils.cmake" + "/usr/share/cmake/Modules/CMakeGenericSystem.cmake" + "/usr/share/cmake/Modules/CMakeSystem.cmake.in" + "/usr/share/cmake/Modules/CMakeSystemSpecificInformation.cmake" + "/usr/share/cmake/Modules/CMakeTestCCompiler.cmake" + "/usr/share/cmake/Modules/CMakeTestCXXCompiler.cmake" + "/usr/share/cmake/Modules/CMakeUnixFindMake.cmake" + "/usr/share/cmake/Modules/Platform/Linux.cmake" + "/usr/share/cmake/Modules/Platform/UnixPaths.cmake" + "/usr/share/cmake/Modules/Platform/gcc.cmake" + ) + +# The corresponding makefile is: +SET(CMAKE_MAKEFILE_OUTPUTS + "Makefile" + "CMakeFiles/cmake.check_cache" + "CMakeFiles/CMakeDirectoryInformation.cmake" + ) + +# Byproducts of CMake generate step: +SET(CMAKE_MAKEFILE_PRODUCTS + ) + +# Dependency information for all targets: +SET(CMAKE_DEPEND_INFO_FILES + "CMakeFiles/grain.dir/DependInfo.cmake" + ) diff --git a/src/CMakeFiles/Makefile2 b/src/CMakeFiles/Makefile2 new file mode 100644 index 0000000..405383d --- /dev/null +++ b/src/CMakeFiles/Makefile2 @@ -0,0 +1,99 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target + +# The main recursive all target +all: +.PHONY : all + +# The main recursive preinstall target +preinstall: +.PHONY : preinstall + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canoncical targets will work. +.SUFFIXES: + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# The program to use to edit the cache. +CMAKE_EDIT_COMMAND = /usr/bin/ccmake + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src + +# The top-level build directory on which CMake was run. 
+CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src + +#============================================================================= +# Target rules for target CMakeFiles/grain.dir + +# All Build rule for target. +CMakeFiles/grain.dir/all: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/depend + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build + $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles 1 + @echo "Built target grain" +.PHONY : CMakeFiles/grain.dir/all + +# Include target in all. +all: CMakeFiles/grain.dir/all +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/grain.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 1 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/grain.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0 +.PHONY : CMakeFiles/grain.dir/rule + +# Convenience name for target. +grain: CMakeFiles/grain.dir/rule +.PHONY : grain + +# clean rule for target. +CMakeFiles/grain.dir/clean: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/clean +.PHONY : CMakeFiles/grain.dir/clean + +# clean rule for target. +clean: CMakeFiles/grain.dir/clean +.PHONY : clean + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/src/CMakeFiles/Progress/1 b/src/CMakeFiles/Progress/1 new file mode 100644 index 0000000..7b4d68d --- /dev/null +++ b/src/CMakeFiles/Progress/1 @@ -0,0 +1 @@ +empty \ No newline at end of file diff --git a/src/CMakeFiles/Progress/count.txt b/src/CMakeFiles/Progress/count.txt new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/src/CMakeFiles/Progress/count.txt @@ -0,0 +1 @@ +1 diff --git a/src/CMakeFiles/cmake.check_cache b/src/CMakeFiles/cmake.check_cache new file mode 100644 index 0000000..3dccd73 --- /dev/null +++ b/src/CMakeFiles/cmake.check_cache @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/src/CMakeFiles/grain.dir/C.includecache b/src/CMakeFiles/grain.dir/C.includecache new file mode 100644 index 0000000..d4110de --- /dev/null +++ b/src/CMakeFiles/grain.dir/C.includecache @@ -0,0 +1,24 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/chenguanlin/TD_evaluation/src/grain.c +stdio.h +- +string.h +- +stdlib.h +- +gram_index_engine.h +/home/chenguanlin/TD_evaluation/src/gram_index_engine.h +MESA/MESA_htable.h +- +assert.h +- +ctype.h +- + diff --git a/src/CMakeFiles/grain.dir/DependInfo.cmake b/src/CMakeFiles/grain.dir/DependInfo.cmake new file mode 100644 index 0000000..0504394 --- /dev/null +++ b/src/CMakeFiles/grain.dir/DependInfo.cmake @@ -0,0 +1,13 @@ +# The set of languages for which implicit dependencies are needed: +SET(CMAKE_DEPENDS_LANGUAGES + "C" + ) +# The set of files for implicit dependencies of each language: +SET(CMAKE_DEPENDS_CHECK_C + "/home/chenguanlin/TD_evaluation/src/grain.c" 
"/home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/grain.c.o" + ) +SET(CMAKE_C_COMPILER_ID "GNU") + +# Targets to which this target links. +SET(CMAKE_TARGET_LINKED_INFO_FILES + ) diff --git a/src/CMakeFiles/grain.dir/build.make b/src/CMakeFiles/grain.dir/build.make new file mode 100644 index 0000000..18e458a --- /dev/null +++ b/src/CMakeFiles/grain.dir/build.make @@ -0,0 +1,103 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canoncical targets will work. +.SUFFIXES: + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# The program to use to edit the cache. +CMAKE_EDIT_COMMAND = /usr/bin/ccmake + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src + +# Include any dependencies generated for this target. +include CMakeFiles/grain.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/grain.dir/progress.make + +# Include the compile flags for this target's objects. 
+include CMakeFiles/grain.dir/flags.make + +CMakeFiles/grain.dir/grain.c.o: CMakeFiles/grain.dir/flags.make +CMakeFiles/grain.dir/grain.c.o: grain.c + $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles $(CMAKE_PROGRESS_1) + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Building C object CMakeFiles/grain.dir/grain.c.o" + /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -o CMakeFiles/grain.dir/grain.c.o -c /home/chenguanlin/TD_evaluation/src/grain.c + +CMakeFiles/grain.dir/grain.c.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing C source to CMakeFiles/grain.dir/grain.c.i" + /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -E /home/chenguanlin/TD_evaluation/src/grain.c > CMakeFiles/grain.dir/grain.c.i + +CMakeFiles/grain.dir/grain.c.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling C source to assembly CMakeFiles/grain.dir/grain.c.s" + /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -S /home/chenguanlin/TD_evaluation/src/grain.c -o CMakeFiles/grain.dir/grain.c.s + +CMakeFiles/grain.dir/grain.c.o.requires: +.PHONY : CMakeFiles/grain.dir/grain.c.o.requires + +CMakeFiles/grain.dir/grain.c.o.provides: CMakeFiles/grain.dir/grain.c.o.requires + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o.provides.build +.PHONY : CMakeFiles/grain.dir/grain.c.o.provides + +CMakeFiles/grain.dir/grain.c.o.provides.build: CMakeFiles/grain.dir/grain.c.o +.PHONY : CMakeFiles/grain.dir/grain.c.o.provides.build + +# Object files for target grain +grain_OBJECTS = \ +"CMakeFiles/grain.dir/grain.c.o" + +# External object files for target grain +grain_EXTERNAL_OBJECTS = + +grain: CMakeFiles/grain.dir/grain.c.o +grain: CMakeFiles/grain.dir/build.make +grain: CMakeFiles/grain.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --red --bold "Linking C executable grain" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/grain.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/grain.dir/build: grain +.PHONY : CMakeFiles/grain.dir/build + +CMakeFiles/grain.dir/requires: CMakeFiles/grain.dir/grain.c.o.requires +.PHONY : CMakeFiles/grain.dir/requires + +CMakeFiles/grain.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/grain.dir/cmake_clean.cmake +.PHONY : CMakeFiles/grain.dir/clean + +CMakeFiles/grain.dir/depend: + cd /home/chenguanlin/TD_evaluation/src && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/grain.dir/depend + diff --git a/src/CMakeFiles/grain.dir/cmake_clean.cmake b/src/CMakeFiles/grain.dir/cmake_clean.cmake new file mode 100644 index 0000000..54d1698 --- /dev/null +++ b/src/CMakeFiles/grain.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +FILE(REMOVE_RECURSE + "CMakeFiles/grain.dir/grain.c.o" + "grain.pdb" + "grain" +) + +# Per-language clean rules from dependency scanning. +FOREACH(lang C) + INCLUDE(CMakeFiles/grain.dir/cmake_clean_${lang}.cmake OPTIONAL) +ENDFOREACH(lang) diff --git a/src/CMakeFiles/grain.dir/depend.internal b/src/CMakeFiles/grain.dir/depend.internal new file mode 100644 index 0000000..f1b3d06 --- /dev/null +++ b/src/CMakeFiles/grain.dir/depend.internal @@ -0,0 +1,5 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +CMakeFiles/grain.dir/grain.c.o + /home/chenguanlin/TD_evaluation/src/grain.c diff --git a/src/CMakeFiles/grain.dir/depend.make b/src/CMakeFiles/grain.dir/depend.make new file mode 100644 index 0000000..85fc728 --- /dev/null +++ b/src/CMakeFiles/grain.dir/depend.make @@ -0,0 +1,5 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +CMakeFiles/grain.dir/grain.c.o: grain.c + diff --git a/src/CMakeFiles/grain.dir/flags.make b/src/CMakeFiles/grain.dir/flags.make new file mode 100644 index 0000000..72791e9 --- /dev/null +++ b/src/CMakeFiles/grain.dir/flags.make @@ -0,0 +1,8 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# compile C with /usr/bin/gcc +C_FLAGS = -g + +C_DEFINES = + diff --git a/src/CMakeFiles/grain.dir/link.txt b/src/CMakeFiles/grain.dir/link.txt new file mode 100644 index 0000000..0f3e72b --- /dev/null +++ b/src/CMakeFiles/grain.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/gcc -g -fPIC CMakeFiles/grain.dir/grain.c.o -o grain -rdynamic -lmaatframe -lMESA_htable -lpthread -lm diff --git a/src/CMakeFiles/grain.dir/progress.make b/src/CMakeFiles/grain.dir/progress.make new file mode 100644 index 0000000..781c7de --- /dev/null +++ b/src/CMakeFiles/grain.dir/progress.make @@ -0,0 +1,2 @@ +CMAKE_PROGRESS_1 = 1 + diff --git a/src/CMakeFiles/progress.make b/src/CMakeFiles/progress.make new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/src/CMakeFiles/progress.make @@ -0,0 +1 @@ +1 diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..a3fd6fa --- /dev/null +++ b/src/Makefile @@ -0,0 +1,163 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canoncical targets will work. +.SUFFIXES: + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# The program to use to edit the cache. +CMAKE_EDIT_COMMAND = /usr/bin/ccmake + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." 
+ /usr/bin/ccmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." + /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache +.PHONY : rebuild_cache/fast + +# The main all target +all: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles /home/chenguanlin/TD_evaluation/src/CMakeFiles/progress.make + $(MAKE) -f CMakeFiles/Makefile2 all + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + $(MAKE) -f CMakeFiles/Makefile2 clean +.PHONY : clean + +# The main clean target +clean/fast: clean +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +#============================================================================= +# Target rules for targets named grain + +# Build rule for target. +grain: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 grain +.PHONY : grain + +# fast build rule for target. +grain/fast: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build +.PHONY : grain/fast + +grain.o: grain.c.o +.PHONY : grain.o + +# target to build an object file +grain.c.o: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o +.PHONY : grain.c.o + +grain.i: grain.c.i +.PHONY : grain.i + +# target to preprocess a source file +grain.c.i: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.i +.PHONY : grain.c.i + +grain.s: grain.c.s +.PHONY : grain.s + +# target to generate assembly for a file +grain.c.s: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.s +.PHONY : grain.c.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... grain" + @echo "... rebuild_cache" + @echo "... grain.o" + @echo "... grain.i" + @echo "... grain.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. 
+cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git "a/src/TD\344\273\243\347\240\201\350\257\264\346\230\216.docx" "b/src/TD\344\273\243\347\240\201\350\257\264\346\230\216.docx" new file mode 100644 index 0000000..4d37049 Binary files /dev/null and "b/src/TD\344\273\243\347\240\201\350\257\264\346\230\216.docx" differ diff --git a/src/cmake_install.cmake b/src/cmake_install.cmake new file mode 100644 index 0000000..75c1e3c --- /dev/null +++ b/src/cmake_install.cmake @@ -0,0 +1,44 @@ +# Install script for directory: /home/chenguanlin/TD_evaluation/src + +# Set the install prefix +IF(NOT DEFINED CMAKE_INSTALL_PREFIX) + SET(CMAKE_INSTALL_PREFIX "/usr/local") +ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX) +STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + IF(BUILD_TYPE) + STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + ELSE(BUILD_TYPE) + SET(CMAKE_INSTALL_CONFIG_NAME "Debug") + ENDIF(BUILD_TYPE) + MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + +# Set the component getting installed. +IF(NOT CMAKE_INSTALL_COMPONENT) + IF(COMPONENT) + MESSAGE(STATUS "Install component: \"${COMPONENT}\"") + SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + ELSE(COMPONENT) + SET(CMAKE_INSTALL_COMPONENT) + ENDIF(COMPONENT) +ENDIF(NOT CMAKE_INSTALL_COMPONENT) + +# Install shared libraries without execute permission? +IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + SET(CMAKE_INSTALL_SO_NO_EXE "0") +ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + +IF(CMAKE_INSTALL_COMPONENT) + SET(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") +ELSE(CMAKE_INSTALL_COMPONENT) + SET(CMAKE_INSTALL_MANIFEST "install_manifest.txt") +ENDIF(CMAKE_INSTALL_COMPONENT) + +FILE(WRITE "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "") +FOREACH(file ${CMAKE_INSTALL_MANIFEST_FILES}) + FILE(APPEND "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "${file}\n") +ENDFOREACH(file) diff --git a/src/dataset_build/CMakeLists.txt b/src/dataset_build/CMakeLists.txt new file mode 100644 index 0000000..8840a74 --- /dev/null +++ b/src/dataset_build/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT (CALCULATE) +SET (SRC_LIST get_lost.c) +SET(CMAKE_BUILD_TYPE "Debug") +SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") +MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR}) +MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR}) +#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/) +#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/) +ADD_EXECUTABLE(get_lost ${SRC_LIST}) +TARGET_LINK_LIBRARIES(get_lost maatframe libMESA_htable.so pthread m) diff --git a/src/dataset_build/based_sfh.conf b/src/dataset_build/based_sfh.conf new file mode 100644 index 0000000..cdcf4cf --- /dev/null +++ b/src/dataset_build/based_sfh.conf @@ -0,0 +1,3 @@ +[file] +raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest_nots +ripe_file_address = ../../data/td_data_set/td_data_20171207/base_sfh_set \ No newline at end of file diff --git a/src/dataset_build/based_sfh.py b/src/dataset_build/based_sfh.py new file mode 100644 index 0000000..b3281ce --- /dev/null +++ b/src/dataset_build/based_sfh.py @@ -0,0 
+1,44 @@ +import re +import ConfigParser +import bisect +import random + +term = {'not_null':(lambda x : len(x)!=0)} + +config = ConfigParser.RawConfigParser() +config.read("based_sfh.conf") +raw_file_address = config.get("file","raw_file_address") +ripe_file_address = config.get("file","ripe_file_address") + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + +i=0 +sfh_set = list() +with open(raw_file_address,'r') as infile: + with open(ripe_file_address,'w') as outfile: + for line in infile: + i+=1 + if(i%100000==0): + print i + result = re.split(r';',line) + if(term['not_null'](result[3]) and term['not_null'](result[19])): + hashed_len = sfh_fingerprint.get_hashed_len(result[19]) + if(hashed_len/int(result[3])>0.8): + outfile.write(result[19]+'\n') \ No newline at end of file diff --git a/src/dataset_build/cal_information.conf b/src/dataset_build/cal_information.conf new file mode 100644 index 0000000..1571b8b --- /dev/null +++ b/src/dataset_build/cal_information.conf @@ -0,0 +1,5 @@ +[file] +raw_file_address = ../../data/ripe_data/td_data_20171207/video_id.txt +ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic +[feature] +feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file diff --git a/src/dataset_build/cal_information.py b/src/dataset_build/cal_information.py new file mode 100644 index 0000000..19cd95c --- /dev/null +++ b/src/dataset_build/cal_information.py @@ -0,0 +1,133 @@ +import re +import numpy +import ConfigParser +import binascii +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==4), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20), + 'not_null':(lambda x : len(x)!=0)} + +class calculation(object): + """docstring for calculation""" + def __init__(self, arg): + super(calculation, self).__init__() + self.arg = arg + + @staticmethod + def cal_ent(x): + x_value_list = set([x[i] for i in range(x.shape[0])]) + ent = 0.0 + num_0 = x[x == 0].shape[0] + for x_value in x_value_list: + if(x_value==0): + continue + p = float(x[x == x_value].shape[0])/(x.shape[0]- num_0) + logp = numpy.log2(p) + ent -=p*logp + return ent + +class data_value(object): + """docstring for data_value""" + def __init__(self, arg): + super(data_value, self).__init__() + self.arg = arg + + @staticmethod + def get_data_values(data): + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) + #data_set[0]=null,data_set[1]=url + data_value_dic = [long(0)]*6 + for x in xrange(1,len(feature_list)+1): + if(x==1): + if(term['not_null'](data_set[x])==True): + data_value_dic[0] = binascii.crc32(data_set[x]) + else: + data_value_dic[0] = 0 + elif(x==2): + if(term['not_null'](data_set[x])==True): + data_value_dic[1] = binascii.crc32(data_set[x]) + else: + data_value_dic[1] = 0 + elif(x==3): + data_value_dic[2] = long(data_set[x]) + elif(x==4): + data_value_dic[3] = long(data_set[x]) + elif(x==5): + if(term['not_null'](data_set[x])==True): + data_value_dic[4] = binascii.crc32(data_set[x]) + else: + data_value_dic[4] = 0 + elif(x==6): 
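+            # x==6 is the LastModify field (feature_name order: URL,ServerIP,MediaType,MediaLen,Etag,LastModify); non-empty values are reduced to a 32-bit CRC, empty ones are recorded as 0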
+ if(term['not_null'](data_set[x])==True): + data_value_dic[5] = binascii.crc32(data_set[x]) + else: + data_value_dic[5] = 0 + return data_value_dic + +config = ConfigParser.RawConfigParser() +config.read("cal_information.conf") + +raw_file_address = config.get("file","raw_file_address") +ripe_file_address = config.get("file","ripe_file_address") +feature_list =[i for i in config.get("feature","feature_name").split(",")] + +i=0 +with open(raw_file_address,'r') as infile: + for line in infile: + i+=1 + if(i%10000==0): + print i + if(i==50000): + break + line_split = re.split(";",line) + data_value_temp = data_value.get_data_values(line_split[5]) + data_value_temp.extend([binascii.crc32(line_split[j]) for j in range(6,19)]) + data_value_temp.append(binascii.crc32(line_split[0])) + if(i==1): + a=numpy.array(data_value_temp) + else: + a=numpy.row_stack((a,numpy.array(data_value_temp))) + +for i in range(20): + if(i==0): + print "URL:"+str(calculation.cal_ent(a[:,i])) + elif(i==1): + print "ServerIP:"+str(calculation.cal_ent(a[:,i])) + elif(i==2): + print "MediaType:"+str(calculation.cal_ent(a[:,i])) + elif(i==3): + print "MediaLen:"+str(calculation.cal_ent(a[:,i])) + elif(i==4): + print "Etag:"+str(calculation.cal_ent(a[:,i])) + elif(i==5): + print "LastModify:"+str(calculation.cal_ent(a[:,i])) + elif(i==6): + print "td_0k:"+str(calculation.cal_ent(a[:,i])) + elif(i==7): + print "td_data_md5_1k:"+str(calculation.cal_ent(a[:,i])) + elif(i==8): + print "td_1k:"+str(calculation.cal_ent(a[:,i])) + elif(i==9): + print "td_data_md5_2k:"+str(calculation.cal_ent(a[:,i])) + elif(i==10): + print "td_2k:"+str(calculation.cal_ent(a[:,i])) + elif(i==11): + print "td_data_md5_4k:"+str(calculation.cal_ent(a[:,i])) + elif(i==12): + print "td_4k:"+str(calculation.cal_ent(a[:,i])) + elif(i==13): + print "td_data_md5_8k:"+str(calculation.cal_ent(a[:,i])) + elif(i==14): + print "td_8k:"+str(calculation.cal_ent(a[:,i])) + elif(i==15): + print "td_data_md5_16k:"+str(calculation.cal_ent(a[:,i])) + elif(i==16): + print "td_16k:"+str(calculation.cal_ent(a[:,i])) + elif(i==17): + print "td_data_md5_32k:"+str(calculation.cal_ent(a[:,i])) + elif(i==18): + print "td_32k:"+str(calculation.cal_ent(a[:,i])) + elif(i==19): + print "id:"+str(calculation.cal_ent(a[:,i])) + diff --git a/src/dataset_build/dataset_build.conf b/src/dataset_build/dataset_build.conf new file mode 100644 index 0000000..400e160 --- /dev/null +++ b/src/dataset_build/dataset_build.conf @@ -0,0 +1,8 @@ +[file] +raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level +ripe_file_address = ../../data/td_data_set/td_data_20171207/td_dataset +base_sfh_sets = ../../data/td_data_set/td_data_20171207/base_sfh_set +[output] +breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304 +[feature] +feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file diff --git a/src/dataset_build/dataset_build.py b/src/dataset_build/dataset_build.py new file mode 100644 index 0000000..a832072 --- /dev/null +++ b/src/dataset_build/dataset_build.py @@ -0,0 +1,144 @@ +import re +import ConfigParser +import bisect +import random +import ctypes +import hashlib +import zlib +import binascii +import json +import datetime +import time + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: 
len(x)>20 and len(x)<(10*1024-100))} + +config = ConfigParser.RawConfigParser() +config.read("dataset_build.conf") +raw_file_address = config.get("file","raw_file_address") +ripe_file_address = config.get("file","ripe_file_address") +base_sfh_sets = config.get("file","base_sfh_sets") +breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +feature_list =[i for i in config.get("feature","feature_name").split(",")] +ll=ctypes.cdll.LoadLibrary +lib = ll("libmaatframe.so") +lost = dict() + + +class data_value(object): + + @staticmethod + def get_feature(data): + return_data=list() + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data[5]) + for x in xrange(1,21): + if(x==1): + if(term['not_null'](data_set[6])): + try: + time1=datetime.datetime.strptime(data[1],'%Y-%m-%d %H:%M:%S')+datetime.timedelta(hours=int(8)) + data_set[6]=data_set[6][0:25] + time2=datetime.datetime.strptime(data_set[6],'%a, %d %b %Y %H:%M:%S') + except Exception, e: + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + else: + return_data.append(str((time1-time2).seconds)) + return_data.append(((time1-time2).seconds)/60) + return_data.append(((time1-time2).seconds)/3600) + return_data.append((time1-time2).days) + else: + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + elif(x==2): + continue + elif(x==3): + continue + elif(x==4): + return_data.append(long(data[4])) + elif(x==5): + if(term['not_null'](data_set[1])): + return_data.append(len(data_set[1])) + else: + return_data.append(-1) + if(term['not_null'](data_set[2])): + ip_set=re.split(r'\.',data_set[2]) + return_data.append(ip_set[0]) + return_data.append(ip_set[1]) + return_data.append(ip_set[2]) + return_data.append(ip_set[3]) + else: + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + if(term['not_null'](data_set[3])): + return_data.append(int(data_set[3])) + else: + return_data.append(-1) + if(term['not_null'](data_set[5])): + return_data.append(binascii.crc32(data_set[5])) + else: + return_data.append(-1) + if(term['not_null'](data_set[6])): + return_data.append(binascii.crc32(data_set[6])) + else: + return_data.append(-1) + elif(x==7): + return_data.append(binascii.crc32(data[7])) + elif(x==9): + return_data.append(binascii.crc32(data[9])) + elif(x==11): + return_data.append(binascii.crc32(data[11])) + elif(x==13): + return_data.append(binascii.crc32(data[13])) + elif(x==15): + return_data.append(binascii.crc32(data[15])) + elif(x==17): + return_data.append(binascii.crc32(data[17])) + return return_data + # data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) + # #data_set[0]=null,data_set[1]=url + # data_value_dic = dict() + # for x in xrange(1,len(feature_list)+1): + # if(x==1): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # elif(x==2): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # elif(x==3): + # data_value_dic[feature_list[x-1]] = data_set[x] + # elif(x==4): + # data_value_dic[feature_list[x-1]] = data_set[x] + # elif(x==5): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # elif(x==6): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # return data_value_dic + + +i=0 +sfh_set = list() +with open(raw_file_address,'r') as infile: + with open(ripe_file_address,'w') as outfile: + for line in infile: + i+=1 + if(i%10000==0): + print i + 
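+            # each raw record is ';'-separated: field 0 is written out first as the label column, followed by the 19 feature values returned by data_value.get_feature()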
line_return = re.split(r';',line) + # if(int(line_return[0])==0): + # print 'td is right' + outfile.write(str(line_return[0])+',') + return_data=data_value.get_feature(line_return) + for x in range(19): + if(x==18): + outfile.write(str(return_data[18])+'\n') + else: + outfile.write(str(return_data[x])+',') diff --git a/src/dataset_build/feature_statistics.conf b/src/dataset_build/feature_statistics.conf new file mode 100644 index 0000000..12cf089 --- /dev/null +++ b/src/dataset_build/feature_statistics.conf @@ -0,0 +1,8 @@ +[file] +raw_file_address = ../../data/td_data_set/td_data_20171207/td.txt +ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic +[output] +breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,44194304 +[feature] +type = data_value_statistics +feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify \ No newline at end of file diff --git a/src/dataset_build/feature_statistics.py b/src/dataset_build/feature_statistics.py new file mode 100644 index 0000000..52ae8e0 --- /dev/null +++ b/src/dataset_build/feature_statistics.py @@ -0,0 +1,164 @@ +import re +import ConfigParser +import bisect +import random +import ctypes +import hashlib +import zlib +import binascii + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==4), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20), + 'not_null':(lambda x : len(x)!=0)} + +class data_line(object): + """docstring for ClassName""" + def __init__(self): + super(ClassName, self).__init__() + + @staticmethod + def if_error(data_line_str): + data_line_val = re.split(r';',data_line_str) + hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) + if(term['data_num'](data_line_val) and term['sfh_len'](data_line_val[19]) and term['td_len'](data_line_val[9])\ + and term['td_len'](data_line_val[2]) and term['td_len'](data_line_val[13]) and term['td_len'](data_line_val[15])\ + and term['td_len'](data_line_val[17]) and term['not_null'](data_line_val[18]) and term['not_null'](data_line_val[19])\ + and hashed_len/float(data_line_val[3])>0.8): + return data_line_val + else: + return -1 + + +class feature_statistics(object): + """YSP feature_statistics""" + def __init__(self): + super(feature_statistics, self).__init__() + self.meida_len_statistics_set = [0,0,0,0,0,0,0] + self.lost_dict = dict() + + def meida_len_statistics(meida_len): + j = bisect.bisect(breakpoints,meida_len) + self.meida_len_statistics_set[j-1]+=1 + + def data_value_statistics(data_value_dic,data_value): + data_value_str = str() + for x in xrange(0,len(feature_list)): + data_value_str = data_value_str+str(data_value_dic[feature_list[x]])+',' + + if(self.lost_dict.has_key(data_value_str)==False): + self.lost_dict[data_value_str]=[0,1,0.] 
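+            # lost_dict maps a feature-value key to [lost_count, total_count, lost_rate]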
+ else: + if (int(result[3])==1): + self.lost_dict[data_value_str][0] += 1 + self.lost_dict[data_value_str][1] += 1 + else: + self.lost_dict[data_value_str][1] += 1 + + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + + @staticmethod + def get_base_sfh(data_set): + base_sfh = list() + for x in xrange(0,10): + base_sfh.append(data_set[x]) + return base_sfh + + + + +class data_value(object): + + @staticmethod + def get_data_values(data): + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) + #data_set[0]=null,data_set[1]=url + data_value_dic = dict() + for x in xrange(1,len(feature_list)+1): + if(x==1): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + elif(x==2): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + elif(x==3): + data_value_dic[feature_list[x-1]] = data_set[x] + elif(x==4): + data_value_dic[feature_list[x-1]] = bisect.bisect(breakpoints,int(data_set[x])) + elif(x==5): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + elif(x==6): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + return data_value_dic + +config = ConfigParser.RawConfigParser() +config.read("feature_statistics.conf") + +feature_statistics_type = ("feature","type") +raw_file_address = config.get("file","raw_file_address") +ripe_file_address = config.get("file","ripe_file_address") + +if(feature_statistics_type=="meida_len_statistics"): + breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +elif(feature_statistics_type=="data_value_statistics"): + feature_list =[i for i in config.get("feature","feature_name").split(",")] +# ll=ctypes.cdll.LoadLibrary +# lib = ll("libmaatframe.so") + +i=0 +sfh_set = list() +statistic = feature_statistics() +with open(raw_file_address,'r') as infile: + for line in infile: + i+=1 + + + + + line_return = data_line.if_error(line) + if(line_return != -1): + if(feature_statistics_type=="meida_len_statistics"): + statistic.meida_len_statistics(line_return[3]) + elif(feature_statistics_type=="data_value_statistics"): + lost_list = list() + statistic.meida_len_statistics(line_return) + for i in statistic.lost: + (statistic.lost[i])[2] = float((statistic.lost[i])[0])/(statistic.lost[i])[1] + tmp = (i,int((statistic.lost[i])[0]),int((statistic.lost[i])[1]),float((statistic.lost[i])[2])) + lost_list.append(tmp) + print sorted(lost_list,cmp=lambda x,y:cmp(x[2],y[2])) + # if(x == len(feature_list)-1): + # outfile.write(data_value_dic[feature_list[x]]+'\n') + # else: + # print lost + # outfile.write(str(data_value_dic[feature_list[x]])+',') + # outfile.write(result[3]) + # sfh_dot=list() + # for x in xrange(0,10): + # #transform sfh to dot + # sfh_dot.append(lib.GIE_sfh_similiarity(result[19],len(result[19]),sfh_set[x],len(sfh_set[x]))) + # if(len(data_set)==7): + # outfile.write(str(data_set[0])+','+str(data_set[1])+','+str(data_set[2])\ + # +','+str(data_set[3])+','+str(data_set[4])+','+str(data_set[5])+','+result[5]\ + # 
+','+result[7]+','+result[9]+','+result[11]+','+result[13]+','+result[15]+result[17]\ + # +','+result[19]+'\n') + +# with open(ripe_file_address,'w') as outfile: +# outfile.write(str(lost)) diff --git a/src/dataset_build/file_digest.py b/src/dataset_build/file_digest.py new file mode 100644 index 0000000..590e059 --- /dev/null +++ b/src/dataset_build/file_digest.py @@ -0,0 +1,96 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +class data_line(object): + """docstring for ClassName""" + def __init__(self): + super(ClassName, self).__init__() + + @staticmethod + def if_error(data_line_str): + data_line_val = re.split(r';',data_line_str) + hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) + if(term['data_num'](data_line_val) and \ + term['not_null'](data_line_val[0]) and \ + term['ysp_len'](data_line_val[3]) and \ + term['not_null'](data_line_val[4]) and \ + term['td_len'](data_line_val[6]) and \ + term['td_len'](data_line_val[8]) and \ + term['td_len'](data_line_val[10]) and \ + term['td_len'](data_line_val[12]) and \ + term['td_len'](data_line_val[14]) and \ + term['td_len'](data_line_val[16]) and \ + term['not_null'](data_line_val[18]) and \ + term['sfh_len'](data_line_val[19]) and \ + term['not_null'](data_line_val[20]) and \ + hashed_len/float(data_line_val[3])>=0.8): + return data_line_val + else: + return -1 + +class TD_fingerprint(object): + def __init__(): + self.td = td + self.td_string = td_string + @staticmethod + def td_generate(td_string): + td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} + +grain="./get_lost" +ripe_files=[] +config = ConfigParser.RawConfigParser() +config.read("grain.conf") +raw_file_address=config.get("file","raw_file_address") +ripe_files_address=config.get("file","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) +num = [0,0,0,0,0,0,0] +breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +# i=0 +# for i in xrange(0,ripe_file_num): +# outfile=open(ripe_files_address+str(i)+'.txt','w') +# ripe_files.append(outfile) + +i=0 +with open(raw_file_address,'r') as infile: +# with open('./ripe_data/mistake_td_sfh1_sfh2_sim_rate_len_url_unequal','r')as infile: + with open(ripe_files_address,'w')as outfile: + for line in infile: + i+=1 + if(i%10000==0): + print i + line_return = data_line.if_error(line) + if(line_return != -1): + outfile.write(str(line)) \ No newline at end of file diff --git a/src/dataset_build/get_lost.c b/src/dataset_build/get_lost.c new file mode 100644 index 0000000..0e6c452 --- /dev/null +++ b/src/dataset_build/get_lost.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include 
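+/* linked against libmaatframe, libMESA_htable, pthread and m — see dataset_build/CMakeLists.txt */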
+#include +#include +#define HTABLE_SIZE 8*64*1024*1024 +#define SFH_PASS_RATE 0.8 +#define SIMILIAR 80 + +typedef struct td +{ + char * tdstr; + unsigned int lost; +}td; + +typedef struct file_sfh_data +{ + long id; + char * sfh; + td * td_value; + char * td_ori; +}file_sfh_data; + +int main(int argc,char *argv[]) +{ + FILE *fpread;//文件 + FILE *fpwrite;//write file handle + int array_size = 1024; + file_sfh_data **file_data=(file_sfh_data **)malloc(sizeof(file_sfh_data)*array_size); + char* dirstr = "../../data/td_data_set/td_data_20171207/td_sfh_lost"; + //char* dirstr = *++argv; + char* writestr = "../../data/td_data_set/td_data_20171207/td.txt"; + int total_len = 0; + char TD_tmp[256], SFH_tmp[1024*300], TD_ORI[1024*10]; + char buffer[1024*300+1]; + int ret = 0; + int line = 0; + int thread_safe = 0; + int i; + int id; + int similiarity; + MESA_htable_handle htable = NULL; + fpread=fopen(dirstr,"rb"); + fpwrite=fopen(writestr,"w"); + printf("file str is %s\n",dirstr); + if(fpread==NULL) + { + printf("open file error\n"); + return -1; + } + buffer[sizeof(buffer)]='\0'; + while(feof(fpread)==0) + { + fgets(buffer,sizeof(buffer)-1,fpread); + ret=sscanf(buffer,"%d;%[^;];%[^;];%s",&total_len,TD_ORI,TD_tmp,SFH_tmp); + if(ret!=4) + { + continue; + } + file_data[line]=(file_sfh_data*)calloc(1,sizeof(file_sfh_data)); + file_data[line]->id=line; + file_data[line]->sfh=strdup(SFH_tmp); + file_data[line]->td_value=(td*)calloc(1,sizeof(td)); + file_data[line]->td_value->tdstr=strdup(TD_tmp); + file_data[line]->td_value->lost=0; + file_data[line]->td_ori=strdup(TD_ORI); + line++; + if(line==array_size) + { + array_size*=2; + file_data=realloc(file_data,sizeof(file_sfh_data)*array_size); + } + } + printf("read file success!\n"); + htable = NULL; + htable=MESA_htable_born(); + thread_safe = 0; + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + unsigned int slot_size=1024*1024*16; + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(slot_size)); + MESA_htable_mature(htable); + for(i=0;itd_value->tdstr),32,(void *)file_data[i]->id)<0) + { + id=(long)MESA_htable_search(htable,(char*)file_data[i]->td_value->tdstr,32); + similiarity=GIE_sfh_similiarity(file_data[id]->sfh,(int)strlen(file_data[id]->sfh),file_data[i]->sfh,(int)strlen(file_data[i]->sfh)); + if(similiaritytd_value->lost = 1; + file_data[i]->td_value->lost = 1; + } + } + } + for(i=0;itd_value->tdstr,file_data[i]->sfh,file_data[i]->td_ori,file_data[i]->td_value->lost); + } + for(i=0;isfh); + file_data[i]->sfh=NULL; + free(file_data[i]->td_value->tdstr); + file_data[i]->td_value->tdstr=NULL; + free(file_data[i]->td_value); + file_data[i]->td_value=NULL; + free(file_data[i]->td_ori); + file_data[i]->td_ori=NULL; + free(file_data[i]); + file_data[i]=NULL; + } + fclose(fpread); + fclose(fpwrite); + return 0; +} \ No newline at end of file diff --git a/src/dataset_build/grain.conf b/src/dataset_build/grain.conf new file mode 100644 index 0000000..944b337 --- /dev/null +++ b/src/dataset_build/grain.conf @@ -0,0 +1,5 @@ +[file] +ripe_files_address = ../../data/td_data_set/td_data_20171207/get_lost_raw_data +raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest +[output] +breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304 \ No newline at end of file diff --git a/src/dataset_build/td_classification.py b/src/dataset_build/td_classification.py new file mode 100644 index 0000000..8d4b97c --- /dev/null +++ b/src/dataset_build/td_classification.py @@ -0,0 +1,5 @@ +from 
sklearn.datasets import load_iris +from sklearn import tree + +with open() as infile: + \ No newline at end of file diff --git a/src/dataset_build/vedio_id_build.c b/src/dataset_build/vedio_id_build.c new file mode 100644 index 0000000..9faaa64 --- /dev/null +++ b/src/dataset_build/vedio_id_build.c @@ -0,0 +1,171 @@ +/* +gcc -g vedio_id_build.c -o vedio_id_build -lmaatframe -I../../inc +*/ +#include +#include +#include +#include "gram_index_engine.h" +#include +#include +#include +#define BUFFER_LEN (10*1024) +#define SFH_PASS_RATE 0.9 +#define SFH_LEN (10*1024) +#define URL_LEN (10*1024) + +typedef struct video_id +{ + long id; + char *sfh; +}video_id; + +typedef struct cache +{ + GIE_digest_t ** GIE_cache; + long cache_size; + long len; +}cache; + +long get_hashed_len(const char* sfh) +{ + char *data=(char*)malloc(strlen(sfh)+1); + memcpy(data,sfh, strlen(sfh)); + data[strlen(sfh)]='\0'; + char *token=NULL,*sub_token=NULL,*saveptr; + long left_offset=0,right_offset=0,hashed_length=0; + int ret=0,first=0; + for (token = data; ;token= NULL) + { + sub_token= strtok_r(token,"[", &saveptr); + if (sub_token == NULL) + { + break; + } + if(first==0)//jump over the first sub string. + { + first=1; + continue; + } + ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); + if(ret!=2) + { + return 0; + } + assert(ret==2); + hashed_length+=right_offset-left_offset+1; + } + //printf("hashed length=%ld\n",hashed_length); + free(data); + return hashed_length/2; +} + +int main(int argc,char *argv[]) +{ + FILE *video_id_sets_file; + FILE *new_sfh_file; + const char *video_id_sets_file_dir="../../data/td_data_set/td_data_20171207/video_id_raw_data"; + const char *new_sfh_file_dir="../../data/ripe_data/td_data_20171207/video_id.txt"; + char *buffer=NULL; + int ret = 0,hashed_len = 0,total_len = 0,resultnum = 0,i = 0; + int update = 0,video_id = 0,j = 0; + int* temp_int = NULL; + float temp_sfh_pass = 0; + char *sfh_str,*url_str; + GIE_digest_t *sfh_video_id = NULL; + GIE_result_t *query_result = NULL; + cache *GIE_digest_cache = NULL; + video_id_sets_file = fopen(video_id_sets_file_dir,"r+"); + new_sfh_file = fopen(new_sfh_file_dir,"w"); + if(video_id_sets_file == NULL) + { + printf("open video_id_sets_file error\n"); + return -1; + } + if(new_sfh_file == NULL) + { + printf("open new_sfh_file error\n"); + return -1; + } + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + GIE_create_para_t *query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t)); + query_result = (GIE_result_t*)calloc(1,sizeof(GIE_result_t)); + GIE_handle_t *query_handle; + query_para->gram_value = 7; + query_para->position_accuracy = 5; + query_handle=GIE_create((const GIE_create_para_t *)query_para); + free(query_para); + if(query_handle==NULL) + { + printf("create GIE handle error\n"); + return -1; + } + sfh_video_id = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + url_str = (char*)calloc(URL_LEN,sizeof(char)); + i=0; + GIE_digest_cache =(cache*)calloc(1,sizeof(cache)); + GIE_digest_cache->cache_size = 1000; + GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*)); + GIE_digest_cache->len = 0; + while(feof(video_id_sets_file)==0) + { + i++; + if(i%10000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,video_id_sets_file); + ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + 
%*[^;];%*[^;];%*[^;];%[^;];%[^;]",sfh_str,url_str); + if(ret!=2) + { + continue; + } + hashed_len = get_hashed_len((const char*)sfh_str); + temp_sfh_pass = (float)hashed_len/total_len; + if(temp_sfh_passid=i; + sfh_video_id->sfh_length=strlen(sfh_str); + sfh_video_id->operation=GIE_INSERT_OPT; + sfh_video_id->cfds_lvl=5; + sfh_video_id->sfh=strdup(sfh_str); + sfh_video_id->tag=temp_int; + GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_video_id; + GIE_digest_cache->len++; + if(GIE_digest_cache->len==GIE_digest_cache->cache_size) + { + update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size); + GIE_digest_cache->len=0; + for(j=0;jcache_size;j++) + { + free(GIE_digest_cache->GIE_cache[j]->sfh); + GIE_digest_cache->GIE_cache[j]->sfh=NULL; + free(GIE_digest_cache->GIE_cache[j]); + GIE_digest_cache->GIE_cache[j]=NULL; + } + } + fprintf(new_sfh_file,"%d,%s",i,buffer); + } + else + { + fprintf(new_sfh_file,"%d,%s",*((int*)query_result->tag),buffer); + } + } + free(buffer); + free(query_result); + free(sfh_video_id); + free(url_str); + free(sfh_str); + free(GIE_digest_cache); + return 0; +} \ No newline at end of file diff --git a/src/file_digest.conf b/src/file_digest.conf new file mode 100644 index 0000000..a02cae2 --- /dev/null +++ b/src/file_digest.conf @@ -0,0 +1,3 @@ +[file] +ripe_files_address = ../data/ripe_data/td_data_20171207/all_av_digest +raw_file_address = ../data/td_data_20171207/td_data/all_av_digest diff --git a/src/file_digest.py b/src/file_digest.py new file mode 100644 index 0000000..3703794 --- /dev/null +++ b/src/file_digest.py @@ -0,0 +1,104 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +class data_line(object): + """docstring for ClassName""" + def __init__(self): + super(ClassName, self).__init__() + + @staticmethod + def if_error(data_line_str): + data_line_val = re.split(r';',data_line_str) + hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) + if(term['data_num'](data_line_val) and \ + term['not_null'](data_line_val[0]) and \ + term['not_null'](data_line_val[1]) and \ + term['not_null'](data_line_val[2]) and \ + term['ysp_len'](data_line_val[3]) and \ + term['not_null'](data_line_val[4]) and \ + term['not_null'](data_line_val[5]) and \ + term['td_len'](data_line_val[6]) and \ + term['td_len'](data_line_val[7]) and \ + term['td_len'](data_line_val[8]) and \ + term['td_len'](data_line_val[9]) and \ + term['td_len'](data_line_val[10]) and \ + term['td_len'](data_line_val[11]) and \ + term['td_len'](data_line_val[12]) and \ + term['td_len'](data_line_val[13]) and \ + term['td_len'](data_line_val[14]) and \ + term['td_len'](data_line_val[15]) and \ + term['td_len'](data_line_val[16]) and \ + term['td_len'](data_line_val[17]) and \ + term['not_null'](data_line_val[18]) and \ + term['sfh_len'](data_line_val[19]) and \ + term['not_null'](data_line_val[20]) and \ + hashed_len/float(data_line_val[3])>0.999): + return data_line_val + else: + return -1 + +class TD_fingerprint(object): + def __init__(): + self.td = td + self.td_string = td_string + @staticmethod + def td_generate(td_string): + td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in 
xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} + +c_func="./" +ripe_files=[] +config = ConfigParser.RawConfigParser() +config.read("file_digest.conf") +raw_file_address=config.get("file","raw_file_address") +ripe_files_address=config.get("file","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) +# num = [0,0,0,0,0,0,0] +# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +# i=0 +# for i in xrange(0,ripe_file_num): +# outfile=open(ripe_files_address+str(i)+'.txt','w') +# ripe_files.append(outfile) + +i=0 +with open(raw_file_address,'r') as infile: + with open(ripe_files_address,'w')as outfile: + for line in infile: + i+=1 + if(i%10000==0): + print i + line_return = data_line.if_error(line) + if(line_return != -1): + outfile.write(str(line)) \ No newline at end of file diff --git a/src/get_td_mistake_lost/CMakeLists.txt b/src/get_td_mistake_lost/CMakeLists.txt new file mode 100644 index 0000000..87f4b6b --- /dev/null +++ b/src/get_td_mistake_lost/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT (CALCULATE) +SET (SRC_LIST get_lost_rate.c) +SET(CMAKE_BUILD_TYPE "Debug") +SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") +MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR}) +MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR}) +#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/) +#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/) +ADD_EXECUTABLE(get_lost_rate ${SRC_LIST} gram_index_engine.c) +TARGET_LINK_LIBRARIES(get_lost_rate maatframe libMESA_htable.so pthread m) diff --git a/src/get_td_mistake_lost/file_digest.conf b/src/get_td_mistake_lost/file_digest.conf new file mode 100644 index 0000000..6d1c06b --- /dev/null +++ b/src/get_td_mistake_lost/file_digest.conf @@ -0,0 +1,6 @@ +[file_digest] +ripe_files_address = ../../data/ripe_data/td_data_20171207/all_av_digest +raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest +[new_td] +ripe_files_address = ../../data/ripe_data/td_data_20171207/new_TD.txt +raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest diff --git a/src/get_td_mistake_lost/file_digest.py b/src/get_td_mistake_lost/file_digest.py new file mode 100644 index 0000000..62786ef --- /dev/null +++ b/src/get_td_mistake_lost/file_digest.py @@ -0,0 +1,104 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +class data_line(object): + """docstring for ClassName""" + def __init__(self): + super(ClassName, self).__init__() + + @staticmethod + def if_error(data_line_str): + data_line_val = re.split(r';',data_line_str) + hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) + if(term['data_num'](data_line_val) and \ + term['not_null'](data_line_val[0]) and \ + term['not_null'](data_line_val[1]) and \ + term['not_null'](data_line_val[2]) and \ + term['ysp_len'](data_line_val[3]) and \ + term['not_null'](data_line_val[4]) and \ + 
term['not_null'](data_line_val[5]) and \ + term['td_len'](data_line_val[6]) and \ + term['td_len'](data_line_val[7]) and \ + term['td_len'](data_line_val[8]) and \ + term['td_len'](data_line_val[9]) and \ + term['td_len'](data_line_val[10]) and \ + term['td_len'](data_line_val[11]) and \ + term['td_len'](data_line_val[12]) and \ + term['td_len'](data_line_val[13]) and \ + term['td_len'](data_line_val[14]) and \ + term['td_len'](data_line_val[15]) and \ + term['td_len'](data_line_val[16]) and \ + term['td_len'](data_line_val[17]) and \ + term['not_null'](data_line_val[18]) and \ + term['sfh_len'](data_line_val[19]) and \ + term['not_null'](data_line_val[20]) and \ + hashed_len/float(data_line_val[3])>0.999): + return data_line_val + else: + return -1 + +class TD_fingerprint(object): + def __init__(): + self.td = td + self.td_string = td_string + @staticmethod + def td_generate(td_string): + td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} + +c_func="./" +ripe_files=[] +config = ConfigParser.RawConfigParser() +config.read("file_digest.conf") +raw_file_address=config.get("file_digest","raw_file_address") +ripe_files_address=config.get("file_digest","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) +# num = [0,0,0,0,0,0,0] +# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +# i=0 +# for i in xrange(0,ripe_file_num): +# outfile=open(ripe_files_address+str(i)+'.txt','w') +# ripe_files.append(outfile) + +i=0 +with open(raw_file_address,'r') as infile: + with open(ripe_files_address,'w')as outfile: + for line in infile: + i+=1 + if(i%10000==0): + print i + line_return = data_line.if_error(line) + if(line_return != -1): + outfile.write(str(line)) \ No newline at end of file diff --git a/src/get_td_mistake_lost/get_TD_SFH.c b/src/get_td_mistake_lost/get_TD_SFH.c new file mode 100644 index 0000000..2ed3ecd --- /dev/null +++ b/src/get_td_mistake_lost/get_TD_SFH.c @@ -0,0 +1,162 @@ +/* +gcc -g get_TD_SFH.c -o get_TD_SFH -lmaatframe -lMESA_htable -I../include +./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level +*/ + +#include +#include +#include +#include "gram_index_engine.h" +#include +#include +#include +#define BUFFER_LEN (15*1024) +#define SFH_LEN (10*1024) +#define TD_LEN 33 +#define THREAD_SAFE 0 +#define SLOT_SIZE (1024*1024*16) +#define TD_STR_LEN (10*1024) +#define TIME_STR_LEN 128 + +typedef struct sfh_link +{ + // char *time_str; + char *sfh_str; + char *td_ori; + // char *md5_32k; + int similiar; + int all_similiar; + // long hash_len; + struct sfh_link *next; +}sfh_link; + +typedef struct sfh +{ + int all_num; + int all_similiar; + char *sfh_str; + // long hash_len; + sfh_link 
*sfh_link_items; +}sfh; + +void print_td_sfh(const uchar *key,uint size,void *data,void *arg) +{ + FILE *ripe_file=(FILE*)arg; + sfh *temp_sfh=(sfh*)data; + fprintf(ripe_file,"%s;%s;%s",key,temp_sfh->sfh_link_items->td_ori,temp_sfh->sfh_str); +} + +int main() +{ + FILE *raw_file; + FILE *ripe_file; + char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; + char *ripe_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_3"; + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + if(ripe_file==NULL) + { + printf("open all_av_digest_mistake_level error"); + return -1; + } + MESA_htable_handle htable=NULL; + char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL; + int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0; + unsigned int slot_size=SLOT_SIZE; + sfh *temp_sfh=NULL; + sfh_link *temp_sfh_link=NULL; + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); + // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char)); + // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); + // time_str[TIME_STR_LEN-1]='\0'; + // md5_32k_str[32]='\0'; + htable=MESA_htable_born(); + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); + MESA_htable_mature(htable); + while(feof(raw_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); + // assert(ret==5); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + td[32]='\0'; + // md5_32k_str[32]='\0'; + if((temp_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + { + temp_sfh=(sfh*)calloc(1,sizeof(sfh)); + temp_sfh->all_num=1; + temp_sfh->all_similiar=0; + temp_sfh->sfh_str=strdup(sfh_str); + temp_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + temp_sfh->sfh_link_items->td_ori=strdup(td_str); + // temp_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); + // temp_sfh->sfh_link_items->time_str=strdup(time_str); + temp_sfh->sfh_link_items->similiar=0; + temp_sfh->sfh_link_items->all_similiar=0; + temp_sfh->sfh_link_items->next=NULL; + ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_sfh); + assert(ret>0); + } + else + { + temp_similiar=GIE_sfh_similiarity(temp_sfh->sfh_str,(int)strlen(temp_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh->all_similiar+=temp_similiar; + temp_sfh_link=temp_sfh->sfh_link_items; + for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) + { + temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh_link->all_similiar+=temp_similiar; + temp_all_similiar+=temp_similiar; + if(temp_sfh_link->all_similiar>temp_sfh->all_similiar) + { + free(temp_sfh->sfh_str); + temp_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); + temp_sfh->all_similiar=temp_sfh_link->all_similiar; + } + if(temp_sfh_link->next==NULL) + { + break; + } + } + 
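+            /* temp_sfh_link is now the tail of this TD's SFH list: append the new observation, seeding its all_similiar with the similarity accumulated against the existing entries */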
temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh_link->next->sfh_str=strdup(sfh_str); + temp_sfh_link->next->td_ori=strdup(td_str); + // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); + // temp_sfh_link->next->time_str=strdup(time_str); + temp_sfh_link->next->similiar=0; + temp_sfh_link->next->all_similiar=temp_all_similiar; + temp_sfh_link->next->next=NULL; + temp_sfh->all_num+=1; + } + } + fclose(raw_file); + MESA_htable_iterate(htable,print_td_sfh,ripe_file); + free(sfh_str); + free(td); + free(td_str); + // free(md5_32k_str); + MESA_htable_destroy(htable,NULL); + // fclose(raw_file); + fclose(ripe_file); + return 0; +} \ No newline at end of file diff --git a/src/get_td_mistake_lost/get_lost_rate.c b/src/get_td_mistake_lost/get_lost_rate.c new file mode 100644 index 0000000..d983a00 --- /dev/null +++ b/src/get_td_mistake_lost/get_lost_rate.c @@ -0,0 +1,210 @@ +/* +gcc -g get_lost_rate.c -o get_lost_rate -lmaatframe -I../include +*/ +#include +#include +#include +#include "gram_index_engine.h" +#include +#include +#define BUFFER_LEN (10*1024) +#define CACHE_SIZE 2000000 +#define SFH_LEN (10*1024) +#define TD_LEN 33 +#define RESULT_NUM 10000 +#define TIME_STR_LEN 128 +#define TD_STR_LEN (10*1024) + +typedef struct cache +{ + GIE_digest_t ** GIE_cache; + long cache_size; + long len; +}cache; + +typedef struct GIE_tag +{ + char *td; + char *td_str; + char *sfh_str; +}GIE_tag; + +int main() +{ + FILE *td_sfh_file; + FILE *raw_file; + FILE *ripe_file; + const char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; + const char *td_sfh_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_1"; + const char *ripe_file_dir="../../data/ripe_data/td_data_20171207/get_lost_ripe_data_1"; + td_sfh_file = fopen(td_sfh_file_dir,"r+"); + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + char *buffer=NULL,*sfh_str=NULL,*td=NULL,*time_str=NULL,*td_str=NULL; + GIE_create_para_t *query_para=NULL; + GIE_handle_t *query_handle=NULL; + GIE_result_t *query_result = NULL; + cache *GIE_digest_cache = NULL; + GIE_digest_t *sfh_td = NULL; + int i=0,w=0,ret=0,lost=0,j=0,update=0,resultnum=0,temp_len=0; + GIE_tag *temp_tag =NULL; + if(td_sfh_file == NULL) + { + printf("open td_sfh_file_dir error\n"); + return -1; + } + if(raw_file == NULL) + { + printf("open raw_file_dir error\n"); + return -1; + } + if(ripe_file == NULL) + { + printf("open ripe_file_dir error\n"); + return -1; + } + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + sfh_str[SFH_LEN-1]='\0'; + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); + time_str[TIME_STR_LEN-1]='\0'; + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); + query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t)); + query_para->gram_value = 7; + query_para->position_accuracy = 5; + query_para->ED_reexamine=1; + query_para->format=GIE_INPUT_FORMAT_SFH; + query_handle=GIE_create((const GIE_create_para_t *)query_para); + free(query_para); + query_result = (GIE_result_t*)calloc(RESULT_NUM,sizeof(GIE_result_t)); + GIE_digest_cache =(cache*)calloc(1,sizeof(cache)); + GIE_digest_cache->cache_size = CACHE_SIZE; + GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*)); + GIE_digest_cache->len = 0; + if(query_handle==NULL) + { + printf("create GIE handle error\n"); + return -1; + } + while(feof(td_sfh_file)==0) + { + i++; + if(i%100000==0) + { + 
printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,td_sfh_file); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td,td_str,sfh_str); + assert(ret==3); + td[32]='\0'; + sfh_td = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); + sfh_td->id=i; + temp_len=strlen(sfh_str); + sfh_td->sfh_length=temp_len; + sfh_str[temp_len-1]='\0'; + sfh_td->operation=GIE_INSERT_OPT; + sfh_td->cfds_lvl=5; + sfh_td->sfh=strdup(sfh_str); + temp_tag=(GIE_tag*)calloc(1,sizeof(GIE_tag)); + temp_tag->td=strdup(td); + temp_tag->td_str=strdup(td_str); + temp_tag->sfh_str=strdup(sfh_str); + sfh_td->tag=(void*)temp_tag; + GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td; + GIE_digest_cache->len++; + // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); + // if(resultnum==0) + // { + // sfh_td = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); + // sfh_td->id=i; + // sfh_td->sfh_length=strlen(sfh_str); + // sfh_td->operation=GIE_INSERT_OPT; + // sfh_td->cfds_lvl=5; + // sfh_td->sfh=strdup(sfh_str); + // sfh_td->tag=(void*)strdup(td); + // GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td; + // GIE_digest_cache->len++; + // } + // else + // { + // for(j=0;jtag),td)!=0) + // { + // lost++; + // fprintf(ripe_file,"%s,%s,%s\n",(char*)((query_result+j)->tag),td,sfh_str); + // } + // } + // continue; + // } + // if(GIE_digest_cache->len==GIE_digest_cache->cache_size) + // { + // update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size); + // assert(update==GIE_digest_cache->len); + // GIE_digest_cache->len=0; + // for(j=0;jcache_size;j++) + // { + // free(GIE_digest_cache->GIE_cache[j]->sfh); + // GIE_digest_cache->GIE_cache[j]->sfh=NULL; + // free(GIE_digest_cache->GIE_cache[j]); + // GIE_digest_cache->GIE_cache[j]=NULL; + // } + // } + // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); + // for(i=0;itag,td)!=0) + // { + // lost++; + // } + // } + } + fclose(td_sfh_file); + update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->len); + for(j=0;jlen;j++) + { + free(GIE_digest_cache->GIE_cache[j]->sfh); + GIE_digest_cache->GIE_cache[j]->sfh=NULL; + free(GIE_digest_cache->GIE_cache[j]); + GIE_digest_cache->GIE_cache[j]=NULL; + } + i=0; + while(feof(raw_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + // ret=sscanf(buffer,"%[^;];%[^;]",td,sfh_str); + // assert(ret==2); + // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %*[^;];%[^;];%*[^;];%[^;];%*[^;]",td_str,td,sfh_str); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); + if(resultnum>1) + { + for(j=0;jtag)->td,td)!=0) + { + w=1; + fprintf(ripe_file,"%u,%s,%s,%s,%s,%s,%s\n",(query_result+j)->id,((GIE_tag*)((query_result+j)->tag))->td_str,((GIE_tag*)((query_result+j)->tag))->td,((GIE_tag*)((query_result+j)->tag))->sfh_str,td_str,td,sfh_str); + } + } + lost+=w; + w=0; + } + + } + printf("%d;%d\n",lost,i); + free(sfh_str); + free(td); + free(time_str); + free(td_str); +} \ No newline at end of file diff --git a/src/get_td_mistake_lost/get_mistake_level.c b/src/get_td_mistake_lost/get_mistake_level.c new file mode 100644 index 0000000..5f03974 --- /dev/null +++ 
b/src/get_td_mistake_lost/get_mistake_level.c @@ -0,0 +1,366 @@ +/* +gcc -g get_mistake_level.c -o get_mistake_level -lMESA_htable -lmaatframe -I../../include +./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level +*/ +#include +#include +#include +#include "gram_index_engine.h" +#include +#include +#include +#define THREAD_SAFE 0 +#define SLOT_SIZE (1024*1024*16) +#define SIMILIAR_RATE 90 +#define TD_STR_LEN (10*1024) +#define TIME_STR_LEN 128 +#define RAODONG_RATE 0.1 +#define BUFFER_LEN (15*1024) +#define SFH_LEN (10*1024) +#define TD_LEN 33 + +typedef struct sfh_link +{ + // char *time_str; + char *sfh_str; + char *td_ori; + // char *md5_32k; + int similiar; + int all_similiar; + // long hash_len; + struct sfh_link *next; +}sfh_link; + +typedef struct mistake_sfh +{ + int mistake_num; + int all_num; + int all_similiar; + char *sfh_str; + // long hash_len; + sfh_link *sfh_link_items; +}mistake_sfh; + +typedef struct temp_parameter +{ + int mistake_num; + FILE *ripe_file; +}temp_parameter; + +long get_hashed_len(const char* sfh) +{ + char *data=(char*)malloc(strlen(sfh)+1); + memcpy(data,sfh, strlen(sfh)); + data[strlen(sfh)]='\0'; + char *token=NULL,*sub_token=NULL,*saveptr; + long left_offset=0,right_offset=0,hashed_length=0; + int ret=0,first=0; + for (token = data; ; token= NULL) + { + sub_token= strtok_r(token,"[", &saveptr); + if (sub_token == NULL) + { + break; + } + if(first==0)//jump over the first sub string. + { + first=1; + continue; + } + ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); + if(ret!=2) + { + return 0; + } + assert(ret==2); + hashed_length+=right_offset-left_offset+1; + } + //printf("hashed length=%ld\n",hashed_length); + free(data); + return hashed_length/2; +} + +void print_mistake_td(const uchar *key,uint size,void *data,void *arg) +{ + temp_parameter *parameter = (temp_parameter*)arg; + mistake_sfh *temp_mistake_sfh=(mistake_sfh*)data; + float temp_rate=0; + temp_rate=(float)temp_mistake_sfh->mistake_num/(float)temp_mistake_sfh->all_num; + if(temp_rate>RAODONG_RATE) + { + parameter->mistake_num+=temp_mistake_sfh->mistake_num; + fprintf(parameter->ripe_file,"%d;%s\n",temp_mistake_sfh->mistake_num,temp_mistake_sfh->sfh_str); + sfh_link *temp_sfh_link=temp_mistake_sfh->sfh_link_items; + for(;;temp_sfh_link=temp_sfh_link->next) + { + if(temp_sfh_link==NULL) + { + break; + } + temp_sfh_link->similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str)); + // fprintf(parameter->ripe_file,"%s,%d;%s;%s;%s\n",temp_sfh_link->time_str,temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori,temp_sfh_link->md5_32k); + fprintf(parameter->ripe_file,"%d;%s;%s\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori); + } + fprintf(parameter->ripe_file,"\n"); + } +} + +int main(int argc,char *argv[]) +{ + FILE *raw_file; + FILE *ripe_file; + char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; + char *ripe_file_dir="../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_3"; + char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL; + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0; + long temp_hash_len=0; + unsigned int slot_size=SLOT_SIZE; + mistake_sfh *temp_mistake_sfh=NULL; + sfh_link *temp_sfh_link=NULL; + MESA_htable_handle 
htable=NULL; + temp_parameter *parameter=NULL; + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + + + if(ripe_file==NULL) + { + printf("open all_av_digest_mistake_level error"); + return -1; + } + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); + // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char)); + // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); + // time_str[TIME_STR_LEN-1]='\0'; + // md5_32k_str[32]='\0'; + htable=MESA_htable_born(); + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); + MESA_htable_mature(htable); + parameter=(temp_parameter*)calloc(1,sizeof(temp_parameter)); + parameter->mistake_num=0; + parameter->ripe_file=ripe_file; + while(feof(raw_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + td[32]='\0'; + // md5_32k_str[32]='\0'; + if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + { + temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh)); + temp_mistake_sfh->mistake_num=0; + temp_mistake_sfh->all_num=1; + temp_mistake_sfh->all_similiar=0; + // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str); + temp_mistake_sfh->sfh_str=strdup(sfh_str); + temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str); + // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); + // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str); + temp_mistake_sfh->sfh_link_items->similiar=0; + temp_mistake_sfh->sfh_link_items->all_similiar=0; + temp_mistake_sfh->sfh_link_items->next=NULL; + ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh); + assert(ret>0); + } + else + { + temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_mistake_sfh->all_similiar+=temp_similiar; + temp_sfh_link=temp_mistake_sfh->sfh_link_items; + for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) + { + // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh_link->all_similiar+=temp_similiar; + temp_all_similiar+=temp_similiar; + if(temp_sfh_link->all_similiar>temp_mistake_sfh->all_similiar) + { + free(temp_mistake_sfh->sfh_str); + temp_mistake_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); + temp_mistake_sfh->all_similiar=temp_sfh_link->all_similiar; + } + if(temp_sfh_link->next==NULL) + { + break; + } + } + // if(temp_hash_len>temp_mistake_sfh->hash_len) + // { + // temp_mistake_sfh->hash_len=temp_hash_len; + // free(temp_mistake_sfh->sfh_str); + // temp_mistake_sfh->sfh_str=strdup(sfh_str); + // } + temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh_link->next->sfh_str=strdup(sfh_str); + 
+ temp_sfh_link->next->td_ori=strdup(td_str);
+ // temp_sfh_link->next->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->next->time_str=strdup(time_str);
+ temp_sfh_link->next->similiar=0;
+ temp_sfh_link->next->all_similiar=temp_all_similiar;
+ temp_sfh_link->next->next=NULL;
+ temp_mistake_sfh->all_num+=1;
+ }
+ }
+ fclose(raw_file);
+ raw_file = fopen(raw_file_dir,"r+");
+ if(raw_file==NULL)
+ {
+ printf("open all_av_digest error\n");
+ return -1;
+ }
+ i=0;
+ while(feof(raw_file)==0)
+ {
+ i++;
+ if(i%10000==0)
+ {
+ printf("%d\n",i);
+ }
+ fgets(buffer,BUFFER_LEN-1,raw_file);
+ buffer[BUFFER_LEN-1]='\0';
+ // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\
+ // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str);
+ ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str);
+ assert(ret==3);
+ td[32]='\0';
+ // md5_32k_str[32]='\0';
+ temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN);
+ assert(temp_mistake_sfh!=NULL);
+ // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL)
+ // {
+ // temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh));
+ // temp_mistake_sfh->num=0;
+ // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str);
+ // temp_mistake_sfh->sfh_str=strdup(sfh_str);
+ // temp_sfh_link=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_sfh_link->sfh_str=strdup(sfh_str);
+ // temp_sfh_link->td_ori=strdup(td_str);
+ // temp_sfh_link->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->time_str=strdup(time_str);
+ // temp_sfh_link->next=NULL;
+ // temp_mistake_sfh->sfh_link_items=temp_sfh_link;
+ // ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh);
+ // assert(ret>0);
+ // }
+ // else
+ // {
+ // temp_hash_len=get_hashed_len(sfh_str);
+ // if(temp_hash_len>temp_mistake_sfh->hash_len)
+ // {
+ // temp_sfh_link->hash_len=get_hashed_len();
+ // free(temp_sfh_link->sfh_str);
+ // temp_sfh_link->sfh_str=strdup(sfh_str);
+ // }
+ temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str));
+ if(temp_similiar<SIMILIAR_RATE)
+ {
+ temp_mistake_sfh->mistake_num+=1;
+ }
+ // if(temp_mistake_sfh->sfh_link_items!=NULL)
+ // {
+ // temp_sfh_link=temp_mistake_sfh->sfh_link_items;
+ // for(;;temp_sfh_link=temp_sfh_link->next)
+ // {
+ // // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))<SIMILIAR_RATE)
+ // if(temp_sfh_link->next==NULL)
+ // {
+ // break;
+ // }
+ // }
+ // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_sfh_link->next->sfh_str=strdup(sfh_str);
+ // temp_sfh_link->next->td_ori=strdup(td_str);
+ // temp_sfh_link->next->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->next->time_str=strdup(time_str);
+ // temp_sfh_link->next->similiar=temp_similiar;
+ // temp_sfh_link->next->next=NULL;
+ // }
+ // else
+ // {
+ // temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str);
+ // temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str);
+ // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str);
+ // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str);
+ // temp_mistake_sfh->sfh_link_items->similiar=temp_similiar;
+ // temp_mistake_sfh->sfh_link_items->next=NULL;
+ // }
+ // if(temp_mistake==1)
+ // {
+ // temp_mistake_sfh->num+=temp_mistake;
+ // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link));
+ // temp_sfh_link->next->sfh_str=strdup(sfh_str);
+ // temp_sfh_link->next->td_ori=strdup(td_str);
+ // temp_sfh_link->next->md5_32k=strdup(md5_32k_str);
+ // temp_sfh_link->next->next=NULL;
+ // temp_mistake=0;
+ // }
+ }
+ fclose(raw_file);
+ // raw_file=NULL;
+ // raw_file = fopen(raw_file_dir,"r+");
+ // if(raw_file==NULL)
+ // {
+ // printf("open all_av_digest error\n");
+ // return -1;
+ // }
+ // i=0;
+ // while(feof(raw_file)==0)
+ // {
+ // i++;
+ // if(i%10000==0)
+ // {
+ // printf("%d\n",i);
+ // }
+ // fgets(buffer,BUFFER_LEN-1,raw_file);
+ // buffer[BUFFER_LEN-1]='\0';
+ // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\
+ // %*[^;];%[^;];%*[^;];%*[^;];%*[^;]",td);
+ // assert(ret==1);
+ // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))!=NULL)
+ // {
+ // fprintf(ripe_file,"%d;%s",temp_mistake_sfh->num,buffer);
+ // }
+ // }
+ MESA_htable_iterate(htable,print_mistake_td,(void*)parameter);
+ printf("%d,%d\n",parameter->mistake_num,i);
+ free(buffer);
+ free(sfh_str);
+ free(td);
+ free(td_str);
+ // free(md5_32k_str);
+ MESA_htable_destroy(htable,NULL);
+ // fclose(raw_file);
+ fclose(ripe_file);
+ return 0;
+}
\ No newline at end of file
diff --git a/src/get_td_mistake_lost/get_td_mistake_lost.sh b/src/get_td_mistake_lost/get_td_mistake_lost.sh
new file mode 100644
index 0000000..7c851b8
--- /dev/null
+++ b/src/get_td_mistake_lost/get_td_mistake_lost.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+python new_TD.py
+./get_mistake_level
+./get_TD_SFH
+./get_lost_rate
diff --git a/src/get_td_mistake_lost/gram_index_engine.c b/src/get_td_mistake_lost/gram_index_engine.c
new file mode 100644
index 0000000..0f503db
--- /dev/null
+++ b/src/get_td_mistake_lost/gram_index_engine.c
@@ -0,0 +1,1354 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+#include <unistd.h>
+#include <MESA_htable.h> /* assumed header name for the MESA_htable library linked with -lMESA_htable */
+
+#include "gram_index_engine.h"
+#include "queue.h"
+
+#define HTABLE_SIZE 1024 *1024
+#define GRAM_CNT_MAX 2
+#define GRAM_MAX 128
+#define TOLERENCE_SIZE 0
+#define UNION_INIT_SIZE 1000
+#define BLOCKSIZE_MIN 3
+#define MEM_OCCUPY 1
+#define CNT_MAX 10
+#define GRAM_CNT_THRESHOLD 10
+#define QUERY_LEN_ACCURACY 0.1
+#define HTABLE_NUM 8
+//#define GIE_INPUT_FORMAT_SFH 1
+//#define GIE_INPUT_FORMAT_PLAIN 0
+#define MAX_LENGTH 10000
+#define KEY_MAX_LENGTH 10
+#define EDIT_DISTN_INSERT_COST 1
+#define EDIT_DISTN_REMOVE_COST 1
+#define EDIT_DISTN_REPLACE_COST 2
+#define MIN(x,y) ((x)<(y)?(x):(y))
+
+int before(unsigned int off1, unsigned int off2)
+{
+ return (signed int)(off1-off2)<0;
+}
+#define after(off2,off1) before(off1,off2)
+
+typedef struct
+{
+ unsigned int user_gram_value;
+ unsigned int user_position_accuracy;
+ short ED_reexamine;
+ short input_format;
+ MESA_htable_handle id_table;
+ MESA_htable_handle index_table[HTABLE_NUM];
+ unsigned long long mem_occupy;
+ unsigned long long hash_cnt;
+}GIE_handle_inner_t;
+
+
+struct linklist_node
+{
+ short * position;
+ struct id_table_data * basicinfo;
+ short size;
+ short index;
+ unsigned long long blocksize;
+ TAILQ_ENTRY(linklist_node) listentry;
+};
+
+
+struct index_table_data
+{
+ struct TQ * listhead;
+ int cnt;
+};
+
+
+struct id_table_data
+{
+ unsigned int id;
+ short sfh_length;
+ short gram_cnt;
+ unsigned long long blocksize;
+ char * sfh;
+ void * tag;
+ char cfds_lvl;
+};
+
+
+struct htable_handle
+{
+ MESA_htable_handle runtime_table;
+ MESA_htable_handle para;
+};
+
+struct key_list_node
+{
+ char * key;
+ int digest_id;
+ int pos;
+ unsigned long long blocksize;
+ TAILQ_ENTRY(key_list_node) keylistentry;
+};
+
+
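+/* hash_cnt and cnt_sum are touched only by indextable_free_cnt() while GIE_destory() releases the index tables; they tally how many index slots and list nodes were freed */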
+unsigned long long hash_cnt; +unsigned long long cnt_sum; + +TAILQ_HEAD(TQ, linklist_node); +TAILQ_HEAD(KL, key_list_node); + +void idtable_free(void * data); +void indextable_free(void * data); +int key_compare(const uchar * key1, uint size1, const uchar * key2, uint size2); +int GIE_insert_indextable(MESA_htable_handle handle, struct id_table_data * info, char * key, unsigned int index,unsigned long long blocksize); + +int GIE_delete_from_indextable_by_key(MESA_htable_handle handle, char * key, unsigned int id); +int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t * digest); +int GIE_cmp(const void * a, const void * b); +inline unsigned int get_real_length(const char * string, unsigned int length); +void print_item_iterate(const uchar * key, unsigned int size, void * data, void * user); +inline unsigned long long calc_fh_blocksize(unsigned long long orilen); +inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len); + +MESA_htable_handle copy_htable(void * htable_para,void (* func)(const uchar * key, uint size, void * data, void *user),void (*free_fuc)(void * data)); +void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user); +void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user); + +GIE_handle_t * GIE_create(const GIE_create_para_t * para) +{ + int i = 0; + GIE_handle_inner_t * handle = (GIE_handle_inner_t *)calloc(1, sizeof(GIE_handle_inner_t)); + handle->mem_occupy = 0; + handle->mem_occupy += sizeof(GIE_handle_inner_t); + + handle->user_gram_value = para->gram_value; + handle->user_position_accuracy = para->position_accuracy; + handle->input_format = para->format; + //handle->user_cmp = GIE_INPUT_FORMAT_PLAIN; + handle->ED_reexamine = para->ED_reexamine; + handle->hash_cnt = 0; + + + MESA_htable_create_args_t idtable_args,indextable_args[HTABLE_NUM]; + memset(&idtable_args, 0, sizeof(idtable_args)); + idtable_args.thread_safe = 0; + idtable_args.hash_slot_size = HTABLE_SIZE; + idtable_args.max_elem_num = 0; + idtable_args.expire_time = 0; + idtable_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO; + idtable_args.key_comp = NULL; + idtable_args.key2index = NULL; + idtable_args.data_free = idtable_free; + idtable_args.data_expire_with_condition = NULL; + idtable_args.recursive = 0; + handle->id_table = MESA_htable_create(&idtable_args, sizeof(idtable_args)); + + for(i = 0;i < HTABLE_NUM;i++) + { + memset(&indextable_args[i], 0, sizeof(indextable_args[i])); + indextable_args[i].thread_safe = 0; + indextable_args[i].hash_slot_size = HTABLE_SIZE; + indextable_args[i].max_elem_num = 0; + indextable_args[i].expire_time = 0; + indextable_args[i].eliminate_type = HASH_ELIMINATE_ALGO_FIFO; + indextable_args[i].key_comp = key_compare; + indextable_args[i].key2index = NULL; + indextable_args[i].data_free = indextable_free; + indextable_args[i].data_expire_with_condition = NULL; + indextable_args[i].recursive = 0; + handle->index_table[i] = MESA_htable_create(&indextable_args[i], sizeof(indextable_args[i])); + } + + return (GIE_handle_t *)(handle); +} + +int key_compare(const uchar * key1, uint size1, const uchar * key2, uint size2) +{ + return ( (*(long*)key1) - (*(long*)key2)); +} + + +void idtable_free(void * data) +{ + struct id_table_data * tmp = (struct id_table_data *)data; + free(tmp->sfh); + tmp->sfh = NULL; + tmp->tag = NULL; + free(tmp); + tmp = NULL; + + return; +} + +void indextable_delete_with_threshold(MESA_htable_handle * htable_handle, struct index_table_data * tmp, char 
* key) +{ + int key_length = strnlen(key,KEY_MAX_LENGTH); + struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); + while(tmp_node != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node,listentry); + if(tmp_node->basicinfo->gram_cnt <= GRAM_CNT_THRESHOLD) + { + tmp_node = linklist_tmp; + continue; + } + TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); + tmp_node->basicinfo->gram_cnt--; + tmp->cnt--; + if(TAILQ_EMPTY(tmp->listhead) == 1) + { + //_handle->hash_cnt--; + //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ)); + if(MESA_htable_del(htable_handle, (const uchar *)(key), key_length, indextable_free) < 0) + { + printf("indextable backtrack delete error!\n"); + assert(0); + return; + } + } + //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp_node->size)); + free(tmp_node->position); + tmp_node->position = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = linklist_tmp; + + } + return; +} + + +void indextable_free(void * data) +{ + struct index_table_data * tmp = (struct index_table_data *)data; + struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); + while(tmp_node != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry); + TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); + tmp->cnt--; + free(tmp_node->position); + tmp_node->position = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = linklist_tmp; + } + free(tmp->listhead); + tmp->listhead = NULL; + free(tmp); + tmp = NULL; + return; +} + + +void indextable_free_cnt(void * data) +{ + struct index_table_data * tmp = (struct index_table_data *)data; + hash_cnt++; + cnt_sum += tmp->cnt; + struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); + while(tmp_node != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry); + TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); + tmp->cnt--; + free(tmp_node->position); + tmp_node->position = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = linklist_tmp; + } + free(tmp->listhead); + tmp->listhead = NULL; + free(tmp); + tmp = NULL; + return; +} + +void print_item_iterate_idtable(const uchar * key, uint size, void * data, void * user) +{ + struct id_table_data * id_data = (struct id_table_data *)data; + printf("id:%u\n",id_data->id); +} + + + +void print_item_iterate(const uchar * key, uint size, void * data, void * user) +{ + struct index_table_data * index_data = (struct index_table_data *)data; + printf("%s %d\n", (char *)key, index_data->cnt); + struct linklist_node * tmp_node = NULL; + int i = 0; + TAILQ_FOREACH(tmp_node, index_data->listhead, listentry) + { + printf("id = %u\n",tmp_node->basicinfo->id); + printf("position is :\n"); + for(i = 0;i < tmp_node->index;i++) + { + printf("%d ",tmp_node->position[i]); + } + printf("\n"); + } + printf("\n"); +} + +int edit_distn(const char *s1, int s1len, const char *s2, int s2len) +{ + long int max_len = 0; + if(s1len >= s2len) + { + max_len = s1len; + } + else + { + max_len = s2len; + } + int **t = (int **)malloc(2*sizeof(int *)); + t[0] = (int *)malloc((max_len +1)*sizeof(int)); + t[1] = (int *)malloc((max_len +1)*sizeof(int)); + //int t[2][EDIT_DISTN_MAXLEN+1]; + int *t1 = t[0]; + int *t2 = t[1]; + int *t3; + size_t i1, i2; + for (i2 = 0; i2 <= s2len; i2++) + t[0][i2] = i2 * EDIT_DISTN_REMOVE_COST; + for (i1 = 0; i1 < s1len; i1++) { + t2[0] = (i1 + 1) * EDIT_DISTN_INSERT_COST; + for (i2 = 0; i2 < s2len; i2++) { + int cost_a = t1[i2+1] + EDIT_DISTN_INSERT_COST; + int cost_d = t2[i2] + 
EDIT_DISTN_REMOVE_COST; + int cost_r = t1[i2] + (s1[i1] == s2[i2] ? 0 : EDIT_DISTN_REPLACE_COST); + t2[i2+1] = MIN(MIN(cost_a, cost_d), cost_r); + } + t3 = t1; + t1 = t2; + t2 = t3; + } + long int ret = t1[s2len]; + free(t[0]); + free(t[1]); + free(t); + return ret; + //return t1[s2len]; +} + + +void GIE_destory(GIE_handle_t * handle) +{ + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle); + //printf("hash_cnt:%llu\n",_handle->hash_cnt); + //printf("mem_occupy:%llu\n",_handle->mem_occupy); + int i = 0; + for(i = 0;i < HTABLE_NUM;i++) + { + MESA_htable_destroy(_handle->index_table[i], indextable_free_cnt); + } + MESA_htable_destroy(_handle->id_table, idtable_free); + //printf("index_free hash_cnt :%llu\n", hash_cnt); + //printf("cnt sum :%llu\n",cnt_sum); + free(_handle); + _handle = NULL; +} + + +int grab_key_set(char * str_begin,short str_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list, unsigned long long blocksize) +{ + int k = 0,j = 0; + char * tmp_gram = str_begin; + char key[gram_value+1]; + int sum = 0,htable_index = 0; + if(str_length < gram_value) + { + return 0; + } + str_length = MIN(str_length,strnlen(str_begin,str_length)); + *gram_cnt = str_length - gram_value + 1; + //printf("str_length:%d\n",str_length); + for(k = 0; k < str_length - gram_value + 1; k++) + { + sum = 0; + memset(key,'\0', gram_value+1); + memcpy(key, tmp_gram++, gram_value); + //printf("k:%d key:%s\n",k,key); + for(j = 0; j < gram_value; j++) + { + sum += key[j]; + } + htable_index = sum%HTABLE_NUM; + struct key_list_node *tmp_node = (struct key_list_node *)calloc(1,sizeof(struct key_list_node)); + tmp_node->key = (char *)calloc(gram_value+1,sizeof(char)); + memcpy(tmp_node->key,key,gram_value); + tmp_node->digest_id = i; + tmp_node->pos = k; + tmp_node->blocksize = blocksize; + TAILQ_INSERT_TAIL(to_process_list[htable_index], tmp_node, keylistentry); + } + return 1; +} +int sfh_grab_key_set(char *sfh,short sfh_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list) +{ + int t = 0; + char * tmp_gram = sfh; + unsigned long long blocksize = 0; + for(t = 0; t < 2;t++) + { + blocksize = get_blocksize_from_head(tmp_gram, sfh_length); + while(*tmp_gram != '\0') + { + if(*tmp_gram == ':') + { + tmp_gram++; + break; + } + tmp_gram++; + } + unsigned int real_length = get_real_length(tmp_gram, sfh_length); + if(real_length < gram_value) + { + if(t==0) + { + return 0; + } + else + { + continue; + } + } + grab_key_set(tmp_gram, real_length, i, gram_value, gram_cnt, to_process_list, blocksize); + while(*tmp_gram != '\0') + { + if(*tmp_gram == '#') + { + tmp_gram++; + break; + } + tmp_gram++; + } + } + return 1; +} + +void free_key_set(struct KL ** to_process_list,int size) +{ + int i = 0; + for(i = 0;i < size;i++) + { + struct key_list_node *tmp_node = TAILQ_FIRST(to_process_list[i]); + while(tmp_node != NULL) + { + struct key_list_node *key_list_tmp = TAILQ_NEXT(tmp_node, keylistentry); + TAILQ_REMOVE(to_process_list[i], tmp_node, keylistentry); + free(tmp_node->key); + tmp_node->key = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = key_list_tmp; + } + free(to_process_list[i]); + to_process_list[i]= NULL; + } +} + +int GIE_update(GIE_handle_t * handle,GIE_digest_t * * digests,int size) +{ + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle); + struct id_table_data * info = NULL; + int success_cnt = 0; + int m = 0, i = 0, grab_ret = 0; + short gram_cnt = 0; + unsigned int input_fh_len = 0; + unsigned int gram_value = 
_handle->user_gram_value; + struct KL* to_process_list[HTABLE_NUM]; + + MESA_htable_handle htable_index_copy; + MESA_htable_handle htable_id_copy; + MESA_htable_handle htable_tmp_index=NULL,htable_tmp_id=NULL; + struct htable_handle * htable_copied_id_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle)); + struct htable_handle * htable_copied_index_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle)); + + htable_copied_id_para->runtime_table = _handle->id_table; + htable_copied_id_para->para = NULL; + htable_id_copy = copy_htable((void *)htable_copied_id_para, copy_idtable_item_iterate,idtable_free); + + MESA_htable_handle garbage_htable[HTABLE_NUM]; + /*if(MESA_htable_iterate(htable_id_copy, print_item_iterate_idtable, NULL) == -1) + { + printf("iterate error!\n"); + } + printf("size:%u\n",id_size);*/ + + for(m = 0;m < HTABLE_NUM;m++) + { + to_process_list[m]=(struct KL*)calloc(1,sizeof(struct KL)); + TAILQ_INIT(to_process_list[m]); + } + + for(i = 0; i < size; i++) + { + switch(digests[i]->operation) + { + case GIE_INSERT_OPT: + { + assert(digests[i]->tag!=NULL); + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + grab_ret = sfh_grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list); + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + + grab_ret = grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list,0); + } + if(grab_ret == 0) + { + continue; + } + else + { + info = (struct id_table_data *)calloc(1,sizeof(struct id_table_data)); + input_fh_len = digests[i]->sfh_length; + info->sfh = (char *)calloc(input_fh_len + 1,sizeof(char)); + memcpy(info->sfh, digests[i]->sfh, input_fh_len); + _handle->mem_occupy += sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1); + info->sfh_length = digests[i]->sfh_length; + info->gram_cnt = gram_cnt; + + /*int tag_len = strnlen(digests[i]->tag,MAX_LENGTH); + info->tag = (char *)calloc(tag_len+1,sizeof(char)); + memcpy(info->tag,digests[i]->tag,tag_len);*/ + info->tag = digests[i]->tag; + + info->id = digests[i]->id; + info->cfds_lvl = digests[i]->cfds_lvl; + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + info->blocksize = get_blocksize_from_head(digests[i]->sfh, digests[i]->sfh_length); + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + info->blocksize = 0; + } + + if(MESA_htable_add(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), (const void *)info) < 0) + { + _handle->mem_occupy -= (sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1)); + free(info->sfh); + info->sfh = NULL; + free(info); + info = NULL; + continue; + } + } + success_cnt ++; + break; + } + + case GIE_DELETE_OPT: + { + + struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(htable_id_copy, \ + (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id)); + if(ret!= NULL) + { + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + success_cnt += sfh_grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list); + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + + success_cnt += grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list,0); + } + } + else + { + break; + } + if(MESA_htable_del(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free) < 0) + { + printf("delete id failed!"); + assert(0); + } + //success_cnt += GIE_delete(_handle, digests[i]); + break; + } + + default: + break; + 
} + + } + unsigned int digest_id = 0; + struct id_table_data * tmp_info= NULL; + + for(i = 0;i < HTABLE_NUM;i++) + { + htable_copied_index_para->runtime_table = _handle->index_table[i]; + htable_copied_index_para->para = htable_id_copy; + htable_index_copy = copy_htable((void *)htable_copied_index_para,copy_indextable_item_iterate,indextable_free); + struct key_list_node * tmp_node; + TAILQ_FOREACH(tmp_node, to_process_list[i], keylistentry) + { + digest_id = tmp_node->digest_id; + if(digests[digest_id]->operation == GIE_INSERT_OPT) + { + tmp_info =(struct id_table_data *)MESA_htable_search(htable_id_copy, (const uchar *)(&(digests[digest_id])->id), \ + sizeof((digests[digest_id])->id)); + if(tmp_info == NULL) + { + printf("id %u not insert\n",digests[digest_id]->id); + } + if(GIE_insert_indextable(htable_index_copy, tmp_info, tmp_node->key, tmp_node->pos,tmp_node->blocksize) < 0) + { + printf("insert %d indextable failed!\n",digests[digest_id]->id); + continue; + } + } + else if(digests[digest_id]->operation == GIE_DELETE_OPT) + { + if(GIE_delete_from_indextable_by_key(htable_index_copy, tmp_node->key, (digests[digest_id])->id) < 0) + { + printf("delete %d indextable failed!\n",digests[digest_id]->id); + continue; + } + } + } + htable_tmp_index= _handle->index_table[i]; + _handle->index_table[i] = htable_index_copy; + garbage_htable[i]=htable_tmp_index; + } + + htable_tmp_id = _handle->id_table; + _handle->id_table = htable_id_copy; + usleep(200); + MESA_htable_destroy(htable_tmp_id, idtable_free); + /*if(MESA_htable_iterate(_handle->index_table, print_item_iterate, NULL) == -1) + { + printf("iterate error!\n"); + }*/ + for(i=0;iruntime_table = copy_htable_handle; + htable_iterate_para->para = htable_copied_para->para; + + if(MESA_htable_iterate(htable_copied_para->runtime_table, func, htable_iterate_para) == -1) + { + printf("iterate error!\n"); + } + free(htable_iterate_para); + htable_copied_para=NULL; + return copy_htable_handle; +} + +void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user) +{ + struct index_table_data * index_data = (struct index_table_data *)data; + struct htable_handle * htable_copied_para = (struct htable_handle *)user; + + struct index_table_data * index_data_copy = (struct index_table_data *)calloc(1, sizeof(struct index_table_data)); + struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ)); + index_data_copy->listhead = head; + index_data_copy->cnt = index_data->cnt; + + TAILQ_INIT(head); + struct linklist_node * tmp_node = NULL; + struct id_table_data * ret = NULL; + int i = 0; + + TAILQ_FOREACH(tmp_node, index_data->listhead, listentry) + { + struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node)); + node_data->size = tmp_node->size; + node_data->position = (short *)calloc(node_data->size, sizeof(short)); + for(i = 0;i < tmp_node->index;i++) + { + node_data->position[i] = tmp_node->position[i]; + } + ret = (struct id_table_data *)MESA_htable_search(htable_copied_para->para, (const uchar *)(&(tmp_node->basicinfo->id)), sizeof(tmp_node->basicinfo->id)); + if(ret == NULL) + { + //printf("copy id %u not exist\n",tmp_node->basicinfo->id); + free(node_data->position); + node_data->position = NULL; + free(node_data); + node_data = NULL; + continue; + } + node_data->basicinfo = ret; + node_data->index = tmp_node->index; + node_data->blocksize = tmp_node->blocksize; + TAILQ_INSERT_TAIL(head, node_data, listentry); + } + MESA_htable_add(htable_copied_para->runtime_table, key, size, (const 
void *)index_data_copy); +} +//TODO: Using the orginal value instead of make a duplication to be faster. +void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user) +{ + struct id_table_data * id_data = (struct id_table_data *)data; + struct htable_handle * htable_para = (struct htable_handle *)user; + struct id_table_data * id_data_copy = (struct id_table_data *)calloc(1, sizeof(struct id_table_data)); + assert(id_data->tag!=NULL); + memcpy(id_data_copy,id_data,sizeof(struct id_table_data)); + id_data_copy->sfh = (char *)calloc(id_data_copy->sfh_length,sizeof(char)); + memcpy(id_data_copy->sfh,id_data->sfh,id_data_copy->sfh_length); + + MESA_htable_add(htable_para->runtime_table, (const uchar *)(&(id_data_copy->id)), sizeof(id_data_copy->id), (const void *)id_data_copy); +} + + + + +int GIE_insert_indextable(MESA_htable_handle htable_copy, struct id_table_data * info, char * key, unsigned int index, unsigned long long blocksize) +{ + int key_length = strnlen(key,KEY_MAX_LENGTH); + struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node)); + node_data->size = GRAM_CNT_MAX; + node_data->position = (short *)calloc(node_data->size, sizeof(short)); + node_data->basicinfo = info; + node_data->index = 0; + node_data->position[(node_data->index)++] = index; + node_data->blocksize = blocksize; + + //_handle->mem_occupy += sizeof(struct linklist_node) + sizeof(short)*(node_data->size); + + struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable_copy, \ + (const uchar *)(key), key_length)); + + + if(ret != NULL) + { + struct linklist_node * tmp = NULL; + TAILQ_FOREACH(tmp, ret->listhead, listentry) + { + if(tmp->basicinfo->id > node_data->basicinfo->id) + { + TAILQ_INSERT_BEFORE(tmp, node_data, listentry); + ret->cnt ++; + if(ret->cnt >= CNT_MAX) + { + indextable_delete_with_threshold(htable_copy,ret,key); + } + return 0; + } + if(tmp->basicinfo->id == node_data->basicinfo->id && tmp->blocksize == blocksize) + { + if(tmp->index >= tmp->size) + { + tmp->size *= 2; + tmp->position = realloc(tmp->position, (tmp->size)*sizeof(short)); + } + tmp->position[(tmp->index)++] = index; + //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(node_data->size)); + free(node_data->position); + node_data->position = NULL; + free(node_data); + node_data = NULL; + return 0; + } + } + TAILQ_INSERT_TAIL(ret->listhead, node_data, listentry); + ret->cnt ++; + if(ret->cnt >= CNT_MAX) + { + indextable_delete_with_threshold(htable_copy,ret,key); + } + } + + else + { + struct index_table_data * index_data = (struct index_table_data *)calloc(1, sizeof(struct index_table_data)); + struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ)); + //_handle->mem_occupy += sizeof(struct index_table_data) + sizeof(struct TQ); + + index_data->listhead = head; + index_data->cnt = 0; + + TAILQ_INIT(head); + TAILQ_INSERT_TAIL(head, node_data, listentry); + index_data->cnt++; + //_handle->hash_cnt++; + if(MESA_htable_add(htable_copy, (const uchar *)(key), key_length, (const void *)index_data) < 0) + { + printf("add index_table failed!\n"); + assert(0); + return -1; + } + } + return 0; + +} + + + +int GIE_delete(GIE_handle_inner_t * _handle, GIE_digest_t * digest) +{ + int success_cnt = 0; + struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(_handle->id_table, \ + (const uchar *)(&(digest->id)), sizeof(digest->id)); + if(ret == NULL) + { + printf("del %d doesn't exist!\n",digest->id); + return -1; + } + 
else + { + int gram_value = _handle->user_gram_value; + char key[gram_value+1]; + char * tmp_gram = ret->sfh; + while(*tmp_gram != '\0') + { + if(*tmp_gram == ':') + { + tmp_gram++; + break; + } + tmp_gram++; + } + unsigned int real_length = get_real_length(tmp_gram, ret->sfh_length); + int gram_cnt = real_length - gram_value + 1; + int k = 0; + for(k = 0; k < gram_cnt; k++) + { + memset(key, '\0', gram_value+1); + memcpy(key, tmp_gram++, gram_value); + if(GIE_delete_from_indextable_by_key(_handle, key, digest->id) < 0) + { + printf("delete %d indextable failed!\n",digest->id); + continue; + } + } + success_cnt++; + } + + return success_cnt; +} + + + +int GIE_delete_from_indextable_by_key(MESA_htable_handle htable, char * key, unsigned int id) +{ + int key_length = strnlen(key,KEY_MAX_LENGTH); + struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable, \ + (const uchar *)(key), key_length)); + if(ret == NULL) + { + return 0; + } + + + struct linklist_node * tmp = TAILQ_FIRST(ret->listhead); + while(tmp != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp, listentry); + if(tmp->basicinfo->id != id) + { + tmp=linklist_tmp; + continue; + } + TAILQ_REMOVE(ret->listhead, tmp, listentry); + ret->cnt--; + //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp->size)); + free(tmp->position); + tmp->position = NULL; + free(tmp); + tmp = NULL; + if(TAILQ_EMPTY(ret->listhead) == 1) + { + //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ)); + int ret = MESA_htable_del(htable, (const uchar *)(key), key_length, indextable_free); + if(ret < 0) + { + printf("indextable backtrack delete error!\n"); + assert(0); + return -1; + } + + } + } + return 0; +} + + + + +int GIE_cmp(const void * a, const void * b) +{ + unsigned int tmp_a = *(unsigned int *)a; + unsigned int tmp_b = *(unsigned int *)b; + if(before(tmp_a, tmp_b)) + { + return -1; + } + else if(after(tmp_a, tmp_b)) + { + return 1; + } + else + { + return 0; + } +} + + +inline unsigned int get_real_length(const char * string, unsigned int length) +{ + unsigned int ret = 0; + const char * tmp_str = string; + while(*tmp_str != '\0') + { + if(*tmp_str == '[') + { + break; + } + tmp_str++; + ret ++; + } + return ret; +} + + +inline int GIE_part_query(GIE_handle_inner_t * _handle, const char * query_string, int index_begin, int part_query_len,unsigned int ** id_union, unsigned int * union_index, unsigned int * union_size, unsigned long long blocksize) +{ + unsigned int gram_value = _handle->user_gram_value; + + unsigned int real_length = part_query_len; + unsigned int chunk_count_max = 0; + if(real_length < gram_value) + { + return 0; + } + else + { + chunk_count_max = real_length/gram_value; + } + char key[gram_value+1]; + struct index_table_data * ret = NULL; + struct linklist_node * tmp_node_t = NULL; + + unsigned int position_accuracy = _handle->user_position_accuracy; + + int i=0,j=0,k=0; + unsigned int tmp_min = 0; + int sum = 0, htable_index = 0; + for(i = index_begin; i < chunk_count_max + index_begin; i++) + { + sum = 0; + memset(key,'\0',gram_value+1); + memcpy(key, query_string, gram_value); + for(k = 0; k < gram_value; k++) + { + sum += key[k]; + } + htable_index = sum%HTABLE_NUM; + ret = (struct index_table_data *) MESA_htable_search(_handle->index_table[htable_index], \ + (const uchar *)(key), strnlen(key,gram_value)); + query_string = query_string + gram_value; + + if(ret ==NULL) + { + break; + } + + tmp_node_t = NULL; + TAILQ_FOREACH(tmp_node_t, 
ret->listhead, listentry) + { + tmp_min = 0; + if(i*gram_value >= position_accuracy) + { + tmp_min = i*gram_value - position_accuracy; + } + for(j = 0; j < tmp_node_t->index; j++) + { + if((blocksize == tmp_node_t->basicinfo->blocksize) && (tmp_node_t->position[j] >= tmp_min) && (tmp_node_t->position[j] <= i*gram_value + position_accuracy)) + //if(blocksize == tmp_node_t->basicinfo->blocksize) + { + if((*union_index) >= (*union_size)) + { + *union_size = (*union_size) * 2; + *id_union = (unsigned int *)realloc(*id_union, (*union_size)*sizeof(unsigned int)); + } + (*id_union)[(*union_index)] = tmp_node_t->basicinfo->id; + (*union_index)++; + break; + } + } + } + } + return chunk_count_max; +} + +inline int GIE_gram_with_position(GIE_handle_inner_t * _handle, unsigned long long query_blocksize, const char * fuzzy_string, unsigned int ** id_union, + unsigned int * union_index,unsigned int * union_size, unsigned int * chunk_cnt) +{ + const char * tmpstr = fuzzy_string; + const char * query_string_begin; + unsigned long long blocksize = query_blocksize; + int part_query_len = 0; + int query_actual_len = 0; + while(*tmpstr != ':'&& *tmpstr != '\0') + { + tmpstr ++; + } + if(*tmpstr == ':') + { + tmpstr ++; + } + else + { + return 0; + } + query_string_begin = tmpstr; + char *p = NULL; + + while((*query_string_begin) != '\0') + { + int left = 0; + int right = 0; + p=strchr(query_string_begin,'['); + if(p!=NULL) + { + part_query_len = p-query_string_begin; + int ret = sscanf(p,"[%d:%d]",&left,&right); + if(ret != 2) + { + break; + } + p=strchr(p,']'); + if(p != NULL && (*p) != '\0') + { + int index_begin = (left/blocksize - TOLERENCE_SIZE > 0 ? (left/blocksize - TOLERENCE_SIZE) : 0); + (*chunk_cnt) += GIE_part_query(_handle,query_string_begin,index_begin, part_query_len, + id_union, union_index, union_size, blocksize); + query_actual_len += part_query_len; + query_string_begin = p+1; + } + else + { + break; + } + } + else + { + break; + } + } + return query_actual_len; +} + +inline unsigned long long calc_fh_blocksize(unsigned long long orilen) +{ + double tmp = orilen/(64 * BLOCKSIZE_MIN); + double index = floor(log(tmp)/log(2)); + double tmp_t = pow(2,index); + unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN); + return blocksize; +} + +inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len) +{ + const char * tmp_str = fuzzy_string; + char blk[100]; + memset(blk,'\0',sizeof(blk)); + unsigned long long blocksize = 0; + int i = 0; + while(*tmp_str != '\0' && *tmp_str != ':' && str_len != 0 && i < 100) + { + blk[i++] = *tmp_str; + tmp_str++; + str_len--; + } + blocksize = (unsigned long long)atoi(blk); + return blocksize; +} +int GIE_string_similiarity(const char *str1, int len1, const char *str2, int len2) +{ + int edit_distance=0; + int conf=0; + edit_distance = edit_distn(str1, len1,str2,len2); + conf = 100-(edit_distance*100)/(len1 + len2); + return conf; +} + +int GIE_sfh_similiarity(const char *sfh1, int len1, const char *sfh2, int len2) +{ + int j = 0, t = 0; + unsigned long long query_blocksize = 0, index_blocksize = 0; + unsigned int query_real_length = 0, index_real_length = 0; + const char *query_gram_begin = sfh1; + const char *index_gram_begin = sfh2; + char *splice_str = (char *)malloc(sizeof(char)*len1); + memset(splice_str,'\0',len1); + char *spli_str_begin = splice_str; + int edit_distance = 0; + int ret = 0; + char *p = NULL; + int splice_len = 0; + + for(j = 0; j < 2; j++) + { + index_blocksize = 
get_blocksize_from_head(index_gram_begin, len2); + while((*index_gram_begin) != '\0') + { + if((*index_gram_begin) == ':') + { + index_gram_begin++; + break; + } + index_gram_begin++; + } + index_real_length = get_real_length(index_gram_begin, len2); + query_gram_begin = sfh1; + for(t = 0; t < 2; t++) + { + query_blocksize = get_blocksize_from_head(query_gram_begin, len1); + //printf("gram_begin:%c\n",*index_gram_begin); + //printf("gram_str:%s\n",index_gram_begin); + while((*query_gram_begin) != '\0') + { + if((*query_gram_begin) == ':') + { + query_gram_begin++; + break; + } + query_gram_begin++; + } + //printf("query_blocksize:%lld, index_blocksize:%lld\n",query_blocksize,index_blocksize); + //index_real_length = get_real_length(index_gram_begin, len1); + if(query_blocksize == index_blocksize) + { + while((*query_gram_begin) != '#' && (*query_gram_begin) != '\0') + { + p=strchr(query_gram_begin,'['); + if(p!=NULL) + { + query_real_length = p-query_gram_begin; + p=strchr(p,']'); + if(p != NULL && (*p) != '\0') + { + + memcpy(spli_str_begin,query_gram_begin,query_real_length); + spli_str_begin += query_real_length; + //edit_distance += edit_distn(query_gram_begin, query_real_length, index_gram_begin, index_real_length); + query_gram_begin = p+1; + } + else + { + break; + } + } + else + { + break; + } + } + splice_len = strnlen(splice_str,len1); + edit_distance = edit_distn(index_gram_begin, index_real_length, splice_str, splice_len); + //printf("query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance); + ret = 100-(edit_distance*100)/(index_real_length + splice_len); + //ret = (100*ret)/SPAM_LENGTH; + //ret = 100-ret; + //ret = 100 - (100*edit_distance)/(query_real_length); + free(splice_str); + return ret; + } + while(*query_gram_begin != '\0') + { + if(*query_gram_begin == '#') + { + query_gram_begin++; + break; + } + query_gram_begin++; + } + + } + while(*index_gram_begin != '\0') + { + if(*index_gram_begin == '#') + { + index_gram_begin++; + break; + } + index_gram_begin++; + } + } + //printf("no blocksize:query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance); + free(splice_str); + return 0; +} + + + + +int GIE_query(GIE_handle_t * handle, const char * data, int data_len, GIE_result_t * results, int result_size) +{ + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *) handle; + int i = 0, j = 0; + unsigned int union_index = 0; + unsigned int gram_value = _handle->user_gram_value; + unsigned int query_actual_len = 0; + unsigned int union_size = UNION_INIT_SIZE; + unsigned int chunk_cnt = 0; + const char *fuzzy_string_begin = data; + unsigned int * id_union =(unsigned int *)calloc(union_size, sizeof(unsigned int)); + unsigned long long query_blocksize = 0; + unsigned int fuzzy_string_len = (unsigned int)data_len; + + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + for(j = 0;j < 2;j++) + { + query_blocksize = get_blocksize_from_head(fuzzy_string_begin, fuzzy_string_len); + if(query_blocksize == 0) + { + return 0; + } + query_actual_len += GIE_gram_with_position(_handle, query_blocksize, fuzzy_string_begin, &id_union, &union_index, &union_size, &chunk_cnt); + while(*fuzzy_string_begin != '#' && *fuzzy_string_begin != '\0') + { + fuzzy_string_begin++; + } + if(*fuzzy_string_begin == '#') + { + fuzzy_string_begin++; + } + } + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + query_actual_len = fuzzy_string_len; + chunk_cnt = GIE_part_query(_handle, fuzzy_string_begin, 0, 
query_actual_len, &id_union, &union_index, &union_size, 0); + } + + if(union_index == 0) + { + free(id_union); + id_union = NULL; + return 0; + } + + qsort(id_union, union_index, sizeof(id_union[0]), GIE_cmp); + + unsigned int current_id = id_union[0]; + unsigned int * tmp_id = id_union; + unsigned int count = 0; + struct id_table_data * ret_tmp = NULL; + short conf = 0; + int ret_size = 0; + for(i = 0; i <= union_index; i++) + { + if( i == union_index || *tmp_id != current_id ) + { + ret_tmp = (struct id_table_data *) MESA_htable_search(_handle->id_table, \ + (const uchar *)(&(current_id)), sizeof(current_id)); + + if(ret_tmp == NULL) + { + break; + } + char * tmp_gram = ret_tmp->sfh; + int length = ret_tmp->sfh_length; + if(ret_tmp->gram_cnt == 0||chunk_cnt == 0) + { + conf = 0; + } + else + { + conf = (count*(query_actual_len-gram_value+1)*10)/(chunk_cnt*(ret_tmp->gram_cnt)); + } + + if(_handle->ED_reexamine == 1) + { + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + conf = GIE_sfh_similiarity(data, fuzzy_string_len, tmp_gram, length); + } + else + { + conf=GIE_string_similiarity(data, fuzzy_string_len, tmp_gram, length); + } + } + + if(conf >= ret_tmp->cfds_lvl) + { + results[ret_size].cfds_lvl = conf; + results[ret_size].id = current_id; + /*results[ret_size].tag = (char *)malloc((ret_tmp->sfh_length + 1)*sizeof(char)); + memset(results[ret_size].tag,'\0',(ret_tmp->sfh_length+1)); + memcpy(results[ret_size].tag, ret_tmp->sfh,ret_tmp->sfh_length);*/ + results[ret_size].tag = ret_tmp->tag; + ret_size++; + } + + if(ret_size == result_size) + { + break; + } + + current_id = *tmp_id; + count = 1; + + } + else + { + count++; + } + + tmp_id ++; + } + + free(id_union); + id_union = NULL; + return ret_size; +} + + +unsigned long long GIE_status(GIE_handle_t * handle, int type) +{ + unsigned long long length; + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)handle; + switch(type) + { + case MEM_OCCUPY: + length = _handle->mem_occupy; + break; + default: + return 0; + } + return length; +} + diff --git a/src/get_td_mistake_lost/new_TD.conf b/src/get_td_mistake_lost/new_TD.conf new file mode 100644 index 0000000..be9301e --- /dev/null +++ b/src/get_td_mistake_lost/new_TD.conf @@ -0,0 +1,3 @@ +[file] +ripe_files_address = ../data/ripe_data/td_data_20171207/new_TD.txt +raw_file_address = ../data/ripe_data/td_data_20171207/all_av_digest diff --git a/src/get_td_mistake_lost/new_TD.py b/src/get_td_mistake_lost/new_TD.py new file mode 100644 index 0000000..5b7269f --- /dev/null +++ b/src/get_td_mistake_lost/new_TD.py @@ -0,0 +1,34 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +config = ConfigParser.RawConfigParser() +config.read("file_digest.conf") +raw_file_address=config.get("new_td","raw_file_address") +ripe_files_address=config.get("new_td","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) + +def get_md5_value(td_string): + my_md5 = hashlib.md5() + my_md5.update(td_string) + my_md5_string=str(my_md5.hexdigest()) + return my_md5_string + +i=0 +with open(raw_file_address,'r') as infile: + with open(ripe_files_address,'w')as outfile: + for line in infile: + i+=1 + if(i%100000==0): + print i; + data_line_val = re.split(r';',line) + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data_line_val[4]) + td_string=str("url"+data_set[1]+"MediaType:"+data_set[3]+"MediaLen:"+data_set[4] \ + 
+"Etag:"+data_set[5]+"LastModify:"+data_set[6]+"td_data_md5_32k:"+data_line_val[16]) + new_td=get_md5_value(td_string) + outfile.write(td_string+";"+new_td+";"+data_line_val[19]+"\n") \ No newline at end of file diff --git a/src/rssb_statistics/all_len_st.py b/src/rssb_statistics/all_len_st.py new file mode 100644 index 0000000..6e32518 --- /dev/null +++ b/src/rssb_statistics/all_len_st.py @@ -0,0 +1,17 @@ +import os +import re +import csv +import bisect +# cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" +# cmd2 = "cat media_expire.log.2018* > media_expire.log" +# os.system(cmd1) +# os.system(cmd2) +breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] +st_num = [0,0,0,0,0,0,0,0] + +with open("media_expire.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|media_len:",line) + j = bisect.bisect(breakpoints,int(line_result[6])) + st_num[j]+=1 +print st_num \ No newline at end of file diff --git a/src/rssb_statistics/delete_len_st.py b/src/rssb_statistics/delete_len_st.py new file mode 100644 index 0000000..010dc84 --- /dev/null +++ b/src/rssb_statistics/delete_len_st.py @@ -0,0 +1,48 @@ +import os +import re +import csv +import bisect +cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" +cmd2 = "cat ../12.log/media_expire.log.2018* > ../12.log/media_expire.log" +cmd3 = "cat media_expire.log.2018* > media_expire.log" +os.system(cmd1) +os.system(cmd2) +os.system(cmd3) +breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] +st_num = [0,0,0,0,0,0,0,0] +mid_13_set=set() +mid_12_set=set() +mid_12_file = "../12.log/media_expire.log" +mid_13_file = "media_expire.log" + +i=0 +with open(mid_13_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + line_result = re.split(r",|MID:|media_len:",line) + mid_13_set.add(int(line_result[3])) + +i=0 +with open(mid_12_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + line_result = re.split(r",|MID:|media_len:",line) + mid_12_set.add(int(line_result[3])) + +different = mid_12_set.difference(mid_13_set) + +i=0 +with open(mid_12_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + line_result = re.split(r",|MID:|media_len:",line) + if((int(line_result[3]) in different)==True): + j = bisect.bisect(breakpoints,int(line_result[6])) + st_num[j]+=1 +print st_num \ No newline at end of file diff --git a/src/rssb_statistics/find_lost_td.conf b/src/rssb_statistics/find_lost_td.conf new file mode 100644 index 0000000..6ab40d9 --- /dev/null +++ b/src/rssb_statistics/find_lost_td.conf @@ -0,0 +1,14 @@ +[file] +raw_survey_file_13 = ../data/data_20180423/14.log/survey.recv.log +raw_deup_file = ../data/data_20180423/13.log/dedup.log +raw_survey_file_12 = ../data/data_20180423/12.log/survey.recv.log +run_time_file = ../data/data_20180423/runtime_log +lost_td_line = ../data/data_20180423/ripe_file/lost_td_line +mid_12_file = ../data/data_20180423/ripe_file/mid_12_file +mid_13_file = ../data/data_20180423/ripe_file/mid_14_file +list_12_file = ../data/data_20180423/ripe_file/list_12_file +list_13_file = ../data/data_20180423/ripe_file/list_14_file +different_mid_file_13 = ../data/data_20180423/ripe_file/different_mid_file_14 +different_mid_file_12 = ../data/data_20180423/ripe_file/different_mid_file_12 +different_list_file_13 = ../data/data_20180423/ripe_file/different_list_file_14 +different_list_file_12 = ../data/data_20180423/ripe_file/different_list_file_12 \ No newline at end 
of file diff --git a/src/rssb_statistics/find_lost_td.py b/src/rssb_statistics/find_lost_td.py new file mode 100644 index 0000000..50f3fab --- /dev/null +++ b/src/rssb_statistics/find_lost_td.py @@ -0,0 +1,147 @@ +import ConfigParser +import re + +config = ConfigParser.RawConfigParser() +config.read("find_lost_td.conf") +raw_survey_file_13 = config.get("file","raw_survey_file_13") +raw_deup_file = config.get("file","raw_deup_file") +# run_time_file = config.get("file","run_time_file") +raw_survey_file_12 = config.get("file","raw_survey_file_12") +# lost_td_line = config.get("file","lost_td_line") +mid_12_file = config.get("file","mid_12_file") +mid_13_file = config.get("file","mid_13_file") +list_12_file = config.get("file","list_12_file") +list_13_file = config.get("file","list_13_file") +different_mid_file_13 = config.get("file","different_mid_file_13") +different_mid_file_12 = config.get("file","different_mid_file_12") +different_list_file_13 = config.get("file","different_list_file_13") +different_list_file_12 = config.get("file","different_list_file_12") + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)>7), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20), + 'not_null':(lambda x : len(x)!=0)} + +mid_13=dict() +with open(raw_survey_file_13,'r') as infile: + for line in infile: + data_line_val = re.split(r',',line) + if(len(data_line_val)==8): + mid_string = (re.split(r"MID:",data_line_val[2]))[1] + mid_13[mid_string]=list() + + +with open(mid_13_file,'w') as outfile: + for key in mid_13: + outfile.write(key+'\n') + +mid_12=dict() +with open(raw_survey_file_12,'r') as infile: + for line in infile: + data_line_val = re.split(r',',line) + if(len(data_line_val)==8): + mid_string = (re.split(r"MID:",data_line_val[2]))[1] + mid_12[mid_string]=list() + +with open(mid_12_file,'w') as outfile: + for key in mid_12: + outfile.write(key+'\n') + +different_mid_13 = list() +with open(different_mid_file_13,'w') as outfile: + for key in mid_13: + if(mid_12.has_key(key)==False): + different_mid_13.append(key) + outfile.write(key+'\n') + +different_mid_12 = list() +with open(different_mid_file_12,'w') as outfile: + for key in mid_12: + if(mid_13.has_key(key)==False): + different_mid_12.append(key) + outfile.write(key+'\n') + +i=0 +with open(raw_deup_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + data_line_val = re.split(r",|MID:|TD:",line) + if(term['data_num'](data_line_val) and \ + mid_13.has_key(str(data_line_val[4])) == True): + mid_13[data_line_val[4]].append(data_line_val[6]) + if(term['data_num'](data_line_val) and \ + mid_12.has_key(str(data_line_val[4])) == True): + mid_12[data_line_val[4]].append(data_line_val[6]) + +td_list_13 =list() +with open(list_13_file,'w') as outfile: + for key in mid_13.keys(): + for td in mid_13[key]: + if(term['not_null'](td) and td_list_13.count(td)==0): + td_list_13.append(td) + outfile.write(td+'\n') + +td_list_12 =list() +with open(list_12_file,'w') as outfile: + for key in mid_12.keys(): + for td in mid_12[key]: + if(term['not_null'](td) and td_list_12.count(td)==0): + td_list_12.append(td) + outfile.write(td+'\n') + +different_list_12 = list() +with open(different_list_file_12,'w') as outfile: + for x in td_list_12: + if(td_list_13.count(x)==0): + different_list_12.append(x) + outfile.write(x+'\n') + +different_list_13 = list() +with open(different_list_file_13,'w') as outfile: + for x in td_list_13: + if(td_list_12.count(x)==0): + different_list_13.append(x) + 
outfile.write(x+'\n') + +td_dict=dict() +for i in different_list_12: + td_dict[i]=list() + +# i=0 +# with open(run_time_file,'r') as infile: +# for line in infile: +# i+=1 +# if(i%100000==0): +# print i +# if(line.find("NCHK_QUREY__KNOW")!=-1): +# data_line_val = re.split(r',|TD:',line) +# if(td_dict.has_key(data_line_val[6]) == True): +# td_dict[data_line_val[6]].insert(0,"NCHK_QUREY__KNOW"+'\n') +# td_dict[data_line_val[6]].append(line) +# elif(line.find("NCHK_QUREY__UNKNOW")!=-1): +# data_line_val = re.split(r',|TD:',line) +# if(td_dict.has_key(data_line_val[6]) == True): +# td_dict[data_line_val[6]].append(line) +# elif(line.find("NCHK_REPORT__SUCC")!=-1): +# data_line_val = re.split(r',|TD:',line) +# if(td_dict.has_key(data_line_val[6]) == True): +# td_dict[data_line_val[6]].append(line) + +# else: +# continue + + +print len(different_list_12),len(different_list_13),\ +len(td_list_12),len(td_list_13),\ +len(mid_12),len(mid_13),len(different_mid_13),len(different_mid_12) + +# with open(lost_td_line,'w') as outfile: +# for key in td_dict.keys(): +# if(len(td_dict[key])>2 and td_dict[key][0]=="NCHK_QUREY__KNOW"): +# outfile.write(key+':\n') +# for i in td_dict[key]: +# outfile.write(i) + diff --git a/src/rssb_statistics/harm_len_st.py b/src/rssb_statistics/harm_len_st.py new file mode 100644 index 0000000..8a372b3 --- /dev/null +++ b/src/rssb_statistics/harm_len_st.py @@ -0,0 +1,29 @@ +import os +import re +import csv +import bisect +cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" +cmd2 = "cat media_expire.log.2018* > media_expire.log" +os.system(cmd1) +os.system(cmd2) +breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] +st_num = [0,0,0,0,0,0,0,0] +harm_mid_dic=dict() +with open("survey.recv_survey.log",'r') as infile: + for line in infile: + data_line_val = re.split(r',',line) + if(len(data_line_val)==8): + mid_string = (re.split(r"MID:",data_line_val[2]))[1] + harm_mid_dic[mid_string]=list() + +with open("media_expire.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|media_len:",line) + if(harm_mid_dic.has_key(str(line_result[3]))==True): + # print int(line_result[6]) + j = bisect.bisect(breakpoints,int(line_result[6])) + st_num[j]+=1 +print st_num +# with open("un_recv_list.csv",'w') as csvfile: +# writer = csv.writer(csvfile) +# writer.writerow(un_recv_list) \ No newline at end of file diff --git a/src/rssb_statistics/recv_survey_mid_st.py b/src/rssb_statistics/recv_survey_mid_st.py new file mode 100644 index 0000000..f7faaeb --- /dev/null +++ b/src/rssb_statistics/recv_survey_mid_st.py @@ -0,0 +1,3 @@ +import os +cmd = "cat survey.log.2018* | grep \"recv survey\" | wc -l" +os.system(cmd) \ No newline at end of file diff --git a/src/rssb_statistics/service_st.py b/src/rssb_statistics/service_st.py new file mode 100644 index 0000000..7fecd33 --- /dev/null +++ b/src/rssb_statistics/service_st.py @@ -0,0 +1,29 @@ +import re + +different_mid_12=set() +different_mid_14=set() +service_dic=dict() + +with open("different_mid_file_12",'r') as infile: + for line in infile: + different_mid_12.add(long(line)) +with open("different_mid_file_14",'r') as infile: + for line in infile: + different_mid_14.add(long(line)) +with open("../12.log/survey.recv_survey.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|service:",line) + if((long(line_result[3]) in different_mid_12)==True): + if(service_dic.has_key(line_result[5])==True): + service_dic[line_result[5]]+=1 + else: + 
service_dic[line_result[5]]=0 +with open("../14.log/survey.recv.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|service:",line) + if((long(line_result[3]) in different_mid_14)==True): + if(service_dic.has_key(line_result[5])==True): + service_dic[line_result[5]]+=1 + else: + service_dic[line_result[5]]=0 +print service_dic \ No newline at end of file diff --git a/src/rssb_statistics/un_recv_st.py b/src/rssb_statistics/un_recv_st.py new file mode 100644 index 0000000..9d3d234 --- /dev/null +++ b/src/rssb_statistics/un_recv_st.py @@ -0,0 +1,36 @@ +import os +import re +import csv +cmd = "cat rssb_stat.log.2018-04-16 | grep \"/home/audiorecognition/aufilter/un_recv\" > rssb_stat.log.un_recv" +os.system(cmd) +i=0 +last_len = 0 +add_len = 0 +num = 0 +un_recv_list = list() +with open("rssb_stat.log.un_recv",'r') as infile: + for line in infile: + line_result = re.split(r'\t',line) + if(i==0): + last_len = long(line_result[2]) + break + +with open("rssb_stat.log.un_recv",'r') as infile: + for line in infile: + line_result = re.split(r'\t',line) + if(last_len <= long(line_result[2])): + add_len = add_len+long(line_result[2])-last_len + last_len = long(line_result[2]) + else: + add_len = add_len+long(line_result[2]) + last_len = long(line_result[2]) + i+=1 + if(i>=120): + i=0 + un_recv_list.append(str(add_len)) + add_len=0 + +with open("un_recv_list.csv",'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(un_recv_list) + diff --git a/src/rssb_statistics/un_recv_survey_mid_st.py b/src/rssb_statistics/un_recv_survey_mid_st.py new file mode 100644 index 0000000..e1e970f --- /dev/null +++ b/src/rssb_statistics/un_recv_survey_mid_st.py @@ -0,0 +1,3 @@ +import os +cmd = "cat survey.log.2018* | grep \"sync_audio\" | wc -l" +os.system(cmd) \ No newline at end of file diff --git a/src/sfh_integrate/SFH_function.c b/src/sfh_integrate/SFH_function.c new file mode 100644 index 0000000..a311f9c --- /dev/null +++ b/src/sfh_integrate/SFH_function.c @@ -0,0 +1,177 @@ +/* +gcc -g SFH_function.c -o SFH_function -lmaatframe -lMESA_htable -I../include +*/ +#include +#include +#include +#include "gram_index_engine.h" +#include +#include +#include +#define SLOT_SIZE (1024*1024*16) +#define THREAD_SAFE 0 +#define BUFFER_LEN (10*1024) +#define SFH_LEN (10*1024) +#define TD_LEN 33 + +typedef struct sfh_link +{ + char *sfh_str; + int similiar; + int all_similiar; + long hash_len; + struct sfh_link *next; +}sfh_link; + +typedef struct top_similiar_sfh +{ + int all_num; + int all_similiar; + char *sfh_str; + long hash_len; + sfh_link *sfh_link_items; +}top_similiar_sfh; + +long get_hashed_len(const char* sfh) +{ + char *data=(char*)malloc(strlen(sfh)+1); + memcpy(data,sfh, strlen(sfh)); + data[strlen(sfh)]='\0'; + char *token=NULL,*sub_token=NULL,*saveptr; + long left_offset=0,right_offset=0,hashed_length=0; + int ret=0,first=0; + for (token = data; ; token= NULL) + { + sub_token= strtok_r(token,"[", &saveptr); + if (sub_token == NULL) + { + break; + } + if(first==0)//jump over the first sub string. 
+ { + first=1; + continue; + } + ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); + if(ret!=2) + { + return 0; + } + assert(ret==2); + hashed_length+=right_offset-left_offset+1; + } + //printf("hashed length=%ld\n",hashed_length); + free(data); + return hashed_length/2; +} + +void print_mistake_td(const uchar *key,uint size,void *data,void *arg) +{ + FILE *ripe_file = (FILE*)arg; + top_similiar_sfh *temp_top_similiar_sfh=(top_similiar_sfh*)data; + fprintf(ripe_file,"%s,%s\n",key,temp_top_similiar_sfh->sfh_str); + sfh_link *temp_sfh_link=temp_top_similiar_sfh->sfh_link_items; + for(;;temp_sfh_link=temp_sfh_link->next) + { + if(temp_sfh_link==NULL) + { + break; + } + fprintf(ripe_file,"%d;%s;%d\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->hash_len); + } + fprintf(ripe_file,"\n"); +} +int main() +{ + FILE *raw_file; + FILE *ripe_file; + char *raw_file_dir="../data/td_data_set/td_data_20171207/video_id_raw_data"; + char *ripe_file_dir="../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_2"; + char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL; + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + long temp_hash_len=0; + unsigned int slot_size=SLOT_SIZE; + int i=0,thread_safe=THREAD_SAFE,ret=0,temp_similiar=0,temp_all_similiar=0; + top_similiar_sfh *temp_top_similiar_sfh=NULL; + sfh_link *temp_sfh_link=NULL; + MESA_htable_handle htable=NULL; + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + + if(ripe_file==NULL) + { + printf("open all_av_digest_mistake_level error"); + return -1; + } + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + htable=MESA_htable_born(); + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); + MESA_htable_mature(htable); + while(feof(raw_file)==0) + { + i++; + if(i%10000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==2); + td[32]='\0'; + if((temp_top_similiar_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + { + temp_top_similiar_sfh=(top_similiar_sfh*)calloc(1,sizeof(top_similiar_sfh)); + temp_top_similiar_sfh->all_num=1; + temp_top_similiar_sfh->all_similiar=0; + temp_top_similiar_sfh->hash_len=get_hashed_len(sfh_str); + temp_top_similiar_sfh->sfh_str=strdup(sfh_str); + temp_top_similiar_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_top_similiar_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + temp_top_similiar_sfh->sfh_link_items->similiar=0; + temp_top_similiar_sfh->sfh_link_items->all_similiar=0; + temp_top_similiar_sfh->sfh_link_items->next=NULL; + ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_top_similiar_sfh); + assert(ret>0); + } + else + { + temp_similiar=GIE_sfh_similiarity(temp_top_similiar_sfh->sfh_str,(int)strlen(temp_top_similiar_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_top_similiar_sfh->all_similiar+=temp_similiar; + temp_sfh_link=temp_top_similiar_sfh->sfh_link_items; + for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) + { + temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh_link->all_similiar+=temp_similiar; + temp_all_similiar+=temp_similiar; + 
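+ /* promote whichever linked SFH has the highest accumulated similarity to be the representative digest for this TD */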
if(temp_sfh_link->all_similiar>temp_top_similiar_sfh->all_similiar) + { + free(temp_top_similiar_sfh->sfh_str); + temp_top_similiar_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); + temp_top_similiar_sfh->all_similiar=temp_sfh_link->all_similiar; + } + if(temp_sfh_link->next==NULL) + { + break; + } + } + temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh_link->next->sfh_str=strdup(sfh_str); + temp_sfh_link->next->hash_len=get_hashed_len(sfh_str); + temp_sfh_link->next->similiar=0; + temp_sfh_link->next->all_similiar=temp_all_similiar; + temp_sfh_link->next->next=NULL; + temp_top_similiar_sfh->all_num+=1; + } + } + fclose(raw_file); + MESA_htable_iterate(htable,print_mistake_td,ripe_file); +} \ No newline at end of file diff --git a/src/test/digest_temp b/src/test/digest_temp new file mode 100644 index 0000000..3cde5ae --- /dev/null +++ b/src/test/digest_temp @@ -0,0 +1,8 @@ +with open('./temp1','r') as file_sfh: + with open('./temp2','w') as out_file: + for line in file_sfh: + line=line.replace("\n","").replace("\"","").replace("\t","").replace(",","").strip() + if(line.find("mid")>=0): + out_file.write(line[4:]+",") + else: + out_file.write(line[7:]+"\n") \ No newline at end of file diff --git a/test/digest_temp b/test/digest_temp deleted file mode 100644 index 3cde5ae..0000000 --- a/test/digest_temp +++ /dev/null @@ -1,8 +0,0 @@ -with open('./temp1','r') as file_sfh: - with open('./temp2','w') as out_file: - for line in file_sfh: - line=line.replace("\n","").replace("\"","").replace("\t","").replace(",","").strip() - if(line.find("mid")>=0): - out_file.write(line[4:]+",") - else: - out_file.write(line[7:]+"\n") \ No newline at end of file -- cgit v1.2.3