| author | 陈冠林 <[email protected]> | 2019-06-18 10:44:20 +0800 |
|---|---|---|
| committer | 陈冠林 <[email protected]> | 2019-06-18 10:44:20 +0800 |
| commit | b2a2f39d89b3bd154da10eb619f8a40c7c6b15d2 (patch) | |
| tree | b7a7d489030cfcc3b2fa878520d8c5d42dc5fce6 /src | |
| parent | b026525362d7f3b0ad58fb74362bf7f95ab515e8 (diff) | |
Diffstat (limited to 'src')
67 files changed, 5145 insertions, 0 deletions
diff --git a/src/CMakeCache.txt b/src/CMakeCache.txt new file mode 100644 index 0000000..5aa46ec --- /dev/null +++ b/src/CMakeCache.txt @@ -0,0 +1,278 @@ +# This is the CMakeCache file. +# For build in directory: /home/chenguanlin/TD_evaluation/src +# It was generated by CMake: /usr/bin/cmake +# You can edit this file to change values found and used by cmake. +# If you do not want to change any of the values, simply exit the editor. +# If you do want to change a value, simply edit, save, and exit the editor. +# The syntax for the file is as follows: +# KEY:TYPE=VALUE +# KEY is the name of a variable in the cache. +# TYPE is a hint to GUI's for the type of VALUE, DO NOT EDIT TYPE!. +# VALUE is the current value for the KEY. + +######################## +# EXTERNAL cache entries +######################## + +//Value Computed by CMake +CALCULATE_BINARY_DIR:STATIC=/home/chenguanlin/TD_evaluation/src + +//Value Computed by CMake +CALCULATE_SOURCE_DIR:STATIC=/home/chenguanlin/TD_evaluation/src + +//Path to a program. +CMAKE_AR:FILEPATH=/usr/bin/ar + +//Choose the type of build, options are: None(CMAKE_CXX_FLAGS or +// CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel. +CMAKE_BUILD_TYPE:STRING= + +//Enable/Disable color output during build. +CMAKE_COLOR_MAKEFILE:BOOL=ON + +//CXX compiler. +CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++ + +//Flags used by the compiler during all build types. +CMAKE_CXX_FLAGS:STRING= + +//Flags used by the compiler during debug builds. +CMAKE_CXX_FLAGS_DEBUG:STRING=-g + +//Flags used by the compiler during release minsize builds. +CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the compiler during release builds (/MD /Ob1 /Oi +// /Ot /Oy /Gs will produce slightly less optimized but smaller +// files). +CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the compiler during Release with Debug Info builds. +CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g + +//C compiler. +CMAKE_C_COMPILER:FILEPATH=/usr/bin/gcc + +//Flags used by the compiler during all build types. +CMAKE_C_FLAGS:STRING= + +//Flags used by the compiler during debug builds. +CMAKE_C_FLAGS_DEBUG:STRING=-g + +//Flags used by the compiler during release minsize builds. +CMAKE_C_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the compiler during release builds (/MD /Ob1 /Oi +// /Ot /Oy /Gs will produce slightly less optimized but smaller +// files). +CMAKE_C_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the compiler during Release with Debug Info builds. +CMAKE_C_FLAGS_RELWITHDEBINFO:STRING=-O2 -g + +//Flags used by the linker. +CMAKE_EXE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Install path prefix, prepended onto install directories. +CMAKE_INSTALL_PREFIX:PATH=/usr/local + +//Path to a program. +CMAKE_LINKER:FILEPATH=/usr/bin/ld + +//Path to a program. +CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake + +//Flags used by the linker during the creation of modules. +CMAKE_MODULE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. 
+CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_NM:FILEPATH=/usr/bin/nm + +//Path to a program. +CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy + +//Path to a program. +CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump + +//Path to a program. +CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib + +//Flags used by the linker during the creation of dll's. +CMAKE_SHARED_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//If set, runtime paths are not added when using shared libraries. +CMAKE_SKIP_RPATH:BOOL=NO + +//Path to a program. +CMAKE_STRIP:FILEPATH=/usr/bin/strip + +//If true, cmake will use relative paths in makefiles and projects. +CMAKE_USE_RELATIVE_PATHS:BOOL=OFF + +//If this value is on, makefiles will be generated without the +// .SILENT directive, and all commands will be echoed to the console +// during the make. This is useful for debugging only. With Visual +// Studio IDE projects all commands are done without /nologo. +CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE + + +######################## +# INTERNAL cache entries +######################## + +//Advanced flag for variable: CMAKE_AR +CMAKE_AR-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_BUILD_TOOL +CMAKE_BUILD_TOOL-ADVANCED:INTERNAL=1 +//What is the target build tool cmake is generating for. +CMAKE_BUILD_TOOL:INTERNAL=/usr/bin/gmake +//This is the directory where this CMakeCahe.txt was created +CMAKE_CACHEFILE_DIR:INTERNAL=/home/chenguanlin/TD_evaluation/src +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_MAJOR_VERSION:INTERNAL=2 +//Minor version of cmake used to create the current loaded cache +CMAKE_CACHE_MINOR_VERSION:INTERNAL=6 +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_RELEASE_VERSION:INTERNAL=patch 4 +//Advanced flag for variable: CMAKE_COLOR_MAKEFILE +CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1 +//Path to CMake executable. +CMAKE_COMMAND:INTERNAL=/usr/bin/cmake +//Path to cpack program executable. +CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack +//Path to ctest program executable. 
+CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest +//Advanced flag for variable: CMAKE_CXX_COMPILER +CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 +CMAKE_CXX_COMPILER_WORKS:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS +CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_DEBUG +CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_MINSIZEREL +CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_RELEASE +CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO +CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_COMPILER +CMAKE_C_COMPILER-ADVANCED:INTERNAL=1 +CMAKE_C_COMPILER_WORKS:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS +CMAKE_C_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_DEBUG +CMAKE_C_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_MINSIZEREL +CMAKE_C_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_RELEASE +CMAKE_C_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_C_FLAGS_RELWITHDEBINFO +CMAKE_C_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Result of TRY_COMPILE +CMAKE_DETERMINE_CXX_ABI_COMPILED:INTERNAL=TRUE +//Result of TRY_COMPILE +CMAKE_DETERMINE_C_ABI_COMPILED:INTERNAL=TRUE +//Path to cache edit program executable. +CMAKE_EDIT_COMMAND:INTERNAL=/usr/bin/ccmake +//Executable file format +CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS +CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG +CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE +CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Name of generator. +CMAKE_GENERATOR:INTERNAL=Unix Makefiles +//Start directory with the top level CMakeLists.txt file for this +// project +CMAKE_HOME_DIRECTORY:INTERNAL=/home/chenguanlin/TD_evaluation/src +//Install .so files without execute permission. 
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=0 +//Advanced flag for variable: CMAKE_LINKER +CMAKE_LINKER-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MAKE_PROGRAM +CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS +CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG +CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE +CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_NM +CMAKE_NM-ADVANCED:INTERNAL=1 +//number of local generators +CMAKE_NUMBER_OF_LOCAL_GENERATORS:INTERNAL=1 +//Advanced flag for variable: CMAKE_OBJCOPY +CMAKE_OBJCOPY-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_OBJDUMP +CMAKE_OBJDUMP-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_RANLIB +CMAKE_RANLIB-ADVANCED:INTERNAL=1 +//Path to CMake installation. +CMAKE_ROOT:INTERNAL=/usr/share/cmake +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS +CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG +CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE +CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_SKIP_RPATH +CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_STRIP +CMAKE_STRIP-ADVANCED:INTERNAL=1 +//uname command +CMAKE_UNAME:INTERNAL=/bin/uname +//Advanced flag for variable: CMAKE_USE_RELATIVE_PATHS +CMAKE_USE_RELATIVE_PATHS-ADVANCED:INTERNAL=1 +//Advanced flag for variable: CMAKE_VERBOSE_MAKEFILE +CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 + diff --git a/src/CMakeFiles/CMakeCCompiler.cmake b/src/CMakeFiles/CMakeCCompiler.cmake new file mode 100644 index 0000000..1d1bbea --- /dev/null +++ b/src/CMakeFiles/CMakeCCompiler.cmake @@ -0,0 +1,36 @@ +SET(CMAKE_C_COMPILER "/usr/bin/gcc") +SET(CMAKE_C_COMPILER_ARG1 "") +SET(CMAKE_C_COMPILER_ID "GNU") +SET(CMAKE_C_PLATFORM_ID "Linux") +SET(CMAKE_AR "/usr/bin/ar") +SET(CMAKE_RANLIB "/usr/bin/ranlib") +SET(CMAKE_COMPILER_IS_GNUCC 1) +SET(CMAKE_C_COMPILER_LOADED 1) +SET(CMAKE_COMPILER_IS_MINGW ) +SET(CMAKE_COMPILER_IS_CYGWIN ) +IF(CMAKE_COMPILER_IS_CYGWIN) + SET(CYGWIN 1) + SET(UNIX 1) +ENDIF(CMAKE_COMPILER_IS_CYGWIN) + +SET(CMAKE_C_COMPILER_ENV_VAR "CC") + +IF(CMAKE_COMPILER_IS_MINGW) + SET(MINGW 1) +ENDIF(CMAKE_COMPILER_IS_MINGW) +SET(CMAKE_C_COMPILER_ID_RUN 1) +SET(CMAKE_C_SOURCE_FILE_EXTENSIONS c) +SET(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) +SET(CMAKE_C_LINKER_PREFERENCE 10) + +# Save compiler ABI information. 
+SET(CMAKE_C_SIZEOF_DATA_PTR "8") +SET(CMAKE_C_COMPILER_ABI "ELF") + +IF(CMAKE_C_SIZEOF_DATA_PTR) + SET(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") +ENDIF(CMAKE_C_SIZEOF_DATA_PTR) + +IF(CMAKE_C_COMPILER_ABI) + SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") +ENDIF(CMAKE_C_COMPILER_ABI) diff --git a/src/CMakeFiles/CMakeCXXCompiler.cmake b/src/CMakeFiles/CMakeCXXCompiler.cmake new file mode 100644 index 0000000..64dad77 --- /dev/null +++ b/src/CMakeFiles/CMakeCXXCompiler.cmake @@ -0,0 +1,36 @@ +SET(CMAKE_CXX_COMPILER "/usr/bin/c++") +SET(CMAKE_CXX_COMPILER_ARG1 "") +SET(CMAKE_CXX_COMPILER_ID "GNU") +SET(CMAKE_CXX_PLATFORM_ID "Linux") +SET(CMAKE_AR "/usr/bin/ar") +SET(CMAKE_RANLIB "/usr/bin/ranlib") +SET(CMAKE_COMPILER_IS_GNUCXX 1) +SET(CMAKE_CXX_COMPILER_LOADED 1) +SET(CMAKE_COMPILER_IS_MINGW ) +SET(CMAKE_COMPILER_IS_CYGWIN ) +IF(CMAKE_COMPILER_IS_CYGWIN) + SET(CYGWIN 1) + SET(UNIX 1) +ENDIF(CMAKE_COMPILER_IS_CYGWIN) + +SET(CMAKE_CXX_COMPILER_ENV_VAR "CXX") + +IF(CMAKE_COMPILER_IS_MINGW) + SET(MINGW 1) +ENDIF(CMAKE_COMPILER_IS_MINGW) +SET(CMAKE_CXX_COMPILER_ID_RUN 1) +SET(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;H;o;O;obj;OBJ;def;DEF;rc;RC) +SET(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm) +SET(CMAKE_CXX_LINKER_PREFERENCE 30) + +# Save compiler ABI information. +SET(CMAKE_CXX_SIZEOF_DATA_PTR "8") +SET(CMAKE_CXX_COMPILER_ABI "ELF") + +IF(CMAKE_CXX_SIZEOF_DATA_PTR) + SET(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") +ENDIF(CMAKE_CXX_SIZEOF_DATA_PTR) + +IF(CMAKE_CXX_COMPILER_ABI) + SET(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") +ENDIF(CMAKE_CXX_COMPILER_ABI) diff --git a/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin b/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin Binary files differnew file mode 100644 index 0000000..9a91ed2 --- /dev/null +++ b/src/CMakeFiles/CMakeDetermineCompilerABI_C.bin diff --git a/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin b/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin Binary files differnew file mode 100644 index 0000000..be1e6e2 --- /dev/null +++ b/src/CMakeFiles/CMakeDetermineCompilerABI_CXX.bin diff --git a/src/CMakeFiles/CMakeDirectoryInformation.cmake b/src/CMakeFiles/CMakeDirectoryInformation.cmake new file mode 100644 index 0000000..66066d5 --- /dev/null +++ b/src/CMakeFiles/CMakeDirectoryInformation.cmake @@ -0,0 +1,21 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# Relative path conversion top directories. +SET(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/chenguanlin/TD_evaluation/src") +SET(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/chenguanlin/TD_evaluation/src") + +# Force unix paths in dependencies. +SET(CMAKE_FORCE_UNIX_PATHS 1) + +# The C and CXX include file search paths: +SET(CMAKE_C_INCLUDE_PATH + ) +SET(CMAKE_CXX_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH}) +SET(CMAKE_Fortran_INCLUDE_PATH ${CMAKE_C_INCLUDE_PATH}) + +# The C and CXX include file regular expressions for this directory. +SET(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") +SET(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") +SET(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) +SET(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) diff --git a/src/CMakeFiles/CMakeOutput.log b/src/CMakeFiles/CMakeOutput.log new file mode 100644 index 0000000..3475153 --- /dev/null +++ b/src/CMakeFiles/CMakeOutput.log @@ -0,0 +1,89 @@ +The system is: Linux - 2.6.32-358.el6.x86_64 - x86_64 +Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. 
+Compiler: /usr/bin/gcc +Build flags: +Id flags: + +The output was: +0 + + +Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out" + +The C compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdC/a.out" + +Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. +Compiler: /usr/bin/c++ +Build flags: +Id flags: + +The output was: +0 +CMakeCXXCompilerId.cpp:67: warning: deprecated conversion from string constant to ‘char*’ +CMakeCXXCompilerId.cpp:157: warning: deprecated conversion from string constant to ‘char*’ + + +Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out" + +The CXX compiler identification is GNU, found in "/home/chenguanlin/TD_evaluation/src/CMakeFiles/CompilerIdCXX/a.out" + +Determining if the C compiler works passed with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building C object CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o +/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -c /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCCompiler.c +Linking C executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/testCCompiler.c.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + +Detecting C compiler ABI info compiled with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building C object CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o +/usr/bin/gcc -o CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -c /usr/share/cmake/Modules/CMakeCCompilerABI.c +Linking C executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/gcc -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCCompilerABI.c.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + +Determining if the CXX compiler works passed with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building CXX object CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o +/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -c 
/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/testCXXCompiler.cxx +Linking CXX executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/testCXXCompiler.cxx.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + +Detecting CXX compiler ABI info compiled with the following output: +Change Dir: /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp + +Run Build Command:/usr/bin/gmake "cmTryCompileExec/fast" +/usr/bin/gmake -f CMakeFiles/cmTryCompileExec.dir/build.make CMakeFiles/cmTryCompileExec.dir/build +gmake[1]: Entering directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' +/usr/bin/cmake -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp/CMakeFiles 1 +Building CXX object CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o +/usr/bin/c++ -o CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -c /usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp +Linking CXX executable cmTryCompileExec +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTryCompileExec.dir/link.txt --verbose=1 +/usr/bin/c++ -fPIC CMakeFiles/cmTryCompileExec.dir/CMakeCXXCompilerABI.cpp.o -o cmTryCompileExec -rdynamic +gmake[1]: Leaving directory `/home/chenguanlin/TD_evaluation/src/CMakeFiles/CMakeTmp' + + diff --git a/src/CMakeFiles/CMakeSystem.cmake b/src/CMakeFiles/CMakeSystem.cmake new file mode 100644 index 0000000..3b3e967 --- /dev/null +++ b/src/CMakeFiles/CMakeSystem.cmake @@ -0,0 +1,15 @@ + + +SET(CMAKE_SYSTEM "Linux-2.6.32-358.el6.x86_64") +SET(CMAKE_SYSTEM_NAME "Linux") +SET(CMAKE_SYSTEM_VERSION "2.6.32-358.el6.x86_64") +SET(CMAKE_SYSTEM_PROCESSOR "x86_64") + +SET(CMAKE_HOST_SYSTEM "Linux-2.6.32-358.el6.x86_64") +SET(CMAKE_HOST_SYSTEM_NAME "Linux") +SET(CMAKE_HOST_SYSTEM_VERSION "2.6.32-358.el6.x86_64") +SET(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") + +SET(CMAKE_CROSSCOMPILING "FALSE") + +SET(CMAKE_SYSTEM_LOADED 1) diff --git a/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c b/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c new file mode 100644 index 0000000..7fd0088 --- /dev/null +++ b/src/CMakeFiles/CompilerIdC/CMakeCCompilerId.c @@ -0,0 +1,182 @@ +#ifdef __cplusplus +# error "A C++ compiler has been selected for C." +#endif + +#if defined(__18CXX) +# define ID_VOID_MAIN +#endif + +#if defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + +#elif defined(__WATCOMC__) +# define COMPILER_ID "Watcom" + +#elif defined(__SUNPRO_C) +# define COMPILER_ID "SunPro" + +#elif defined(__HP_cc) +# define COMPILER_ID "HP" + +#elif defined(__DECC) +# define COMPILER_ID "Compaq" + +#elif defined(__IBMC__) +# define COMPILER_ID "VisualAge" + +#elif defined(__PGI) +# define COMPILER_ID "PGI" + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + +#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +/* Analog Devices C++ compiler for Blackfin, TigerSHARC and + SHARC (21000) DSPs */ +# define COMPILER_ID "ADSP" + +/* IAR Systems compiler for embedded systems. 
+ http://www.iar.com + Not supported yet by CMake +#elif defined(__IAR_SYSTEMS_ICC__) +# define COMPILER_ID "IAR" */ + +/* sdcc, the small devices C compiler for embedded systems, + http://sdcc.sourceforge.net */ +#elif defined(SDCC) +# define COMPILER_ID "SDCC" + +#elif defined(_COMPILER_VERSION) +# define COMPILER_ID "MIPSpro" + +/* This compiler is either not known or is too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__sgi) +# define COMPILER_ID "MIPSpro" + +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) +# define PLATFORM_ID "IRIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU) +# define PLATFORM_ID "Haiku" +/* Haiku also defines __BEOS__ so we must + put it prior to the check for __BEOS__ +*/ + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#else /* unknown platform */ +# define PLATFORM_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. 
Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; + + +/*--------------------------------------------------------------------------*/ + +#ifdef ID_VOID_MAIN +void main() {} +#else +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + (void)argv; + return require; +} +#endif diff --git a/src/CMakeFiles/CompilerIdC/a.out b/src/CMakeFiles/CompilerIdC/a.out Binary files differnew file mode 100644 index 0000000..c389161 --- /dev/null +++ b/src/CMakeFiles/CompilerIdC/a.out diff --git a/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp b/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp new file mode 100644 index 0000000..f8c041f --- /dev/null +++ b/src/CMakeFiles/CompilerIdCXX/CMakeCXXCompilerId.cpp @@ -0,0 +1,169 @@ +/* This source file must have a .cpp extension so that all C++ compilers + recognize the extension without flags. Borland does not know .cxx for + example. */ +#ifndef __cplusplus +# error "A C compiler has been selected for C++." +#endif + +#if defined(__COMO__) +# define COMPILER_ID "Comeau" + +#elif defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + +#elif defined(__WATCOMC__) +# define COMPILER_ID "Watcom" + +#elif defined(__SUNPRO_CC) +# define COMPILER_ID "SunPro" + +#elif defined(__HP_aCC) +# define COMPILER_ID "HP" + +#elif defined(__DECCXX) +# define COMPILER_ID "Compaq" + +#elif defined(__IBMCPP__) +# define COMPILER_ID "VisualAge" + +#elif defined(__PGI) +# define COMPILER_ID "PGI" + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + +#elif defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +/* Analog Devices C++ compiler for Blackfin, TigerSHARC and + SHARC (21000) DSPs */ +# define COMPILER_ID "ADSP" + +#elif defined(_COMPILER_VERSION) +# define COMPILER_ID "MIPSpro" + +/* This compiler is either not known or is too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__sgi) +# define COMPILER_ID "MIPSpro" + +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; + +/* Identify known platforms by name. 
*/ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) +# define PLATFORM_ID "IRIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU) || defined(__HAIKU__) || defined(_HAIKU) +# define PLATFORM_ID "Haiku" +/* Haiku also defines __BEOS__ so we must + put it prior to the check for __BEOS__ +*/ + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#else /* unknown platform */ +# define PLATFORM_ID "" + +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; + + +/*--------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + (void)argv; + return require; +} diff --git a/src/CMakeFiles/CompilerIdCXX/a.out b/src/CMakeFiles/CompilerIdCXX/a.out Binary files differnew file mode 100644 index 0000000..65597e7 --- /dev/null +++ b/src/CMakeFiles/CompilerIdCXX/a.out diff --git a/src/CMakeFiles/Makefile.cmake b/src/CMakeFiles/Makefile.cmake new file mode 100644 index 0000000..8466809 --- /dev/null +++ b/src/CMakeFiles/Makefile.cmake @@ -0,0 +1,52 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# The generator used is: +SET(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") + +# The top level Makefile was generated from the following files: +SET(CMAKE_MAKEFILE_DEPENDS + "CMakeCache.txt" + "CMakeFiles/CMakeCCompiler.cmake" + "CMakeFiles/CMakeCXXCompiler.cmake" + "CMakeFiles/CMakeSystem.cmake" + "CMakeLists.txt" + "/usr/share/cmake/Modules/CMakeCCompiler.cmake.in" + "/usr/share/cmake/Modules/CMakeCCompilerABI.c" + "/usr/share/cmake/Modules/CMakeCInformation.cmake" + "/usr/share/cmake/Modules/CMakeCXXCompiler.cmake.in" + "/usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp" + "/usr/share/cmake/Modules/CMakeCXXInformation.cmake" + "/usr/share/cmake/Modules/CMakeCommonLanguageInclude.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCCompiler.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompilerABI.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake" + "/usr/share/cmake/Modules/CMakeDetermineSystem.cmake" + "/usr/share/cmake/Modules/CMakeFindBinUtils.cmake" + "/usr/share/cmake/Modules/CMakeGenericSystem.cmake" + "/usr/share/cmake/Modules/CMakeSystem.cmake.in" + "/usr/share/cmake/Modules/CMakeSystemSpecificInformation.cmake" + "/usr/share/cmake/Modules/CMakeTestCCompiler.cmake" + "/usr/share/cmake/Modules/CMakeTestCXXCompiler.cmake" + "/usr/share/cmake/Modules/CMakeUnixFindMake.cmake" + "/usr/share/cmake/Modules/Platform/Linux.cmake" + "/usr/share/cmake/Modules/Platform/UnixPaths.cmake" + "/usr/share/cmake/Modules/Platform/gcc.cmake" + ) + +# The corresponding makefile is: +SET(CMAKE_MAKEFILE_OUTPUTS + "Makefile" + "CMakeFiles/cmake.check_cache" + "CMakeFiles/CMakeDirectoryInformation.cmake" + ) + +# Byproducts of CMake generate step: +SET(CMAKE_MAKEFILE_PRODUCTS + ) + +# Dependency information for all targets: +SET(CMAKE_DEPEND_INFO_FILES + "CMakeFiles/grain.dir/DependInfo.cmake" + ) diff --git a/src/CMakeFiles/Makefile2 b/src/CMakeFiles/Makefile2 new file mode 100644 index 0000000..405383d --- /dev/null +++ b/src/CMakeFiles/Makefile2 @@ -0,0 +1,99 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target + +# The main recursive all target +all: +.PHONY : all + +# The main recursive preinstall target +preinstall: +.PHONY : preinstall + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canoncical targets will work. +.SUFFIXES: + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# The program to use to edit the cache. +CMAKE_EDIT_COMMAND = /usr/bin/ccmake + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src + +# The top-level build directory on which CMake was run. 
+CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src + +#============================================================================= +# Target rules for target CMakeFiles/grain.dir + +# All Build rule for target. +CMakeFiles/grain.dir/all: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/depend + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build + $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles 1 + @echo "Built target grain" +.PHONY : CMakeFiles/grain.dir/all + +# Include target in all. +all: CMakeFiles/grain.dir/all +.PHONY : all + +# Build rule for subdir invocation for target. +CMakeFiles/grain.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 1 + $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/grain.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0 +.PHONY : CMakeFiles/grain.dir/rule + +# Convenience name for target. +grain: CMakeFiles/grain.dir/rule +.PHONY : grain + +# clean rule for target. +CMakeFiles/grain.dir/clean: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/clean +.PHONY : CMakeFiles/grain.dir/clean + +# clean rule for target. +clean: CMakeFiles/grain.dir/clean +.PHONY : clean + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/src/CMakeFiles/Progress/1 b/src/CMakeFiles/Progress/1 new file mode 100644 index 0000000..7b4d68d --- /dev/null +++ b/src/CMakeFiles/Progress/1 @@ -0,0 +1 @@ +empty
\ No newline at end of file diff --git a/src/CMakeFiles/Progress/count.txt b/src/CMakeFiles/Progress/count.txt new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/src/CMakeFiles/Progress/count.txt @@ -0,0 +1 @@ +1 diff --git a/src/CMakeFiles/cmake.check_cache b/src/CMakeFiles/cmake.check_cache new file mode 100644 index 0000000..3dccd73 --- /dev/null +++ b/src/CMakeFiles/cmake.check_cache @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/src/CMakeFiles/grain.dir/C.includecache b/src/CMakeFiles/grain.dir/C.includecache new file mode 100644 index 0000000..d4110de --- /dev/null +++ b/src/CMakeFiles/grain.dir/C.includecache @@ -0,0 +1,24 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +/home/chenguanlin/TD_evaluation/src/grain.c +stdio.h +- +string.h +- +stdlib.h +- +gram_index_engine.h +/home/chenguanlin/TD_evaluation/src/gram_index_engine.h +MESA/MESA_htable.h +- +assert.h +- +ctype.h +- + diff --git a/src/CMakeFiles/grain.dir/DependInfo.cmake b/src/CMakeFiles/grain.dir/DependInfo.cmake new file mode 100644 index 0000000..0504394 --- /dev/null +++ b/src/CMakeFiles/grain.dir/DependInfo.cmake @@ -0,0 +1,13 @@ +# The set of languages for which implicit dependencies are needed: +SET(CMAKE_DEPENDS_LANGUAGES + "C" + ) +# The set of files for implicit dependencies of each language: +SET(CMAKE_DEPENDS_CHECK_C + "/home/chenguanlin/TD_evaluation/src/grain.c" "/home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/grain.c.o" + ) +SET(CMAKE_C_COMPILER_ID "GNU") + +# Targets to which this target links. +SET(CMAKE_TARGET_LINKED_INFO_FILES + ) diff --git a/src/CMakeFiles/grain.dir/build.make b/src/CMakeFiles/grain.dir/build.make new file mode 100644 index 0000000..18e458a --- /dev/null +++ b/src/CMakeFiles/grain.dir/build.make @@ -0,0 +1,103 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canoncical targets will work. +.SUFFIXES: + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# The program to use to edit the cache. +CMAKE_EDIT_COMMAND = /usr/bin/ccmake + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src + +# Include any dependencies generated for this target. +include CMakeFiles/grain.dir/depend.make + +# Include the progress variables for this target. +include CMakeFiles/grain.dir/progress.make + +# Include the compile flags for this target's objects. 
+include CMakeFiles/grain.dir/flags.make + +CMakeFiles/grain.dir/grain.c.o: CMakeFiles/grain.dir/flags.make +CMakeFiles/grain.dir/grain.c.o: grain.c + $(CMAKE_COMMAND) -E cmake_progress_report /home/chenguanlin/TD_evaluation/src/CMakeFiles $(CMAKE_PROGRESS_1) + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Building C object CMakeFiles/grain.dir/grain.c.o" + /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -o CMakeFiles/grain.dir/grain.c.o -c /home/chenguanlin/TD_evaluation/src/grain.c + +CMakeFiles/grain.dir/grain.c.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing C source to CMakeFiles/grain.dir/grain.c.i" + /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -E /home/chenguanlin/TD_evaluation/src/grain.c > CMakeFiles/grain.dir/grain.c.i + +CMakeFiles/grain.dir/grain.c.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling C source to assembly CMakeFiles/grain.dir/grain.c.s" + /usr/bin/gcc $(C_DEFINES) $(C_FLAGS) -S /home/chenguanlin/TD_evaluation/src/grain.c -o CMakeFiles/grain.dir/grain.c.s + +CMakeFiles/grain.dir/grain.c.o.requires: +.PHONY : CMakeFiles/grain.dir/grain.c.o.requires + +CMakeFiles/grain.dir/grain.c.o.provides: CMakeFiles/grain.dir/grain.c.o.requires + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o.provides.build +.PHONY : CMakeFiles/grain.dir/grain.c.o.provides + +CMakeFiles/grain.dir/grain.c.o.provides.build: CMakeFiles/grain.dir/grain.c.o +.PHONY : CMakeFiles/grain.dir/grain.c.o.provides.build + +# Object files for target grain +grain_OBJECTS = \ +"CMakeFiles/grain.dir/grain.c.o" + +# External object files for target grain +grain_EXTERNAL_OBJECTS = + +grain: CMakeFiles/grain.dir/grain.c.o +grain: CMakeFiles/grain.dir/build.make +grain: CMakeFiles/grain.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --red --bold "Linking C executable grain" + $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/grain.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +CMakeFiles/grain.dir/build: grain +.PHONY : CMakeFiles/grain.dir/build + +CMakeFiles/grain.dir/requires: CMakeFiles/grain.dir/grain.c.o.requires +.PHONY : CMakeFiles/grain.dir/requires + +CMakeFiles/grain.dir/clean: + $(CMAKE_COMMAND) -P CMakeFiles/grain.dir/cmake_clean.cmake +.PHONY : CMakeFiles/grain.dir/clean + +CMakeFiles/grain.dir/depend: + cd /home/chenguanlin/TD_evaluation/src && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src /home/chenguanlin/TD_evaluation/src/CMakeFiles/grain.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : CMakeFiles/grain.dir/depend + diff --git a/src/CMakeFiles/grain.dir/cmake_clean.cmake b/src/CMakeFiles/grain.dir/cmake_clean.cmake new file mode 100644 index 0000000..54d1698 --- /dev/null +++ b/src/CMakeFiles/grain.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +FILE(REMOVE_RECURSE + "CMakeFiles/grain.dir/grain.c.o" + "grain.pdb" + "grain" +) + +# Per-language clean rules from dependency scanning. +FOREACH(lang C) + INCLUDE(CMakeFiles/grain.dir/cmake_clean_${lang}.cmake OPTIONAL) +ENDFOREACH(lang) diff --git a/src/CMakeFiles/grain.dir/depend.internal b/src/CMakeFiles/grain.dir/depend.internal new file mode 100644 index 0000000..f1b3d06 --- /dev/null +++ b/src/CMakeFiles/grain.dir/depend.internal @@ -0,0 +1,5 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +CMakeFiles/grain.dir/grain.c.o + /home/chenguanlin/TD_evaluation/src/grain.c diff --git a/src/CMakeFiles/grain.dir/depend.make b/src/CMakeFiles/grain.dir/depend.make new file mode 100644 index 0000000..85fc728 --- /dev/null +++ b/src/CMakeFiles/grain.dir/depend.make @@ -0,0 +1,5 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +CMakeFiles/grain.dir/grain.c.o: grain.c + diff --git a/src/CMakeFiles/grain.dir/flags.make b/src/CMakeFiles/grain.dir/flags.make new file mode 100644 index 0000000..72791e9 --- /dev/null +++ b/src/CMakeFiles/grain.dir/flags.make @@ -0,0 +1,8 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# compile C with /usr/bin/gcc +C_FLAGS = -g + +C_DEFINES = + diff --git a/src/CMakeFiles/grain.dir/link.txt b/src/CMakeFiles/grain.dir/link.txt new file mode 100644 index 0000000..0f3e72b --- /dev/null +++ b/src/CMakeFiles/grain.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/gcc -g -fPIC CMakeFiles/grain.dir/grain.c.o -o grain -rdynamic -lmaatframe -lMESA_htable -lpthread -lm diff --git a/src/CMakeFiles/grain.dir/progress.make b/src/CMakeFiles/grain.dir/progress.make new file mode 100644 index 0000000..781c7de --- /dev/null +++ b/src/CMakeFiles/grain.dir/progress.make @@ -0,0 +1,2 @@ +CMAKE_PROGRESS_1 = 1 + diff --git a/src/CMakeFiles/progress.make b/src/CMakeFiles/progress.make new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/src/CMakeFiles/progress.make @@ -0,0 +1 @@ +1 diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..a3fd6fa --- /dev/null +++ b/src/Makefile @@ -0,0 +1,163 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 2.6 + +# Default target executed when no arguments are given to make. +default_target: all +.PHONY : default_target + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canoncical targets will work. +.SUFFIXES: + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + +# A target that is always out of date. +cmake_force: +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# The program to use to edit the cache. +CMAKE_EDIT_COMMAND = /usr/bin/ccmake + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/chenguanlin/TD_evaluation/src + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/chenguanlin/TD_evaluation/src + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." 
+ /usr/bin/ccmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." + /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache +.PHONY : rebuild_cache/fast + +# The main all target +all: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles /home/chenguanlin/TD_evaluation/src/CMakeFiles/progress.make + $(MAKE) -f CMakeFiles/Makefile2 all + $(CMAKE_COMMAND) -E cmake_progress_start /home/chenguanlin/TD_evaluation/src/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + $(MAKE) -f CMakeFiles/Makefile2 clean +.PHONY : clean + +# The main clean target +clean/fast: clean +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +#============================================================================= +# Target rules for targets named grain + +# Build rule for target. +grain: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 grain +.PHONY : grain + +# fast build rule for target. +grain/fast: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/build +.PHONY : grain/fast + +grain.o: grain.c.o +.PHONY : grain.o + +# target to build an object file +grain.c.o: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.o +.PHONY : grain.c.o + +grain.i: grain.c.i +.PHONY : grain.i + +# target to preprocess a source file +grain.c.i: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.i +.PHONY : grain.c.i + +grain.s: grain.c.s +.PHONY : grain.s + +# target to generate assembly for a file +grain.c.s: + $(MAKE) -f CMakeFiles/grain.dir/build.make CMakeFiles/grain.dir/grain.c.s +.PHONY : grain.c.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... grain" + @echo "... rebuild_cache" + @echo "... grain.o" + @echo "... grain.i" + @echo "... grain.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. 
+cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/src/TD代码说明.docx b/src/TD代码说明.docx Binary files differnew file mode 100644 index 0000000..4d37049 --- /dev/null +++ b/src/TD代码说明.docx diff --git a/src/cmake_install.cmake b/src/cmake_install.cmake new file mode 100644 index 0000000..75c1e3c --- /dev/null +++ b/src/cmake_install.cmake @@ -0,0 +1,44 @@ +# Install script for directory: /home/chenguanlin/TD_evaluation/src + +# Set the install prefix +IF(NOT DEFINED CMAKE_INSTALL_PREFIX) + SET(CMAKE_INSTALL_PREFIX "/usr/local") +ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX) +STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + IF(BUILD_TYPE) + STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + ELSE(BUILD_TYPE) + SET(CMAKE_INSTALL_CONFIG_NAME "Debug") + ENDIF(BUILD_TYPE) + MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + +# Set the component getting installed. +IF(NOT CMAKE_INSTALL_COMPONENT) + IF(COMPONENT) + MESSAGE(STATUS "Install component: \"${COMPONENT}\"") + SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + ELSE(COMPONENT) + SET(CMAKE_INSTALL_COMPONENT) + ENDIF(COMPONENT) +ENDIF(NOT CMAKE_INSTALL_COMPONENT) + +# Install shared libraries without execute permission? +IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + SET(CMAKE_INSTALL_SO_NO_EXE "0") +ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + +IF(CMAKE_INSTALL_COMPONENT) + SET(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") +ELSE(CMAKE_INSTALL_COMPONENT) + SET(CMAKE_INSTALL_MANIFEST "install_manifest.txt") +ENDIF(CMAKE_INSTALL_COMPONENT) + +FILE(WRITE "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "") +FOREACH(file ${CMAKE_INSTALL_MANIFEST_FILES}) + FILE(APPEND "/home/chenguanlin/TD_evaluation/src/${CMAKE_INSTALL_MANIFEST}" "${file}\n") +ENDFOREACH(file) diff --git a/src/dataset_build/CMakeLists.txt b/src/dataset_build/CMakeLists.txt new file mode 100644 index 0000000..8840a74 --- /dev/null +++ b/src/dataset_build/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT (CALCULATE) +SET (SRC_LIST get_lost.c) +SET(CMAKE_BUILD_TYPE "Debug") +SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") +MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR}) +MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR}) +#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/) +#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/) +ADD_EXECUTABLE(get_lost ${SRC_LIST}) +TARGET_LINK_LIBRARIES(get_lost maatframe libMESA_htable.so pthread m) diff --git a/src/dataset_build/based_sfh.conf b/src/dataset_build/based_sfh.conf new file mode 100644 index 0000000..cdcf4cf --- /dev/null +++ b/src/dataset_build/based_sfh.conf @@ -0,0 +1,3 @@ +[file] +raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest_nots +ripe_file_address = ../../data/td_data_set/td_data_20171207/base_sfh_set
\ No newline at end of file
diff --git a/src/dataset_build/based_sfh.py b/src/dataset_build/based_sfh.py
new file mode 100644
index 0000000..b3281ce
--- /dev/null
+++ b/src/dataset_build/based_sfh.py
@@ -0,0 +1,44 @@
+import re
+import ConfigParser
+import bisect
+import random
+
+term = {'not_null':(lambda x : len(x)!=0)}
+
+config = ConfigParser.RawConfigParser()
+config.read("based_sfh.conf")
+raw_file_address = config.get("file","raw_file_address")
+ripe_file_address = config.get("file","ripe_file_address")
+
+class sfh_fingerprint(object):
+
+    def __init__(self,sfh):
+        self.sfh = sfh
+
+    @staticmethod
+    def get_hashed_len(sfh):
+        p = r"\[+\d+?:+\d+?\]"
+        pattern = re.compile(p)
+        hashed_len_set = pattern.findall(sfh)
+        if (term['not_null'](hashed_len_set)):
+            hashed_len = 0
+            for x in xrange(0,len(hashed_len_set)):
+                hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x])
+                hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1])
+            return hashed_len/len(hashed_len_set)
+        else :
+            return -1
+
+i=0
+sfh_set = list()
+with open(raw_file_address,'r') as infile:
+    with open(ripe_file_address,'w') as outfile:
+        for line in infile:
+            i+=1
+            if(i%100000==0):
+                print i
+            result = re.split(r';',line)
+            if(term['not_null'](result[3]) and term['not_null'](result[19])):
+                hashed_len = sfh_fingerprint.get_hashed_len(result[19])
+                if(hashed_len/int(result[3])>0.8):
+                    outfile.write(result[19]+'\n')
\ No newline at end of file
diff --git a/src/dataset_build/cal_information.conf b/src/dataset_build/cal_information.conf
new file mode 100644
index 0000000..1571b8b
--- /dev/null
+++ b/src/dataset_build/cal_information.conf
@@ -0,0 +1,5 @@
+[file]
+raw_file_address = ../../data/ripe_data/td_data_20171207/video_id.txt
+ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic
+[feature]
+feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify
\ No newline at end of file diff --git a/src/dataset_build/cal_information.py b/src/dataset_build/cal_information.py new file mode 100644 index 0000000..19cd95c --- /dev/null +++ b/src/dataset_build/cal_information.py @@ -0,0 +1,133 @@ +import re +import numpy +import ConfigParser +import binascii +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==4), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20), + 'not_null':(lambda x : len(x)!=0)} + +class calculation(object): + """docstring for calculation""" + def __init__(self, arg): + super(calculation, self).__init__() + self.arg = arg + + @staticmethod + def cal_ent(x): + x_value_list = set([x[i] for i in range(x.shape[0])]) + ent = 0.0 + num_0 = x[x == 0].shape[0] + for x_value in x_value_list: + if(x_value==0): + continue + p = float(x[x == x_value].shape[0])/(x.shape[0]- num_0) + logp = numpy.log2(p) + ent -=p*logp + return ent + +class data_value(object): + """docstring for data_value""" + def __init__(self, arg): + super(data_value, self).__init__() + self.arg = arg + + @staticmethod + def get_data_values(data): + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) + #data_set[0]=null,data_set[1]=url + data_value_dic = [long(0)]*6 + for x in xrange(1,len(feature_list)+1): + if(x==1): + if(term['not_null'](data_set[x])==True): + data_value_dic[0] = binascii.crc32(data_set[x]) + else: + data_value_dic[0] = 0 + elif(x==2): + if(term['not_null'](data_set[x])==True): + data_value_dic[1] = binascii.crc32(data_set[x]) + else: + data_value_dic[1] = 0 + elif(x==3): + data_value_dic[2] = long(data_set[x]) + elif(x==4): + data_value_dic[3] = long(data_set[x]) + elif(x==5): + if(term['not_null'](data_set[x])==True): + data_value_dic[4] = binascii.crc32(data_set[x]) + else: + data_value_dic[4] = 0 + elif(x==6): + if(term['not_null'](data_set[x])==True): + data_value_dic[5] = binascii.crc32(data_set[x]) + else: + data_value_dic[5] = 0 + return data_value_dic + +config = ConfigParser.RawConfigParser() +config.read("cal_information.conf") + +raw_file_address = config.get("file","raw_file_address") +ripe_file_address = config.get("file","ripe_file_address") +feature_list =[i for i in config.get("feature","feature_name").split(",")] + +i=0 +with open(raw_file_address,'r') as infile: + for line in infile: + i+=1 + if(i%10000==0): + print i + if(i==50000): + break + line_split = re.split(";",line) + data_value_temp = data_value.get_data_values(line_split[5]) + data_value_temp.extend([binascii.crc32(line_split[j]) for j in range(6,19)]) + data_value_temp.append(binascii.crc32(line_split[0])) + if(i==1): + a=numpy.array(data_value_temp) + else: + a=numpy.row_stack((a,numpy.array(data_value_temp))) + +for i in range(20): + if(i==0): + print "URL:"+str(calculation.cal_ent(a[:,i])) + elif(i==1): + print "ServerIP:"+str(calculation.cal_ent(a[:,i])) + elif(i==2): + print "MediaType:"+str(calculation.cal_ent(a[:,i])) + elif(i==3): + print "MediaLen:"+str(calculation.cal_ent(a[:,i])) + elif(i==4): + print "Etag:"+str(calculation.cal_ent(a[:,i])) + elif(i==5): + print "LastModify:"+str(calculation.cal_ent(a[:,i])) + elif(i==6): + print "td_0k:"+str(calculation.cal_ent(a[:,i])) + elif(i==7): + print "td_data_md5_1k:"+str(calculation.cal_ent(a[:,i])) + elif(i==8): + print "td_1k:"+str(calculation.cal_ent(a[:,i])) + elif(i==9): + print "td_data_md5_2k:"+str(calculation.cal_ent(a[:,i])) + elif(i==10): + print "td_2k:"+str(calculation.cal_ent(a[:,i])) + elif(i==11): + print 
"td_data_md5_4k:"+str(calculation.cal_ent(a[:,i])) + elif(i==12): + print "td_4k:"+str(calculation.cal_ent(a[:,i])) + elif(i==13): + print "td_data_md5_8k:"+str(calculation.cal_ent(a[:,i])) + elif(i==14): + print "td_8k:"+str(calculation.cal_ent(a[:,i])) + elif(i==15): + print "td_data_md5_16k:"+str(calculation.cal_ent(a[:,i])) + elif(i==16): + print "td_16k:"+str(calculation.cal_ent(a[:,i])) + elif(i==17): + print "td_data_md5_32k:"+str(calculation.cal_ent(a[:,i])) + elif(i==18): + print "td_32k:"+str(calculation.cal_ent(a[:,i])) + elif(i==19): + print "id:"+str(calculation.cal_ent(a[:,i])) + diff --git a/src/dataset_build/dataset_build.conf b/src/dataset_build/dataset_build.conf new file mode 100644 index 0000000..400e160 --- /dev/null +++ b/src/dataset_build/dataset_build.conf @@ -0,0 +1,8 @@ +[file] +raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level +ripe_file_address = ../../data/td_data_set/td_data_20171207/td_dataset +base_sfh_sets = ../../data/td_data_set/td_data_20171207/base_sfh_set +[output] +breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304 +[feature] +feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify
\ No newline at end of file diff --git a/src/dataset_build/dataset_build.py b/src/dataset_build/dataset_build.py new file mode 100644 index 0000000..a832072 --- /dev/null +++ b/src/dataset_build/dataset_build.py @@ -0,0 +1,144 @@ +import re +import ConfigParser +import bisect +import random +import ctypes +import hashlib +import zlib +import binascii +import json +import datetime +import time + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} + +config = ConfigParser.RawConfigParser() +config.read("dataset_build.conf") +raw_file_address = config.get("file","raw_file_address") +ripe_file_address = config.get("file","ripe_file_address") +base_sfh_sets = config.get("file","base_sfh_sets") +breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +feature_list =[i for i in config.get("feature","feature_name").split(",")] +ll=ctypes.cdll.LoadLibrary +lib = ll("libmaatframe.so") +lost = dict() + + +class data_value(object): + + @staticmethod + def get_feature(data): + return_data=list() + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data[5]) + for x in xrange(1,21): + if(x==1): + if(term['not_null'](data_set[6])): + try: + time1=datetime.datetime.strptime(data[1],'%Y-%m-%d %H:%M:%S')+datetime.timedelta(hours=int(8)) + data_set[6]=data_set[6][0:25] + time2=datetime.datetime.strptime(data_set[6],'%a, %d %b %Y %H:%M:%S') + except Exception, e: + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + else: + return_data.append(str((time1-time2).seconds)) + return_data.append(((time1-time2).seconds)/60) + return_data.append(((time1-time2).seconds)/3600) + return_data.append((time1-time2).days) + else: + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + elif(x==2): + continue + elif(x==3): + continue + elif(x==4): + return_data.append(long(data[4])) + elif(x==5): + if(term['not_null'](data_set[1])): + return_data.append(len(data_set[1])) + else: + return_data.append(-1) + if(term['not_null'](data_set[2])): + ip_set=re.split(r'\.',data_set[2]) + return_data.append(ip_set[0]) + return_data.append(ip_set[1]) + return_data.append(ip_set[2]) + return_data.append(ip_set[3]) + else: + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + return_data.append(-1) + if(term['not_null'](data_set[3])): + return_data.append(int(data_set[3])) + else: + return_data.append(-1) + if(term['not_null'](data_set[5])): + return_data.append(binascii.crc32(data_set[5])) + else: + return_data.append(-1) + if(term['not_null'](data_set[6])): + return_data.append(binascii.crc32(data_set[6])) + else: + return_data.append(-1) + elif(x==7): + return_data.append(binascii.crc32(data[7])) + elif(x==9): + return_data.append(binascii.crc32(data[9])) + elif(x==11): + return_data.append(binascii.crc32(data[11])) + elif(x==13): + return_data.append(binascii.crc32(data[13])) + elif(x==15): + return_data.append(binascii.crc32(data[15])) + elif(x==17): + return_data.append(binascii.crc32(data[17])) + return return_data + # data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) + # #data_set[0]=null,data_set[1]=url + # data_value_dic = dict() + # for x in xrange(1,len(feature_list)+1): + # 
if(x==1): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # elif(x==2): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # elif(x==3): + # data_value_dic[feature_list[x-1]] = data_set[x] + # elif(x==4): + # data_value_dic[feature_list[x-1]] = data_set[x] + # elif(x==5): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # elif(x==6): + # data_value_dic[feature_list[x-1]] = binascii.crc32(data_set[x]) + # return data_value_dic + + +i=0 +sfh_set = list() +with open(raw_file_address,'r') as infile: + with open(ripe_file_address,'w') as outfile: + for line in infile: + i+=1 + if(i%10000==0): + print i + line_return = re.split(r';',line) + # if(int(line_return[0])==0): + # print 'td is right' + outfile.write(str(line_return[0])+',') + return_data=data_value.get_feature(line_return) + for x in range(19): + if(x==18): + outfile.write(str(return_data[18])+'\n') + else: + outfile.write(str(return_data[x])+',') diff --git a/src/dataset_build/feature_statistics.conf b/src/dataset_build/feature_statistics.conf new file mode 100644 index 0000000..12cf089 --- /dev/null +++ b/src/dataset_build/feature_statistics.conf @@ -0,0 +1,8 @@ +[file] +raw_file_address = ../../data/td_data_set/td_data_20171207/td.txt +ripe_file_address = ../../data/td_data_set/td_data_20171207/td_data_set_statistic +[output] +breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,44194304 +[feature] +type = data_value_statistics +feature_name = URL,ServerIP,MediaType,MediaLen,Etag,LastModify
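Editor's note: both dataset_build.conf and feature_statistics.conf carry an [output] breakpoints list of MediaLen thresholds (1 MiB up to 4 MiB); the last value in feature_statistics.conf, 44194304, looks like a typo for the 4194304 used in every other conf, though the file is left here as committed. The scripts bucket a MediaLen value with bisect.bisect, as in this small illustrative sketch:

import bisect

# Illustrative only: the bucketing call feature_statistics.py makes in
# meida_len_statistics(), using the breakpoints from the conf above.
breakpoints = [1048576, 1310720, 1572864, 1835008, 2097152, 3145728, 4194304]

for media_len in (900000, 2000000, 5000000):
    print("MediaLen %d -> bucket %d" % (media_len, bisect.bisect(breakpoints, media_len)))

Note that bisect.bisect can return 0 through 7 here while meida_len_statistics_set in the script that follows has only seven slots, so a value below the first breakpoint lands in slot -1 (the last one); that behaviour is preserved from the original.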
\ No newline at end of file
diff --git a/src/dataset_build/feature_statistics.py b/src/dataset_build/feature_statistics.py
new file mode 100644
index 0000000..52ae8e0
--- /dev/null
+++ b/src/dataset_build/feature_statistics.py
@@ -0,0 +1,164 @@
+import re
+import ConfigParser
+import bisect
+import random
+import ctypes
+import hashlib
+import zlib
+import binascii
+
+term = {'td_len':(lambda x : len(x)==32),
+    'data_num':(lambda x : len(x)==4),
+    'url':(lambda x : x.find('NUll')),
+    'sfh_len':(lambda x : len(x)>20),
+    'not_null':(lambda x : len(x)!=0)}
+
+class data_line(object):
+    """Filter for one raw ';'-separated record."""
+    def __init__(self):
+        super(data_line, self).__init__()
+
+    @staticmethod
+    def if_error(data_line_str):
+        data_line_val = re.split(r';',data_line_str)
+        hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19])
+        if(term['data_num'](data_line_val) and term['sfh_len'](data_line_val[19]) and term['td_len'](data_line_val[9])\
+            and term['td_len'](data_line_val[2]) and term['td_len'](data_line_val[13]) and term['td_len'](data_line_val[15])\
+            and term['td_len'](data_line_val[17]) and term['not_null'](data_line_val[18]) and term['not_null'](data_line_val[19])\
+            and hashed_len/float(data_line_val[3])>0.8):
+            return data_line_val
+        else:
+            return -1
+
+
+class feature_statistics(object):
+    """YSP feature_statistics"""
+    def __init__(self):
+        super(feature_statistics, self).__init__()
+        self.meida_len_statistics_set = [0,0,0,0,0,0,0]
+        self.lost_dict = dict()
+
+    def meida_len_statistics(self,meida_len):
+        j = bisect.bisect(breakpoints,meida_len)
+        self.meida_len_statistics_set[j-1]+=1
+
+    def data_value_statistics(self,data_value_dic,data_value):
+        data_value_str = str()
+        for x in xrange(0,len(feature_list)):
+            data_value_str = data_value_str+str(data_value_dic[feature_list[x]])+','
+
+        if(self.lost_dict.has_key(data_value_str)==False):
+            self.lost_dict[data_value_str]=[0,1,0.]
+ else: + if (int(result[3])==1): + self.lost_dict[data_value_str][0] += 1 + self.lost_dict[data_value_str][1] += 1 + else: + self.lost_dict[data_value_str][1] += 1 + + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + + @staticmethod + def get_base_sfh(data_set): + base_sfh = list() + for x in xrange(0,10): + base_sfh.append(data_set[x]) + return base_sfh + + + + +class data_value(object): + + @staticmethod + def get_data_values(data): + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data) + #data_set[0]=null,data_set[1]=url + data_value_dic = dict() + for x in xrange(1,len(feature_list)+1): + if(x==1): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + elif(x==2): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + elif(x==3): + data_value_dic[feature_list[x-1]] = data_set[x] + elif(x==4): + data_value_dic[feature_list[x-1]] = bisect.bisect(breakpoints,int(data_set[x])) + elif(x==5): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + elif(x==6): + data_value_dic[feature_list[x-1]] = 0 if(term['not_null']==False) else 1 + return data_value_dic + +config = ConfigParser.RawConfigParser() +config.read("feature_statistics.conf") + +feature_statistics_type = ("feature","type") +raw_file_address = config.get("file","raw_file_address") +ripe_file_address = config.get("file","ripe_file_address") + +if(feature_statistics_type=="meida_len_statistics"): + breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +elif(feature_statistics_type=="data_value_statistics"): + feature_list =[i for i in config.get("feature","feature_name").split(",")] +# ll=ctypes.cdll.LoadLibrary +# lib = ll("libmaatframe.so") + +i=0 +sfh_set = list() +statistic = feature_statistics() +with open(raw_file_address,'r') as infile: + for line in infile: + i+=1 + + + + + line_return = data_line.if_error(line) + if(line_return != -1): + if(feature_statistics_type=="meida_len_statistics"): + statistic.meida_len_statistics(line_return[3]) + elif(feature_statistics_type=="data_value_statistics"): + lost_list = list() + statistic.meida_len_statistics(line_return) + for i in statistic.lost: + (statistic.lost[i])[2] = float((statistic.lost[i])[0])/(statistic.lost[i])[1] + tmp = (i,int((statistic.lost[i])[0]),int((statistic.lost[i])[1]),float((statistic.lost[i])[2])) + lost_list.append(tmp) + print sorted(lost_list,cmp=lambda x,y:cmp(x[2],y[2])) + # if(x == len(feature_list)-1): + # outfile.write(data_value_dic[feature_list[x]]+'\n') + # else: + # print lost + # outfile.write(str(data_value_dic[feature_list[x]])+',') + # outfile.write(result[3]) + # sfh_dot=list() + # for x in xrange(0,10): + # #transform sfh to dot + # sfh_dot.append(lib.GIE_sfh_similiarity(result[19],len(result[19]),sfh_set[x],len(sfh_set[x]))) + # if(len(data_set)==7): + # outfile.write(str(data_set[0])+','+str(data_set[1])+','+str(data_set[2])\ + # +','+str(data_set[3])+','+str(data_set[4])+','+str(data_set[5])+','+result[5]\ + # 
+','+result[7]+','+result[9]+','+result[11]+','+result[13]+','+result[15]+result[17]\ + # +','+result[19]+'\n') + +# with open(ripe_file_address,'w') as outfile: +# outfile.write(str(lost)) diff --git a/src/dataset_build/file_digest.py b/src/dataset_build/file_digest.py new file mode 100644 index 0000000..590e059 --- /dev/null +++ b/src/dataset_build/file_digest.py @@ -0,0 +1,96 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +class data_line(object): + """docstring for ClassName""" + def __init__(self): + super(ClassName, self).__init__() + + @staticmethod + def if_error(data_line_str): + data_line_val = re.split(r';',data_line_str) + hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) + if(term['data_num'](data_line_val) and \ + term['not_null'](data_line_val[0]) and \ + term['ysp_len'](data_line_val[3]) and \ + term['not_null'](data_line_val[4]) and \ + term['td_len'](data_line_val[6]) and \ + term['td_len'](data_line_val[8]) and \ + term['td_len'](data_line_val[10]) and \ + term['td_len'](data_line_val[12]) and \ + term['td_len'](data_line_val[14]) and \ + term['td_len'](data_line_val[16]) and \ + term['not_null'](data_line_val[18]) and \ + term['sfh_len'](data_line_val[19]) and \ + term['not_null'](data_line_val[20]) and \ + hashed_len/float(data_line_val[3])>=0.8): + return data_line_val + else: + return -1 + +class TD_fingerprint(object): + def __init__(): + self.td = td + self.td_string = td_string + @staticmethod + def td_generate(td_string): + td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} + +grain="./get_lost" +ripe_files=[] +config = ConfigParser.RawConfigParser() +config.read("grain.conf") +raw_file_address=config.get("file","raw_file_address") +ripe_files_address=config.get("file","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) +num = [0,0,0,0,0,0,0] +breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +# i=0 +# for i in xrange(0,ripe_file_num): +# outfile=open(ripe_files_address+str(i)+'.txt','w') +# ripe_files.append(outfile) + +i=0 +with open(raw_file_address,'r') as infile: +# with open('./ripe_data/mistake_td_sfh1_sfh2_sim_rate_len_url_unequal','r')as infile: + with open(ripe_files_address,'w')as outfile: + for line in infile: + i+=1 + if(i%10000==0): + print i + line_return = data_line.if_error(line) + if(line_return != -1): + outfile.write(str(line))
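Editor's note: several of the scripts above, and the C tools later in this commit, gate records on how much of the file an SFH fingerprint actually covers. sfh_fingerprint.get_hashed_len pulls every "[left:right]" block out of the SFH string and, in the Python version, returns the average block length; the C variant in vedio_id_build.c and get_mistake_level.c instead sums the block lengths and halves the total. A self-contained restatement of the Python logic, assuming the regex is being read correctly:

import re

def hashed_len(sfh):
    """Average [left:right] block length, as in sfh_fingerprint.get_hashed_len."""
    blocks = re.findall(r"\[+\d+?:+\d+?\]", sfh)
    if not blocks:
        return -1
    total = 0
    for block in blocks:
        parts = re.split(r"\[|\]|:", block)   # e.g. ['', '0', '1023', '']
        total += int(parts[2]) - int(parts[1])
    return total / len(blocks)                # integer division under Python 2

print(hashed_len("head[0:1023]body[2048:4095]"))   # (1023 + 2047) / 2 = 1535

The result is then compared against a fraction of MediaLen (0.8, 0.9 or 0.999 depending on the tool) to decide whether the fingerprint is complete enough to keep.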
\ No newline at end of file diff --git a/src/dataset_build/get_lost.c b/src/dataset_build/get_lost.c new file mode 100644 index 0000000..0e6c452 --- /dev/null +++ b/src/dataset_build/get_lost.c @@ -0,0 +1,116 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <MESA/MESA_htable.h> +#include <assert.h> +#include <ctype.h> +#define HTABLE_SIZE 8*64*1024*1024 +#define SFH_PASS_RATE 0.8 +#define SIMILIAR 80 + +typedef struct td +{ + char * tdstr; + unsigned int lost; +}td; + +typedef struct file_sfh_data +{ + long id; + char * sfh; + td * td_value; + char * td_ori; +}file_sfh_data; + +int main(int argc,char *argv[]) +{ + FILE *fpread;//文件 + FILE *fpwrite;//write file handle + int array_size = 1024; + file_sfh_data **file_data=(file_sfh_data **)malloc(sizeof(file_sfh_data)*array_size); + char* dirstr = "../../data/td_data_set/td_data_20171207/td_sfh_lost"; + //char* dirstr = *++argv; + char* writestr = "../../data/td_data_set/td_data_20171207/td.txt"; + int total_len = 0; + char TD_tmp[256], SFH_tmp[1024*300], TD_ORI[1024*10]; + char buffer[1024*300+1]; + int ret = 0; + int line = 0; + int thread_safe = 0; + int i; + int id; + int similiarity; + MESA_htable_handle htable = NULL; + fpread=fopen(dirstr,"rb"); + fpwrite=fopen(writestr,"w"); + printf("file str is %s\n",dirstr); + if(fpread==NULL) + { + printf("open file error\n"); + return -1; + } + buffer[sizeof(buffer)]='\0'; + while(feof(fpread)==0) + { + fgets(buffer,sizeof(buffer)-1,fpread); + ret=sscanf(buffer,"%d;%[^;];%[^;];%s",&total_len,TD_ORI,TD_tmp,SFH_tmp); + if(ret!=4) + { + continue; + } + file_data[line]=(file_sfh_data*)calloc(1,sizeof(file_sfh_data)); + file_data[line]->id=line; + file_data[line]->sfh=strdup(SFH_tmp); + file_data[line]->td_value=(td*)calloc(1,sizeof(td)); + file_data[line]->td_value->tdstr=strdup(TD_tmp); + file_data[line]->td_value->lost=0; + file_data[line]->td_ori=strdup(TD_ORI); + line++; + if(line==array_size) + { + array_size*=2; + file_data=realloc(file_data,sizeof(file_sfh_data)*array_size); + } + } + printf("read file success!\n"); + htable = NULL; + htable=MESA_htable_born(); + thread_safe = 0; + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + unsigned int slot_size=1024*1024*16; + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(slot_size)); + MESA_htable_mature(htable); + for(i=0;i<line;i++) + { + if(MESA_htable_add(htable,(char*)(file_data[i]->td_value->tdstr),32,(void *)file_data[i]->id)<0) + { + id=(long)MESA_htable_search(htable,(char*)file_data[i]->td_value->tdstr,32); + similiarity=GIE_sfh_similiarity(file_data[id]->sfh,(int)strlen(file_data[id]->sfh),file_data[i]->sfh,(int)strlen(file_data[i]->sfh)); + if(similiarity<SIMILIAR) + { + file_data[id]->td_value->lost = 1; + file_data[i]->td_value->lost = 1; + } + } + } + for(i=0;i<line;i++) + { + fprintf(fpwrite,"%s;%s;%s;%d\n",file_data[i]->td_value->tdstr,file_data[i]->sfh,file_data[i]->td_ori,file_data[i]->td_value->lost); + } + for(i=0;i<line;i++) + { + free(file_data[i]->sfh); + file_data[i]->sfh=NULL; + free(file_data[i]->td_value->tdstr); + file_data[i]->td_value->tdstr=NULL; + free(file_data[i]->td_value); + file_data[i]->td_value=NULL; + free(file_data[i]->td_ori); + file_data[i]->td_ori=NULL; + free(file_data[i]); + file_data[i]=NULL; + } + fclose(fpread); + fclose(fpwrite); + return 0; +}
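Editor's note: get_lost.c above labels the raw data. Every line of get_lost_raw_data is keyed by its 32-character TD (MD5) in a MESA hash table; when a later line carries an already-seen TD, its SFH is compared with the first one through GIE_sfh_similiarity, and both lines get lost=1 when the similarity drops below SIMILIAR (80). Two small things worth flagging: buffer[sizeof(buffer)]='\0' writes one byte past the array (buffer[sizeof(buffer)-1] was presumably intended), and the initial malloc sizes the pointer array by sizeof(file_sfh_data) rather than sizeof(file_sfh_data*), which over-allocates but is otherwise harmless. A rough Python restatement of the labelling rule, with `similarity` standing in for the library call:

SIMILIAR = 80   # same threshold as the #define in get_lost.c

def mark_lost(records, similarity):
    """records: list of (td, sfh, td_ori); similarity: stand-in for GIE_sfh_similiarity."""
    first_seen = {}                 # td -> index of the first record with that td
    lost = [0] * len(records)
    for i, (td, sfh, _td_ori) in enumerate(records):
        if td not in first_seen:
            first_seen[td] = i      # mirrors the successful MESA_htable_add
        else:
            j = first_seen[td]      # later record with an already-seen TD
            if similarity(records[j][1], sfh) < SIMILIAR:
                lost[i] = lost[j] = 1
    return lost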
\ No newline at end of file diff --git a/src/dataset_build/grain.conf b/src/dataset_build/grain.conf new file mode 100644 index 0000000..944b337 --- /dev/null +++ b/src/dataset_build/grain.conf @@ -0,0 +1,5 @@ +[file] +ripe_files_address = ../../data/td_data_set/td_data_20171207/get_lost_raw_data +raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest +[output] +breakpoints = 1048576,1310720,1572864,1835008,2097152,3145728,4194304
\ No newline at end of file diff --git a/src/dataset_build/td_classification.py b/src/dataset_build/td_classification.py new file mode 100644 index 0000000..8d4b97c --- /dev/null +++ b/src/dataset_build/td_classification.py @@ -0,0 +1,5 @@ +from sklearn.datasets import load_iris +from sklearn import tree + +with open() as infile: +
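Editor's note: td_classification.py is committed as an unfinished five-line skeleton (the open() call has no path yet and the load_iris import is unused). Purely as a hypothetical sketch of where it appears to be heading, here is a decision-tree pass over the CSV that dataset_build.py writes, assuming column 0 is the lost/ok label and the remaining 19 columns are the extracted features; the path is the ripe_file_address from dataset_build.conf and is an assumption, not something the stub states:

import numpy
from sklearn import tree

def load_dataset(path):
    rows = numpy.loadtxt(path, delimiter=",")
    return rows[:, 1:], rows[:, 0]            # 19 feature columns, label column

if __name__ == "__main__":
    # Hypothetical path, taken from dataset_build.conf's ripe_file_address.
    X, y = load_dataset("../../data/td_data_set/td_data_20171207/td_dataset")
    clf = tree.DecisionTreeClassifier(max_depth=5)
    clf.fit(X, y)
    print(clf.score(X, y))                    # training accuracy only

Scoring on the training set only indicates fit, not generalisation; a train/test split would be the obvious next step.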
\ No newline at end of file diff --git a/src/dataset_build/vedio_id_build.c b/src/dataset_build/vedio_id_build.c new file mode 100644 index 0000000..9faaa64 --- /dev/null +++ b/src/dataset_build/vedio_id_build.c @@ -0,0 +1,171 @@ +/* +gcc -g vedio_id_build.c -o vedio_id_build -lmaatframe -I../../inc +*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "gram_index_engine.h" +#include <MESA/MESA_htable.h> +#include <assert.h> +#include <ctype.h> +#define BUFFER_LEN (10*1024) +#define SFH_PASS_RATE 0.9 +#define SFH_LEN (10*1024) +#define URL_LEN (10*1024) + +typedef struct video_id +{ + long id; + char *sfh; +}video_id; + +typedef struct cache +{ + GIE_digest_t ** GIE_cache; + long cache_size; + long len; +}cache; + +long get_hashed_len(const char* sfh) +{ + char *data=(char*)malloc(strlen(sfh)+1); + memcpy(data,sfh, strlen(sfh)); + data[strlen(sfh)]='\0'; + char *token=NULL,*sub_token=NULL,*saveptr; + long left_offset=0,right_offset=0,hashed_length=0; + int ret=0,first=0; + for (token = data; ;token= NULL) + { + sub_token= strtok_r(token,"[", &saveptr); + if (sub_token == NULL) + { + break; + } + if(first==0)//jump over the first sub string. + { + first=1; + continue; + } + ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); + if(ret!=2) + { + return 0; + } + assert(ret==2); + hashed_length+=right_offset-left_offset+1; + } + //printf("hashed length=%ld\n",hashed_length); + free(data); + return hashed_length/2; +} + +int main(int argc,char *argv[]) +{ + FILE *video_id_sets_file; + FILE *new_sfh_file; + const char *video_id_sets_file_dir="../../data/td_data_set/td_data_20171207/video_id_raw_data"; + const char *new_sfh_file_dir="../../data/ripe_data/td_data_20171207/video_id.txt"; + char *buffer=NULL; + int ret = 0,hashed_len = 0,total_len = 0,resultnum = 0,i = 0; + int update = 0,video_id = 0,j = 0; + int* temp_int = NULL; + float temp_sfh_pass = 0; + char *sfh_str,*url_str; + GIE_digest_t *sfh_video_id = NULL; + GIE_result_t *query_result = NULL; + cache *GIE_digest_cache = NULL; + video_id_sets_file = fopen(video_id_sets_file_dir,"r+"); + new_sfh_file = fopen(new_sfh_file_dir,"w"); + if(video_id_sets_file == NULL) + { + printf("open video_id_sets_file error\n"); + return -1; + } + if(new_sfh_file == NULL) + { + printf("open new_sfh_file error\n"); + return -1; + } + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + GIE_create_para_t *query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t)); + query_result = (GIE_result_t*)calloc(1,sizeof(GIE_result_t)); + GIE_handle_t *query_handle; + query_para->gram_value = 7; + query_para->position_accuracy = 5; + query_handle=GIE_create((const GIE_create_para_t *)query_para); + free(query_para); + if(query_handle==NULL) + { + printf("create GIE handle error\n"); + return -1; + } + sfh_video_id = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + url_str = (char*)calloc(URL_LEN,sizeof(char)); + i=0; + GIE_digest_cache =(cache*)calloc(1,sizeof(cache)); + GIE_digest_cache->cache_size = 1000; + GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*)); + GIE_digest_cache->len = 0; + while(feof(video_id_sets_file)==0) + { + i++; + if(i%10000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,video_id_sets_file); + ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + %*[^;];%*[^;];%*[^;];%[^;];%[^;]",sfh_str,url_str); + if(ret!=2) 
+ { + continue; + } + hashed_len = get_hashed_len((const char*)sfh_str); + temp_sfh_pass = (float)hashed_len/total_len; + if(temp_sfh_pass<SFH_PASS_RATE) + { + continue; + } + resultnum=GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,1); + if(resultnum == 0) + { + temp_int=(int*)calloc(1,sizeof(int)); + *temp_int=i; + sfh_video_id = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); + sfh_video_id->id=i; + sfh_video_id->sfh_length=strlen(sfh_str); + sfh_video_id->operation=GIE_INSERT_OPT; + sfh_video_id->cfds_lvl=5; + sfh_video_id->sfh=strdup(sfh_str); + sfh_video_id->tag=temp_int; + GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_video_id; + GIE_digest_cache->len++; + if(GIE_digest_cache->len==GIE_digest_cache->cache_size) + { + update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size); + GIE_digest_cache->len=0; + for(j=0;j<GIE_digest_cache->cache_size;j++) + { + free(GIE_digest_cache->GIE_cache[j]->sfh); + GIE_digest_cache->GIE_cache[j]->sfh=NULL; + free(GIE_digest_cache->GIE_cache[j]); + GIE_digest_cache->GIE_cache[j]=NULL; + } + } + fprintf(new_sfh_file,"%d,%s",i,buffer); + } + else + { + fprintf(new_sfh_file,"%d,%s",*((int*)query_result->tag),buffer); + } + } + free(buffer); + free(query_result); + free(sfh_video_id); + free(url_str); + free(sfh_str); + free(GIE_digest_cache); + return 0; +}
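Editor's note: vedio_id_build.c assigns a video id to every record of video_id_raw_data. Each SFH is queried against a GIE gram index; on a miss the record is treated as new content, its line number becomes its id and the fingerprint is queued for insertion (batched 1000 at a time through GIE_update); on a hit the id stored in the matching entry's tag is reused, and every input line is echoed to video_id.txt prefixed with the chosen id. One caveat: as far as I can tell total_len is never filled by the sscanf (every leading field is consumed with %*[^;]), so the SFH_PASS_RATE filter divides by zero and, with float semantics, effectively never rejects anything. A compact restatement of the id-assignment rule, with a hypothetical `index` object in place of the GIE handle:

def assign_video_ids(records, index):
    """records: iterable of SFH strings; index: hypothetical near-duplicate index
    offering query(sfh) -> list of ids and insert(sfh, id)."""
    ids = []
    for line_no, sfh in enumerate(records, start=1):
        hits = index.query(sfh)
        if hits:
            ids.append(hits[0])          # reuse the id of the similar fingerprint
        else:
            index.insert(sfh, line_no)   # new content: the line number becomes its id
            ids.append(line_no)
    return ids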
\ No newline at end of file diff --git a/src/file_digest.conf b/src/file_digest.conf new file mode 100644 index 0000000..a02cae2 --- /dev/null +++ b/src/file_digest.conf @@ -0,0 +1,3 @@ +[file] +ripe_files_address = ../data/ripe_data/td_data_20171207/all_av_digest +raw_file_address = ../data/td_data_20171207/td_data/all_av_digest diff --git a/src/file_digest.py b/src/file_digest.py new file mode 100644 index 0000000..3703794 --- /dev/null +++ b/src/file_digest.py @@ -0,0 +1,104 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +class data_line(object): + """docstring for ClassName""" + def __init__(self): + super(ClassName, self).__init__() + + @staticmethod + def if_error(data_line_str): + data_line_val = re.split(r';',data_line_str) + hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) + if(term['data_num'](data_line_val) and \ + term['not_null'](data_line_val[0]) and \ + term['not_null'](data_line_val[1]) and \ + term['not_null'](data_line_val[2]) and \ + term['ysp_len'](data_line_val[3]) and \ + term['not_null'](data_line_val[4]) and \ + term['not_null'](data_line_val[5]) and \ + term['td_len'](data_line_val[6]) and \ + term['td_len'](data_line_val[7]) and \ + term['td_len'](data_line_val[8]) and \ + term['td_len'](data_line_val[9]) and \ + term['td_len'](data_line_val[10]) and \ + term['td_len'](data_line_val[11]) and \ + term['td_len'](data_line_val[12]) and \ + term['td_len'](data_line_val[13]) and \ + term['td_len'](data_line_val[14]) and \ + term['td_len'](data_line_val[15]) and \ + term['td_len'](data_line_val[16]) and \ + term['td_len'](data_line_val[17]) and \ + term['not_null'](data_line_val[18]) and \ + term['sfh_len'](data_line_val[19]) and \ + term['not_null'](data_line_val[20]) and \ + hashed_len/float(data_line_val[3])>0.999): + return data_line_val + else: + return -1 + +class TD_fingerprint(object): + def __init__(): + self.td = td + self.td_string = td_string + @staticmethod + def td_generate(td_string): + td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} + +c_func="./" +ripe_files=[] +config = ConfigParser.RawConfigParser() +config.read("file_digest.conf") +raw_file_address=config.get("file","raw_file_address") +ripe_files_address=config.get("file","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) +# num = [0,0,0,0,0,0,0] +# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +# i=0 +# for i in xrange(0,ripe_file_num): +# outfile=open(ripe_files_address+str(i)+'.txt','w') +# ripe_files.append(outfile) + +i=0 +with open(raw_file_address,'r') as infile: + with open(ripe_files_address,'w')as outfile: + for 
line in infile: + i+=1 + if(i%10000==0): + print i + line_return = data_line.if_error(line) + if(line_return != -1): + outfile.write(str(line))
\ No newline at end of file diff --git a/src/get_td_mistake_lost/CMakeLists.txt b/src/get_td_mistake_lost/CMakeLists.txt new file mode 100644 index 0000000..87f4b6b --- /dev/null +++ b/src/get_td_mistake_lost/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT (CALCULATE) +SET (SRC_LIST get_lost_rate.c) +SET(CMAKE_BUILD_TYPE "Debug") +SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g -ggdb") +SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall") +MESSAGE(STATUS "This is BINARY dir" ${CALCULATE_BINARY_DIR}) +MESSAGE(STATUS "This is SOURCE dir" ${CALCULATE_SOURCE_DIR}) +#INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../include/) +#LINK_DIRECTORIES(${PROJECT_SOURCE_DIR}/../../lib/) +ADD_EXECUTABLE(get_lost_rate ${SRC_LIST} gram_index_engine.c) +TARGET_LINK_LIBRARIES(get_lost_rate maatframe libMESA_htable.so pthread m) diff --git a/src/get_td_mistake_lost/file_digest.conf b/src/get_td_mistake_lost/file_digest.conf new file mode 100644 index 0000000..6d1c06b --- /dev/null +++ b/src/get_td_mistake_lost/file_digest.conf @@ -0,0 +1,6 @@ +[file_digest] +ripe_files_address = ../../data/ripe_data/td_data_20171207/all_av_digest +raw_file_address = ../../data/td_data_20171207/td_data/all_av_digest +[new_td] +ripe_files_address = ../../data/ripe_data/td_data_20171207/new_TD.txt +raw_file_address = ../../data/ripe_data/td_data_20171207/all_av_digest diff --git a/src/get_td_mistake_lost/file_digest.py b/src/get_td_mistake_lost/file_digest.py new file mode 100644 index 0000000..62786ef --- /dev/null +++ b/src/get_td_mistake_lost/file_digest.py @@ -0,0 +1,104 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +class data_line(object): + """docstring for ClassName""" + def __init__(self): + super(ClassName, self).__init__() + + @staticmethod + def if_error(data_line_str): + data_line_val = re.split(r';',data_line_str) + hashed_len = sfh_fingerprint.get_hashed_len(data_line_val[19]) + if(term['data_num'](data_line_val) and \ + term['not_null'](data_line_val[0]) and \ + term['not_null'](data_line_val[1]) and \ + term['not_null'](data_line_val[2]) and \ + term['ysp_len'](data_line_val[3]) and \ + term['not_null'](data_line_val[4]) and \ + term['not_null'](data_line_val[5]) and \ + term['td_len'](data_line_val[6]) and \ + term['td_len'](data_line_val[7]) and \ + term['td_len'](data_line_val[8]) and \ + term['td_len'](data_line_val[9]) and \ + term['td_len'](data_line_val[10]) and \ + term['td_len'](data_line_val[11]) and \ + term['td_len'](data_line_val[12]) and \ + term['td_len'](data_line_val[13]) and \ + term['td_len'](data_line_val[14]) and \ + term['td_len'](data_line_val[15]) and \ + term['td_len'](data_line_val[16]) and \ + term['td_len'](data_line_val[17]) and \ + term['not_null'](data_line_val[18]) and \ + term['sfh_len'](data_line_val[19]) and \ + term['not_null'](data_line_val[20]) and \ + hashed_len/float(data_line_val[3])>0.999): + return data_line_val + else: + return -1 + +class TD_fingerprint(object): + def __init__(): + self.td = td + self.td_string = td_string + @staticmethod + def td_generate(td_string): + td_val = hashlib.md5(td_string,encode('utf-8')).hexdigest() + +class sfh_fingerprint(object): + + def __init__(self,sfh): + self.sfh = sfh + + @staticmethod + def get_hashed_len(sfh): + p = r"\[+\d+?:+\d+?\]" + pattern = re.compile(p) + hashed_len_set = pattern.findall(sfh) + if (term['not_null'](hashed_len_set)): + hashed_len = 0 + for x in xrange(0,len(hashed_len_set)): + hashed_len_num = 
re.split(r"\[|\]|:",hashed_len_set[x]) + hashed_len = hashed_len + int(hashed_len_num[2]) - int(hashed_len_num[1]) + return hashed_len/len(hashed_len_set) + else : + return -1 + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)==21), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20 and len(x)<(10*1024-100)), + 'not_null':(lambda x : len(x)!=0), + 'ysp_len':(lambda x : int(x)!=0), + 'line_len':(lambda x: len(x)>20 and len(x)<(10*1024-100))} + +c_func="./" +ripe_files=[] +config = ConfigParser.RawConfigParser() +config.read("file_digest.conf") +raw_file_address=config.get("file_digest","raw_file_address") +ripe_files_address=config.get("file_digest","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) +# num = [0,0,0,0,0,0,0] +# breakpoints = [int(i) for i in config.get("output","breakpoints").split(",")] +# i=0 +# for i in xrange(0,ripe_file_num): +# outfile=open(ripe_files_address+str(i)+'.txt','w') +# ripe_files.append(outfile) + +i=0 +with open(raw_file_address,'r') as infile: + with open(ripe_files_address,'w')as outfile: + for line in infile: + i+=1 + if(i%10000==0): + print i + line_return = data_line.if_error(line) + if(line_return != -1): + outfile.write(str(line))
\ No newline at end of file diff --git a/src/get_td_mistake_lost/get_TD_SFH.c b/src/get_td_mistake_lost/get_TD_SFH.c new file mode 100644 index 0000000..2ed3ecd --- /dev/null +++ b/src/get_td_mistake_lost/get_TD_SFH.c @@ -0,0 +1,162 @@ +/* +gcc -g get_TD_SFH.c -o get_TD_SFH -lmaatframe -lMESA_htable -I../include +./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level +*/ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "gram_index_engine.h" +#include <MESA/MESA_htable.h> +#include <assert.h> +#include <ctype.h> +#define BUFFER_LEN (15*1024) +#define SFH_LEN (10*1024) +#define TD_LEN 33 +#define THREAD_SAFE 0 +#define SLOT_SIZE (1024*1024*16) +#define TD_STR_LEN (10*1024) +#define TIME_STR_LEN 128 + +typedef struct sfh_link +{ + // char *time_str; + char *sfh_str; + char *td_ori; + // char *md5_32k; + int similiar; + int all_similiar; + // long hash_len; + struct sfh_link *next; +}sfh_link; + +typedef struct sfh +{ + int all_num; + int all_similiar; + char *sfh_str; + // long hash_len; + sfh_link *sfh_link_items; +}sfh; + +void print_td_sfh(const uchar *key,uint size,void *data,void *arg) +{ + FILE *ripe_file=(FILE*)arg; + sfh *temp_sfh=(sfh*)data; + fprintf(ripe_file,"%s;%s;%s",key,temp_sfh->sfh_link_items->td_ori,temp_sfh->sfh_str); +} + +int main() +{ + FILE *raw_file; + FILE *ripe_file; + char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; + char *ripe_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_3"; + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + if(ripe_file==NULL) + { + printf("open all_av_digest_mistake_level error"); + return -1; + } + MESA_htable_handle htable=NULL; + char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL; + int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0; + unsigned int slot_size=SLOT_SIZE; + sfh *temp_sfh=NULL; + sfh_link *temp_sfh_link=NULL; + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); + // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char)); + // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); + // time_str[TIME_STR_LEN-1]='\0'; + // md5_32k_str[32]='\0'; + htable=MESA_htable_born(); + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); + MESA_htable_mature(htable); + while(feof(raw_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); + // assert(ret==5); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + td[32]='\0'; + // md5_32k_str[32]='\0'; + if((temp_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + { + temp_sfh=(sfh*)calloc(1,sizeof(sfh)); + temp_sfh->all_num=1; + temp_sfh->all_similiar=0; + temp_sfh->sfh_str=strdup(sfh_str); + temp_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + 
temp_sfh->sfh_link_items->td_ori=strdup(td_str); + // temp_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); + // temp_sfh->sfh_link_items->time_str=strdup(time_str); + temp_sfh->sfh_link_items->similiar=0; + temp_sfh->sfh_link_items->all_similiar=0; + temp_sfh->sfh_link_items->next=NULL; + ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_sfh); + assert(ret>0); + } + else + { + temp_similiar=GIE_sfh_similiarity(temp_sfh->sfh_str,(int)strlen(temp_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh->all_similiar+=temp_similiar; + temp_sfh_link=temp_sfh->sfh_link_items; + for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) + { + temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh_link->all_similiar+=temp_similiar; + temp_all_similiar+=temp_similiar; + if(temp_sfh_link->all_similiar>temp_sfh->all_similiar) + { + free(temp_sfh->sfh_str); + temp_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); + temp_sfh->all_similiar=temp_sfh_link->all_similiar; + } + if(temp_sfh_link->next==NULL) + { + break; + } + } + temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh_link->next->sfh_str=strdup(sfh_str); + temp_sfh_link->next->td_ori=strdup(td_str); + // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); + // temp_sfh_link->next->time_str=strdup(time_str); + temp_sfh_link->next->similiar=0; + temp_sfh_link->next->all_similiar=temp_all_similiar; + temp_sfh_link->next->next=NULL; + temp_sfh->all_num+=1; + } + } + fclose(raw_file); + MESA_htable_iterate(htable,print_td_sfh,ripe_file); + free(sfh_str); + free(td); + free(td_str); + // free(md5_32k_str); + MESA_htable_destroy(htable,NULL); + // fclose(raw_file); + fclose(ripe_file); + return 0; +}
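Editor's note: get_TD_SFH.c collapses new_TD.txt down to one line per TD. All SFHs sharing a TD are chained in a linked list, a running cumulative similarity is maintained for each member, and the member with the highest cumulative similarity so far becomes the representative that print_td_sfh writes out as "td;td_ori;sfh". As I read it, the streaming code maintains essentially the pairwise sums that the batch, medoid-style pick below computes directly; `similarity` again stands in for GIE_sfh_similiarity:

def representative_sfh(sfhs, similarity):
    """Pick the member with the highest total similarity to the other members."""
    best, best_score = sfhs[0], -1
    for cand in sfhs:
        score = sum(similarity(cand, other) for other in sfhs if other is not cand)
        if score > best_score:
            best, best_score = cand, score
    return best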
\ No newline at end of file diff --git a/src/get_td_mistake_lost/get_lost_rate.c b/src/get_td_mistake_lost/get_lost_rate.c new file mode 100644 index 0000000..d983a00 --- /dev/null +++ b/src/get_td_mistake_lost/get_lost_rate.c @@ -0,0 +1,210 @@ +/* +gcc -g get_lost_rate.c -o get_lost_rate -lmaatframe -I../include +*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "gram_index_engine.h" +#include <assert.h> +#include <ctype.h> +#define BUFFER_LEN (10*1024) +#define CACHE_SIZE 2000000 +#define SFH_LEN (10*1024) +#define TD_LEN 33 +#define RESULT_NUM 10000 +#define TIME_STR_LEN 128 +#define TD_STR_LEN (10*1024) + +typedef struct cache +{ + GIE_digest_t ** GIE_cache; + long cache_size; + long len; +}cache; + +typedef struct GIE_tag +{ + char *td; + char *td_str; + char *sfh_str; +}GIE_tag; + +int main() +{ + FILE *td_sfh_file; + FILE *raw_file; + FILE *ripe_file; + const char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; + const char *td_sfh_file_dir="../../data/ripe_data/td_data_20171207/TD_SFH_1"; + const char *ripe_file_dir="../../data/ripe_data/td_data_20171207/get_lost_ripe_data_1"; + td_sfh_file = fopen(td_sfh_file_dir,"r+"); + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + char *buffer=NULL,*sfh_str=NULL,*td=NULL,*time_str=NULL,*td_str=NULL; + GIE_create_para_t *query_para=NULL; + GIE_handle_t *query_handle=NULL; + GIE_result_t *query_result = NULL; + cache *GIE_digest_cache = NULL; + GIE_digest_t *sfh_td = NULL; + int i=0,w=0,ret=0,lost=0,j=0,update=0,resultnum=0,temp_len=0; + GIE_tag *temp_tag =NULL; + if(td_sfh_file == NULL) + { + printf("open td_sfh_file_dir error\n"); + return -1; + } + if(raw_file == NULL) + { + printf("open raw_file_dir error\n"); + return -1; + } + if(ripe_file == NULL) + { + printf("open ripe_file_dir error\n"); + return -1; + } + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + sfh_str[SFH_LEN-1]='\0'; + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); + time_str[TIME_STR_LEN-1]='\0'; + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); + query_para = (GIE_create_para_t*)calloc(1,sizeof(GIE_create_para_t)); + query_para->gram_value = 7; + query_para->position_accuracy = 5; + query_para->ED_reexamine=1; + query_para->format=GIE_INPUT_FORMAT_SFH; + query_handle=GIE_create((const GIE_create_para_t *)query_para); + free(query_para); + query_result = (GIE_result_t*)calloc(RESULT_NUM,sizeof(GIE_result_t)); + GIE_digest_cache =(cache*)calloc(1,sizeof(cache)); + GIE_digest_cache->cache_size = CACHE_SIZE; + GIE_digest_cache->GIE_cache = (GIE_digest_t**)calloc(GIE_digest_cache->cache_size,sizeof(GIE_digest_t*)); + GIE_digest_cache->len = 0; + if(query_handle==NULL) + { + printf("create GIE handle error\n"); + return -1; + } + while(feof(td_sfh_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,td_sfh_file); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td,td_str,sfh_str); + assert(ret==3); + td[32]='\0'; + sfh_td = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); + sfh_td->id=i; + temp_len=strlen(sfh_str); + sfh_td->sfh_length=temp_len; + sfh_str[temp_len-1]='\0'; + sfh_td->operation=GIE_INSERT_OPT; + sfh_td->cfds_lvl=5; + sfh_td->sfh=strdup(sfh_str); + temp_tag=(GIE_tag*)calloc(1,sizeof(GIE_tag)); + temp_tag->td=strdup(td); + temp_tag->td_str=strdup(td_str); + temp_tag->sfh_str=strdup(sfh_str); + sfh_td->tag=(void*)temp_tag; + 
GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td; + GIE_digest_cache->len++; + // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); + // if(resultnum==0) + // { + // sfh_td = (GIE_digest_t*)calloc(1,sizeof(GIE_digest_t)); + // sfh_td->id=i; + // sfh_td->sfh_length=strlen(sfh_str); + // sfh_td->operation=GIE_INSERT_OPT; + // sfh_td->cfds_lvl=5; + // sfh_td->sfh=strdup(sfh_str); + // sfh_td->tag=(void*)strdup(td); + // GIE_digest_cache->GIE_cache[GIE_digest_cache->len] = sfh_td; + // GIE_digest_cache->len++; + // } + // else + // { + // for(j=0;j<resultnum;j++) + // { + // if(strcmp((char*)((query_result+j)->tag),td)!=0) + // { + // lost++; + // fprintf(ripe_file,"%s,%s,%s\n",(char*)((query_result+j)->tag),td,sfh_str); + // } + // } + // continue; + // } + // if(GIE_digest_cache->len==GIE_digest_cache->cache_size) + // { + // update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->cache_size); + // assert(update==GIE_digest_cache->len); + // GIE_digest_cache->len=0; + // for(j=0;j<GIE_digest_cache->cache_size;j++) + // { + // free(GIE_digest_cache->GIE_cache[j]->sfh); + // GIE_digest_cache->GIE_cache[j]->sfh=NULL; + // free(GIE_digest_cache->GIE_cache[j]); + // GIE_digest_cache->GIE_cache[j]=NULL; + // } + // } + // resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); + // for(i=0;i<resultnum;i++) + // { + // if(strcmp((char*)query_result[i]->tag,td)!=0) + // { + // lost++; + // } + // } + } + fclose(td_sfh_file); + update=GIE_update(query_handle,GIE_digest_cache->GIE_cache,GIE_digest_cache->len); + for(j=0;j<GIE_digest_cache->len;j++) + { + free(GIE_digest_cache->GIE_cache[j]->sfh); + GIE_digest_cache->GIE_cache[j]->sfh=NULL; + free(GIE_digest_cache->GIE_cache[j]); + GIE_digest_cache->GIE_cache[j]=NULL; + } + i=0; + while(feof(raw_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + // ret=sscanf(buffer,"%[^;];%[^;]",td,sfh_str); + // assert(ret==2); + // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %*[^;];%[^;];%*[^;];%[^;];%*[^;]",td_str,td,sfh_str); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + resultnum = GIE_query(query_handle,(const char *)sfh_str,(const long long)strlen(sfh_str),query_result,RESULT_NUM); + if(resultnum>1) + { + for(j=0;j<resultnum;j++) + { + if(strcmp(((GIE_tag*)(query_result+j)->tag)->td,td)!=0) + { + w=1; + fprintf(ripe_file,"%u,%s,%s,%s,%s,%s,%s\n",(query_result+j)->id,((GIE_tag*)((query_result+j)->tag))->td_str,((GIE_tag*)((query_result+j)->tag))->td,((GIE_tag*)((query_result+j)->tag))->sfh_str,td_str,td,sfh_str); + } + } + lost+=w; + w=0; + } + + } + printf("%d;%d\n",lost,i); + free(sfh_str); + free(td); + free(time_str); + free(td_str); +}
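Editor's note: get_lost_rate.c measures cross-TD collisions. The first loop loads every "td;td_ori;sfh" line of TD_SFH_1 into a GIE index, with the tag keeping the TD, the original line and the SFH; the second loop re-queries new_TD.txt, and whenever a query hit carries a TD different from the record's own TD the pair is written to get_lost_ripe_data_1 and the record is counted once in `lost`, which is printed next to the total line count at the end. In sketch form, with a hypothetical index.query returning the stored tags:

def count_cross_td_hits(records, index):
    """records: (td, sfh) pairs; index.query(sfh) -> list of (td, td_ori, sfh) tags."""
    lost = 0
    for td, sfh in records:
        hits = index.query(sfh)
        # The C code only inspects records with more than one hit, i.e. it
        # ignores the case where the only match is the record's own entry.
        if len(hits) > 1 and any(hit[0] != td for hit in hits):
            lost += 1
    return lost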
\ No newline at end of file diff --git a/src/get_td_mistake_lost/get_mistake_level.c b/src/get_td_mistake_lost/get_mistake_level.c new file mode 100644 index 0000000..5f03974 --- /dev/null +++ b/src/get_td_mistake_lost/get_mistake_level.c @@ -0,0 +1,366 @@ +/* +gcc -g get_mistake_level.c -o get_mistake_level -lMESA_htable -lmaatframe -I../../include +./get_mistake_level ../data/ripe_data/td_data_20171207/all_av_digest_mistake_level +*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "gram_index_engine.h" +#include <MESA/MESA_htable.h> +#include <assert.h> +#include <ctype.h> +#define THREAD_SAFE 0 +#define SLOT_SIZE (1024*1024*16) +#define SIMILIAR_RATE 90 +#define TD_STR_LEN (10*1024) +#define TIME_STR_LEN 128 +#define RAODONG_RATE 0.1 +#define BUFFER_LEN (15*1024) +#define SFH_LEN (10*1024) +#define TD_LEN 33 + +typedef struct sfh_link +{ + // char *time_str; + char *sfh_str; + char *td_ori; + // char *md5_32k; + int similiar; + int all_similiar; + // long hash_len; + struct sfh_link *next; +}sfh_link; + +typedef struct mistake_sfh +{ + int mistake_num; + int all_num; + int all_similiar; + char *sfh_str; + // long hash_len; + sfh_link *sfh_link_items; +}mistake_sfh; + +typedef struct temp_parameter +{ + int mistake_num; + FILE *ripe_file; +}temp_parameter; + +long get_hashed_len(const char* sfh) +{ + char *data=(char*)malloc(strlen(sfh)+1); + memcpy(data,sfh, strlen(sfh)); + data[strlen(sfh)]='\0'; + char *token=NULL,*sub_token=NULL,*saveptr; + long left_offset=0,right_offset=0,hashed_length=0; + int ret=0,first=0; + for (token = data; ; token= NULL) + { + sub_token= strtok_r(token,"[", &saveptr); + if (sub_token == NULL) + { + break; + } + if(first==0)//jump over the first sub string. + { + first=1; + continue; + } + ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); + if(ret!=2) + { + return 0; + } + assert(ret==2); + hashed_length+=right_offset-left_offset+1; + } + //printf("hashed length=%ld\n",hashed_length); + free(data); + return hashed_length/2; +} + +void print_mistake_td(const uchar *key,uint size,void *data,void *arg) +{ + temp_parameter *parameter = (temp_parameter*)arg; + mistake_sfh *temp_mistake_sfh=(mistake_sfh*)data; + float temp_rate=0; + temp_rate=(float)temp_mistake_sfh->mistake_num/(float)temp_mistake_sfh->all_num; + if(temp_rate>RAODONG_RATE) + { + parameter->mistake_num+=temp_mistake_sfh->mistake_num; + fprintf(parameter->ripe_file,"%d;%s\n",temp_mistake_sfh->mistake_num,temp_mistake_sfh->sfh_str); + sfh_link *temp_sfh_link=temp_mistake_sfh->sfh_link_items; + for(;;temp_sfh_link=temp_sfh_link->next) + { + if(temp_sfh_link==NULL) + { + break; + } + temp_sfh_link->similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str)); + // fprintf(parameter->ripe_file,"%s,%d;%s;%s;%s\n",temp_sfh_link->time_str,temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori,temp_sfh_link->md5_32k); + fprintf(parameter->ripe_file,"%d;%s;%s\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->td_ori); + } + fprintf(parameter->ripe_file,"\n"); + } +} + +int main(int argc,char *argv[]) +{ + FILE *raw_file; + FILE *ripe_file; + char *raw_file_dir="../../data/ripe_data/td_data_20171207/new_TD.txt"; + char *ripe_file_dir="../../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_3"; + char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL,*md5_32k_str=NULL,*time_str=NULL; + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = 
fopen(ripe_file_dir,"w+"); + int i=0,thread_safe=THREAD_SAFE,ret=0,temp_mistake=0,temp_similiar=0,temp_all_similiar=0; + long temp_hash_len=0; + unsigned int slot_size=SLOT_SIZE; + mistake_sfh *temp_mistake_sfh=NULL; + sfh_link *temp_sfh_link=NULL; + MESA_htable_handle htable=NULL; + temp_parameter *parameter=NULL; + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + + + if(ripe_file==NULL) + { + printf("open all_av_digest_mistake_level error"); + return -1; + } + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + td_str = (char*)calloc(TD_STR_LEN,sizeof(char)); + // md5_32k_str = (char*)calloc(TD_LEN,sizeof(char)); + // time_str = (char*)calloc(TIME_STR_LEN,sizeof(char)); + // time_str[TIME_STR_LEN-1]='\0'; + // md5_32k_str[32]='\0'; + htable=MESA_htable_born(); + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); + MESA_htable_mature(htable); + parameter=(temp_parameter*)calloc(1,sizeof(temp_parameter)); + parameter->mistake_num=0; + parameter->ripe_file=ripe_file; + while(feof(raw_file)==0) + { + i++; + if(i%100000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + td[32]='\0'; + // md5_32k_str[32]='\0'; + if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + { + temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh)); + temp_mistake_sfh->mistake_num=0; + temp_mistake_sfh->all_num=1; + temp_mistake_sfh->all_similiar=0; + // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str); + temp_mistake_sfh->sfh_str=strdup(sfh_str); + temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str); + // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); + // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str); + temp_mistake_sfh->sfh_link_items->similiar=0; + temp_mistake_sfh->sfh_link_items->all_similiar=0; + temp_mistake_sfh->sfh_link_items->next=NULL; + ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh); + assert(ret>0); + } + else + { + temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_mistake_sfh->all_similiar+=temp_similiar; + temp_sfh_link=temp_mistake_sfh->sfh_link_items; + for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) + { + // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))<SIMILIAR_RATE) + // { + // temp_mistake=1; + // } + temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh_link->all_similiar+=temp_similiar; + temp_all_similiar+=temp_similiar; + if(temp_sfh_link->all_similiar>temp_mistake_sfh->all_similiar) + { + free(temp_mistake_sfh->sfh_str); + temp_mistake_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); + temp_mistake_sfh->all_similiar=temp_sfh_link->all_similiar; + } + 
if(temp_sfh_link->next==NULL) + { + break; + } + } + // if(temp_hash_len>temp_mistake_sfh->hash_len) + // { + // temp_mistake_sfh->hash_len=temp_hash_len; + // free(temp_mistake_sfh->sfh_str); + // temp_mistake_sfh->sfh_str=strdup(sfh_str); + // } + temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh_link->next->sfh_str=strdup(sfh_str); + temp_sfh_link->next->td_ori=strdup(td_str); + // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); + // temp_sfh_link->next->time_str=strdup(time_str); + temp_sfh_link->next->similiar=0; + temp_sfh_link->next->all_similiar=temp_all_similiar; + temp_sfh_link->next->next=NULL; + temp_mistake_sfh->all_num+=1; + } + } + fclose(raw_file); + raw_file = fopen(raw_file_dir,"r+"); + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + i=0; + while(feof(raw_file)==0) + { + i++; + if(i%10000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + // ret=sscanf(buffer,"%[^;];%*[^;];%*[^;];%*[^;];%[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %[^;];%[^;];%*[^;];%[^;];%*[^;]",time_str,td_str,md5_32k_str,td,sfh_str); + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + assert(ret==3); + td[32]='\0'; + // md5_32k_str[32]='\0'; + temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN); + assert(temp_mistake_sfh!=NULL); + // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + // { + // temp_mistake_sfh=(mistake_sfh*)calloc(1,sizeof(mistake_sfh)); + // temp_mistake_sfh->num=0; + // temp_mistake_sfh->hash_len=get_hashed_len(sfh_str); + // temp_mistake_sfh->sfh_str=strdup(sfh_str); + // temp_sfh_link=(sfh_link*)calloc(1,sizeof(sfh_link)); + // temp_sfh_link->sfh_str=strdup(sfh_str); + // temp_sfh_link->td_ori=strdup(td_str); + // temp_sfh_link->md5_32k=strdup(md5_32k_str); + // temp_sfh_link->time_str=strdup(time_str); + // temp_sfh_link->next=NULL; + // temp_mistake_sfh->sfh_link_items=temp_sfh_link; + // ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_mistake_sfh); + // assert(ret>0); + // } + // else + // { + // temp_hash_len=get_hashed_len(sfh_str); + // if(temp_hash_len>temp_mistake_sfh->hash_len) + // { + // temp_sfh_link->hash_len=get_hashed_len(); + // free(temp_sfh_link->sfh_str); + // temp_sfh_link->sfh_str=strdup(sfh_str); + // } + temp_similiar=GIE_sfh_similiarity(temp_mistake_sfh->sfh_str,(int)strlen(temp_mistake_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + if(temp_similiar<SIMILIAR_RATE) + { + temp_mistake_sfh->mistake_num+=1; + } + // if(temp_mistake_sfh->sfh_link_items!=NULL) + // { + // temp_sfh_link=temp_mistake_sfh->sfh_link_items; + // for(;;temp_sfh_link=temp_sfh_link->next) + // { + // // if(GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str))<SIMILIAR_RATE) + // // { + // // temp_mistake=1; + // // } + // if(temp_sfh_link->next==NULL) + // { + // break; + // } + // } + // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + // temp_sfh_link->next->sfh_str=strdup(sfh_str); + // temp_sfh_link->next->td_ori=strdup(td_str); + // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); + // temp_sfh_link->next->time_str=strdup(time_str); + // temp_sfh_link->next->similiar=temp_similiar; + // temp_sfh_link->next->next=NULL; + // } + // else + // { + // temp_mistake_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + // temp_mistake_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + // 
temp_mistake_sfh->sfh_link_items->td_ori=strdup(td_str); + // temp_mistake_sfh->sfh_link_items->md5_32k=strdup(md5_32k_str); + // temp_mistake_sfh->sfh_link_items->time_str=strdup(time_str); + // temp_mistake_sfh->sfh_link_items->similiar=temp_similiar; + // temp_mistake_sfh->sfh_link_items->next=NULL; + // } + // if(temp_mistake==1) + // { + // temp_mistake_sfh->num+=temp_mistake; + // temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + // temp_sfh_link->next->sfh_str=strdup(sfh_str); + // temp_sfh_link->next->td_ori=strdup(td_str); + // temp_sfh_link->next->md5_32k=strdup(md5_32k_str); + // temp_sfh_link->next->next=NULL; + // temp_mistake=0; + // } + } + fclose(raw_file); + // raw_file=NULL; + // raw_file = fopen(raw_file_dir,"r+"); + // if(raw_file==NULL) + // { + // printf("open all_av_digest error\n"); + // return -1; + // } + // i=0; + // while(feof(raw_file)==0) + // { + // i++; + // if(i%10000==0) + // { + // printf("%d\n",i); + // } + // fgets(buffer,BUFFER_LEN-1,raw_file); + // buffer[BUFFER_LEN-1]='\0'; + // ret=sscanf(buffer,"%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];%*[^;];\ + // %*[^;];%[^;];%*[^;];%*[^;];%*[^;]",td); + // assert(ret==1); + // if((temp_mistake_sfh=MESA_htable_search(htable,td,TD_LEN))!=NULL) + // { + // fprintf(ripe_file,"%d;%s",temp_mistake_sfh->num,buffer); + // } + // } + MESA_htable_iterate(htable,print_mistake_td,(void*)parameter); + printf("%d,%d\n",parameter->mistake_num,i); + free(buffer); + free(sfh_str); + free(td); + free(td_str); + // free(md5_32k_str); + MESA_htable_destroy(htable,NULL); + // fclose(raw_file); + fclose(ripe_file); + return 0; +}
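Editor's note: get_mistake_level.c makes two passes over new_TD.txt. The first builds, per TD, the same representative-SFH structure as get_TD_SFH.c; the second compares every record's SFH against its TD's representative and bumps mistake_num when the similarity is below SIMILIAR_RATE (90). print_mistake_td then reports every TD whose mistake fraction exceeds RAODONG_RATE (0.1; "raodong" is pinyin for 扰动, i.e. perturbation or noise), listing each member with its similarity to the representative. The reporting rule, restated with `similarity` as a stand-in:

SIMILIAR_RATE, RAODONG_RATE = 90, 0.1    # thresholds from get_mistake_level.c

def flag_noisy_tds(groups, similarity):
    """groups: dict td -> (representative_sfh, [member_sfhs])."""
    flagged = {}
    for td, (rep, members) in groups.items():
        mistakes = sum(1 for m in members if similarity(rep, m) < SIMILIAR_RATE)
        if float(mistakes) / len(members) > RAODONG_RATE:
            flagged[td] = mistakes
    return flagged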
\ No newline at end of file diff --git a/src/get_td_mistake_lost/get_td_mistake_lost.sh b/src/get_td_mistake_lost/get_td_mistake_lost.sh new file mode 100644 index 0000000..7c851b8 --- /dev/null +++ b/src/get_td_mistake_lost/get_td_mistake_lost.sh @@ -0,0 +1,5 @@ +#!/bin/bash +python new_TD.py +./get_mistake_level +./get_TD_SFH +./get_lost_rate diff --git a/src/get_td_mistake_lost/gram_index_engine.c b/src/get_td_mistake_lost/gram_index_engine.c new file mode 100644 index 0000000..0f503db --- /dev/null +++ b/src/get_td_mistake_lost/gram_index_engine.c @@ -0,0 +1,1354 @@ +#include<stdio.h> +#include<stdlib.h> +#include<string.h> +#include<math.h> +#include<assert.h> +#include<MESA/MESA_htable.h> +#include<unistd.h> + +#include "gram_index_engine.h" +#include "queue.h" + +#define HTABLE_SIZE 1024 *1024 +#define GRAM_CNT_MAX 2 +#define GRAM_MAX 128 +#define TOLERENCE_SIZE 0 +#define UNION_INIT_SIZE 1000 +#define BLOCKSIZE_MIN 3 +#define MEM_OCCUPY 1 +#define CNT_MAX 10 +#define GRAM_CNT_THRESHOLD 10 +#define QUERY_LEN_ACCURACY 0.1 +#define HTABLE_NUM 8 +//#define GIE_INPUT_FORMAT_SFH 1 +//#define GIE_INPUT_FORMAT_PLAIN 0 +#define MAX_LENGTH 10000 +#define KEY_MAX_LENGTH 10 +#define EDIT_DISTN_INSERT_COST 1 +#define EDIT_DISTN_REMOVE_COST 1 +#define EDIT_DISTN_REPLACE_COST 2 +#define MIN(x,y) ((x)<(y)?(x):(y)) + +int before(unsigned int off1, unsigned int off2) +{ + return (signed int)(off1-off2)<0; +} +#define after(off2,off1) before(off1,off2) + +typedef struct +{ + unsigned int user_gram_value; + unsigned int user_position_accuracy; + short ED_reexamine; + short input_format; + MESA_htable_handle id_table; + MESA_htable_handle index_table[HTABLE_NUM]; + unsigned long long mem_occupy; + unsigned long long hash_cnt; +}GIE_handle_inner_t; + + +struct linklist_node +{ + short * position; + struct id_table_data * basicinfo; + short size; + short index; + unsigned long long blocksize; + TAILQ_ENTRY(linklist_node) listentry; +}; + + +struct index_table_data +{ + struct TQ * listhead; + int cnt; +}; + + +struct id_table_data +{ + unsigned int id; + short sfh_length; + short gram_cnt; + unsigned long long blocksize; + char * sfh; + void * tag; + char cfds_lvl; +}; + + +struct htable_handle +{ + MESA_htable_handle runtime_table; + MESA_htable_handle para; +}; + +struct key_list_node +{ + char * key; + int digest_id; + int pos; + unsigned long long blocksize; + TAILQ_ENTRY(key_list_node) keylistentry; +}; + + +unsigned long long hash_cnt; +unsigned long long cnt_sum; + +TAILQ_HEAD(TQ, linklist_node); +TAILQ_HEAD(KL, key_list_node); + +void idtable_free(void * data); +void indextable_free(void * data); +int key_compare(const uchar * key1, uint size1, const uchar * key2, uint size2); +int GIE_insert_indextable(MESA_htable_handle handle, struct id_table_data * info, char * key, unsigned int index,unsigned long long blocksize); + +int GIE_delete_from_indextable_by_key(MESA_htable_handle handle, char * key, unsigned int id); +int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t * digest); +int GIE_cmp(const void * a, const void * b); +inline unsigned int get_real_length(const char * string, unsigned int length); +void print_item_iterate(const uchar * key, unsigned int size, void * data, void * user); +inline unsigned long long calc_fh_blocksize(unsigned long long orilen); +inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len); + +MESA_htable_handle copy_htable(void * htable_para,void (* func)(const uchar * key, uint size, void * data, void *user),void 
(*free_fuc)(void * data)); +void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user); +void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user); + +GIE_handle_t * GIE_create(const GIE_create_para_t * para) +{ + int i = 0; + GIE_handle_inner_t * handle = (GIE_handle_inner_t *)calloc(1, sizeof(GIE_handle_inner_t)); + handle->mem_occupy = 0; + handle->mem_occupy += sizeof(GIE_handle_inner_t); + + handle->user_gram_value = para->gram_value; + handle->user_position_accuracy = para->position_accuracy; + handle->input_format = para->format; + //handle->user_cmp = GIE_INPUT_FORMAT_PLAIN; + handle->ED_reexamine = para->ED_reexamine; + handle->hash_cnt = 0; + + + MESA_htable_create_args_t idtable_args,indextable_args[HTABLE_NUM]; + memset(&idtable_args, 0, sizeof(idtable_args)); + idtable_args.thread_safe = 0; + idtable_args.hash_slot_size = HTABLE_SIZE; + idtable_args.max_elem_num = 0; + idtable_args.expire_time = 0; + idtable_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO; + idtable_args.key_comp = NULL; + idtable_args.key2index = NULL; + idtable_args.data_free = idtable_free; + idtable_args.data_expire_with_condition = NULL; + idtable_args.recursive = 0; + handle->id_table = MESA_htable_create(&idtable_args, sizeof(idtable_args)); + + for(i = 0;i < HTABLE_NUM;i++) + { + memset(&indextable_args[i], 0, sizeof(indextable_args[i])); + indextable_args[i].thread_safe = 0; + indextable_args[i].hash_slot_size = HTABLE_SIZE; + indextable_args[i].max_elem_num = 0; + indextable_args[i].expire_time = 0; + indextable_args[i].eliminate_type = HASH_ELIMINATE_ALGO_FIFO; + indextable_args[i].key_comp = key_compare; + indextable_args[i].key2index = NULL; + indextable_args[i].data_free = indextable_free; + indextable_args[i].data_expire_with_condition = NULL; + indextable_args[i].recursive = 0; + handle->index_table[i] = MESA_htable_create(&indextable_args[i], sizeof(indextable_args[i])); + } + + return (GIE_handle_t *)(handle); +} + +int key_compare(const uchar * key1, uint size1, const uchar * key2, uint size2) +{ + return ( (*(long*)key1) - (*(long*)key2)); +} + + +void idtable_free(void * data) +{ + struct id_table_data * tmp = (struct id_table_data *)data; + free(tmp->sfh); + tmp->sfh = NULL; + tmp->tag = NULL; + free(tmp); + tmp = NULL; + + return; +} + +void indextable_delete_with_threshold(MESA_htable_handle * htable_handle, struct index_table_data * tmp, char * key) +{ + int key_length = strnlen(key,KEY_MAX_LENGTH); + struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); + while(tmp_node != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node,listentry); + if(tmp_node->basicinfo->gram_cnt <= GRAM_CNT_THRESHOLD) + { + tmp_node = linklist_tmp; + continue; + } + TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); + tmp_node->basicinfo->gram_cnt--; + tmp->cnt--; + if(TAILQ_EMPTY(tmp->listhead) == 1) + { + //_handle->hash_cnt--; + //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ)); + if(MESA_htable_del(htable_handle, (const uchar *)(key), key_length, indextable_free) < 0) + { + printf("indextable backtrack delete error!\n"); + assert(0); + return; + } + } + //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp_node->size)); + free(tmp_node->position); + tmp_node->position = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = linklist_tmp; + + } + return; +} + + +void indextable_free(void * data) +{ + struct index_table_data * tmp = (struct index_table_data *)data; + struct 
linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); + while(tmp_node != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry); + TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); + tmp->cnt--; + free(tmp_node->position); + tmp_node->position = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = linklist_tmp; + } + free(tmp->listhead); + tmp->listhead = NULL; + free(tmp); + tmp = NULL; + return; +} + + +void indextable_free_cnt(void * data) +{ + struct index_table_data * tmp = (struct index_table_data *)data; + hash_cnt++; + cnt_sum += tmp->cnt; + struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead); + while(tmp_node != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry); + TAILQ_REMOVE(tmp->listhead, tmp_node, listentry); + tmp->cnt--; + free(tmp_node->position); + tmp_node->position = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = linklist_tmp; + } + free(tmp->listhead); + tmp->listhead = NULL; + free(tmp); + tmp = NULL; + return; +} + +void print_item_iterate_idtable(const uchar * key, uint size, void * data, void * user) +{ + struct id_table_data * id_data = (struct id_table_data *)data; + printf("id:%u\n",id_data->id); +} + + + +void print_item_iterate(const uchar * key, uint size, void * data, void * user) +{ + struct index_table_data * index_data = (struct index_table_data *)data; + printf("%s %d\n", (char *)key, index_data->cnt); + struct linklist_node * tmp_node = NULL; + int i = 0; + TAILQ_FOREACH(tmp_node, index_data->listhead, listentry) + { + printf("id = %u\n",tmp_node->basicinfo->id); + printf("position is :\n"); + for(i = 0;i < tmp_node->index;i++) + { + printf("%d ",tmp_node->position[i]); + } + printf("\n"); + } + printf("\n"); +} + +int edit_distn(const char *s1, int s1len, const char *s2, int s2len) +{ + long int max_len = 0; + if(s1len >= s2len) + { + max_len = s1len; + } + else + { + max_len = s2len; + } + int **t = (int **)malloc(2*sizeof(int *)); + t[0] = (int *)malloc((max_len +1)*sizeof(int)); + t[1] = (int *)malloc((max_len +1)*sizeof(int)); + //int t[2][EDIT_DISTN_MAXLEN+1]; + int *t1 = t[0]; + int *t2 = t[1]; + int *t3; + size_t i1, i2; + for (i2 = 0; i2 <= s2len; i2++) + t[0][i2] = i2 * EDIT_DISTN_REMOVE_COST; + for (i1 = 0; i1 < s1len; i1++) { + t2[0] = (i1 + 1) * EDIT_DISTN_INSERT_COST; + for (i2 = 0; i2 < s2len; i2++) { + int cost_a = t1[i2+1] + EDIT_DISTN_INSERT_COST; + int cost_d = t2[i2] + EDIT_DISTN_REMOVE_COST; + int cost_r = t1[i2] + (s1[i1] == s2[i2] ? 
0 : EDIT_DISTN_REPLACE_COST); + t2[i2+1] = MIN(MIN(cost_a, cost_d), cost_r); + } + t3 = t1; + t1 = t2; + t2 = t3; + } + long int ret = t1[s2len]; + free(t[0]); + free(t[1]); + free(t); + return ret; + //return t1[s2len]; +} + + +void GIE_destory(GIE_handle_t * handle) +{ + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle); + //printf("hash_cnt:%llu\n",_handle->hash_cnt); + //printf("mem_occupy:%llu\n",_handle->mem_occupy); + int i = 0; + for(i = 0;i < HTABLE_NUM;i++) + { + MESA_htable_destroy(_handle->index_table[i], indextable_free_cnt); + } + MESA_htable_destroy(_handle->id_table, idtable_free); + //printf("index_free hash_cnt :%llu\n", hash_cnt); + //printf("cnt sum :%llu\n",cnt_sum); + free(_handle); + _handle = NULL; +} + + +int grab_key_set(char * str_begin,short str_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list, unsigned long long blocksize) +{ + int k = 0,j = 0; + char * tmp_gram = str_begin; + char key[gram_value+1]; + int sum = 0,htable_index = 0; + if(str_length < gram_value) + { + return 0; + } + str_length = MIN(str_length,strnlen(str_begin,str_length)); + *gram_cnt = str_length - gram_value + 1; + //printf("str_length:%d\n",str_length); + for(k = 0; k < str_length - gram_value + 1; k++) + { + sum = 0; + memset(key,'\0', gram_value+1); + memcpy(key, tmp_gram++, gram_value); + //printf("k:%d key:%s\n",k,key); + for(j = 0; j < gram_value; j++) + { + sum += key[j]; + } + htable_index = sum%HTABLE_NUM; + struct key_list_node *tmp_node = (struct key_list_node *)calloc(1,sizeof(struct key_list_node)); + tmp_node->key = (char *)calloc(gram_value+1,sizeof(char)); + memcpy(tmp_node->key,key,gram_value); + tmp_node->digest_id = i; + tmp_node->pos = k; + tmp_node->blocksize = blocksize; + TAILQ_INSERT_TAIL(to_process_list[htable_index], tmp_node, keylistentry); + } + return 1; +} +int sfh_grab_key_set(char *sfh,short sfh_length,int i,unsigned int gram_value,short * gram_cnt,struct KL** to_process_list) +{ + int t = 0; + char * tmp_gram = sfh; + unsigned long long blocksize = 0; + for(t = 0; t < 2;t++) + { + blocksize = get_blocksize_from_head(tmp_gram, sfh_length); + while(*tmp_gram != '\0') + { + if(*tmp_gram == ':') + { + tmp_gram++; + break; + } + tmp_gram++; + } + unsigned int real_length = get_real_length(tmp_gram, sfh_length); + if(real_length < gram_value) + { + if(t==0) + { + return 0; + } + else + { + continue; + } + } + grab_key_set(tmp_gram, real_length, i, gram_value, gram_cnt, to_process_list, blocksize); + while(*tmp_gram != '\0') + { + if(*tmp_gram == '#') + { + tmp_gram++; + break; + } + tmp_gram++; + } + } + return 1; +} + +void free_key_set(struct KL ** to_process_list,int size) +{ + int i = 0; + for(i = 0;i < size;i++) + { + struct key_list_node *tmp_node = TAILQ_FIRST(to_process_list[i]); + while(tmp_node != NULL) + { + struct key_list_node *key_list_tmp = TAILQ_NEXT(tmp_node, keylistentry); + TAILQ_REMOVE(to_process_list[i], tmp_node, keylistentry); + free(tmp_node->key); + tmp_node->key = NULL; + free(tmp_node); + tmp_node = NULL; + tmp_node = key_list_tmp; + } + free(to_process_list[i]); + to_process_list[i]= NULL; + } +} + +int GIE_update(GIE_handle_t * handle,GIE_digest_t * * digests,int size) +{ + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle); + struct id_table_data * info = NULL; + int success_cnt = 0; + int m = 0, i = 0, grab_ret = 0; + short gram_cnt = 0; + unsigned int input_fh_len = 0; + unsigned int gram_value = _handle->user_gram_value; + struct KL* to_process_list[HTABLE_NUM]; + + 
MESA_htable_handle htable_index_copy; + MESA_htable_handle htable_id_copy; + MESA_htable_handle htable_tmp_index=NULL,htable_tmp_id=NULL; + struct htable_handle * htable_copied_id_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle)); + struct htable_handle * htable_copied_index_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle)); + + htable_copied_id_para->runtime_table = _handle->id_table; + htable_copied_id_para->para = NULL; + htable_id_copy = copy_htable((void *)htable_copied_id_para, copy_idtable_item_iterate,idtable_free); + + MESA_htable_handle garbage_htable[HTABLE_NUM]; + /*if(MESA_htable_iterate(htable_id_copy, print_item_iterate_idtable, NULL) == -1) + { + printf("iterate error!\n"); + } + printf("size:%u\n",id_size);*/ + + for(m = 0;m < HTABLE_NUM;m++) + { + to_process_list[m]=(struct KL*)calloc(1,sizeof(struct KL)); + TAILQ_INIT(to_process_list[m]); + } + + for(i = 0; i < size; i++) + { + switch(digests[i]->operation) + { + case GIE_INSERT_OPT: + { + assert(digests[i]->tag!=NULL); + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + grab_ret = sfh_grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list); + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + + grab_ret = grab_key_set(digests[i]->sfh,digests[i]->sfh_length,i,gram_value,&gram_cnt,to_process_list,0); + } + if(grab_ret == 0) + { + continue; + } + else + { + info = (struct id_table_data *)calloc(1,sizeof(struct id_table_data)); + input_fh_len = digests[i]->sfh_length; + info->sfh = (char *)calloc(input_fh_len + 1,sizeof(char)); + memcpy(info->sfh, digests[i]->sfh, input_fh_len); + _handle->mem_occupy += sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1); + info->sfh_length = digests[i]->sfh_length; + info->gram_cnt = gram_cnt; + + /*int tag_len = strnlen(digests[i]->tag,MAX_LENGTH); + info->tag = (char *)calloc(tag_len+1,sizeof(char)); + memcpy(info->tag,digests[i]->tag,tag_len);*/ + info->tag = digests[i]->tag; + + info->id = digests[i]->id; + info->cfds_lvl = digests[i]->cfds_lvl; + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + info->blocksize = get_blocksize_from_head(digests[i]->sfh, digests[i]->sfh_length); + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + info->blocksize = 0; + } + + if(MESA_htable_add(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), (const void *)info) < 0) + { + _handle->mem_occupy -= (sizeof(struct id_table_data) + sizeof(char)*(input_fh_len+1)); + free(info->sfh); + info->sfh = NULL; + free(info); + info = NULL; + continue; + } + } + success_cnt ++; + break; + } + + case GIE_DELETE_OPT: + { + + struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(htable_id_copy, \ + (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id)); + if(ret!= NULL) + { + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + success_cnt += sfh_grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list); + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + + success_cnt += grab_key_set(ret->sfh,ret->sfh_length,i,gram_value,&gram_cnt,to_process_list,0); + } + } + else + { + break; + } + if(MESA_htable_del(htable_id_copy, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free) < 0) + { + printf("delete id failed!"); + assert(0); + } + //success_cnt += GIE_delete(_handle, digests[i]); + break; + } + + default: + break; + } + + } + unsigned int digest_id = 0; + struct id_table_data * 
tmp_info= NULL; + + for(i = 0;i < HTABLE_NUM;i++) + { + htable_copied_index_para->runtime_table = _handle->index_table[i]; + htable_copied_index_para->para = htable_id_copy; + htable_index_copy = copy_htable((void *)htable_copied_index_para,copy_indextable_item_iterate,indextable_free); + struct key_list_node * tmp_node; + TAILQ_FOREACH(tmp_node, to_process_list[i], keylistentry) + { + digest_id = tmp_node->digest_id; + if(digests[digest_id]->operation == GIE_INSERT_OPT) + { + tmp_info =(struct id_table_data *)MESA_htable_search(htable_id_copy, (const uchar *)(&(digests[digest_id])->id), \ + sizeof((digests[digest_id])->id)); + if(tmp_info == NULL) + { + printf("id %u not insert\n",digests[digest_id]->id); + } + if(GIE_insert_indextable(htable_index_copy, tmp_info, tmp_node->key, tmp_node->pos,tmp_node->blocksize) < 0) + { + printf("insert %d indextable failed!\n",digests[digest_id]->id); + continue; + } + } + else if(digests[digest_id]->operation == GIE_DELETE_OPT) + { + if(GIE_delete_from_indextable_by_key(htable_index_copy, tmp_node->key, (digests[digest_id])->id) < 0) + { + printf("delete %d indextable failed!\n",digests[digest_id]->id); + continue; + } + } + } + htable_tmp_index= _handle->index_table[i]; + _handle->index_table[i] = htable_index_copy; + garbage_htable[i]=htable_tmp_index; + } + + htable_tmp_id = _handle->id_table; + _handle->id_table = htable_id_copy; + usleep(200); + MESA_htable_destroy(htable_tmp_id, idtable_free); + /*if(MESA_htable_iterate(_handle->index_table, print_item_iterate, NULL) == -1) + { + printf("iterate error!\n"); + }*/ + for(i=0;i<HTABLE_NUM;i++) + { + MESA_htable_destroy(garbage_htable[i], indextable_free_cnt); + + } + free_key_set(to_process_list,HTABLE_NUM); + free(htable_copied_id_para); + htable_copied_id_para = NULL; + free(htable_copied_index_para); + htable_copied_index_para = NULL; + return success_cnt; +} + + +MESA_htable_handle copy_htable(void * htable_para,void (* func)(const uchar * key, uint size, void * data, void *user),void (*free_fuc)(void * data)) +{ + MESA_htable_create_args_t copy_table_args; + memset(©_table_args, 0, sizeof(copy_table_args)); + copy_table_args.thread_safe = 0; + copy_table_args.hash_slot_size = HTABLE_SIZE; + copy_table_args.max_elem_num = 0; + copy_table_args.expire_time = 0; + copy_table_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO; + copy_table_args.key_comp = NULL; + copy_table_args.key2index = NULL; + copy_table_args.data_free = free_fuc; + copy_table_args.data_expire_with_condition = NULL; + copy_table_args.recursive = 0; + MESA_htable_handle copy_htable_handle = MESA_htable_create(©_table_args, sizeof(copy_table_args)); + + struct htable_handle * htable_copied_para = (struct htable_handle *)htable_para; + struct htable_handle * htable_iterate_para = (struct htable_handle *)calloc(1,sizeof(struct htable_handle)); + htable_iterate_para->runtime_table = copy_htable_handle; + htable_iterate_para->para = htable_copied_para->para; + + if(MESA_htable_iterate(htable_copied_para->runtime_table, func, htable_iterate_para) == -1) + { + printf("iterate error!\n"); + } + free(htable_iterate_para); + htable_copied_para=NULL; + return copy_htable_handle; +} + +void copy_indextable_item_iterate(const uchar * key, uint size, void * data, void * user) +{ + struct index_table_data * index_data = (struct index_table_data *)data; + struct htable_handle * htable_copied_para = (struct htable_handle *)user; + + struct index_table_data * index_data_copy = (struct index_table_data *)calloc(1, sizeof(struct index_table_data)); + 
struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ)); + index_data_copy->listhead = head; + index_data_copy->cnt = index_data->cnt; + + TAILQ_INIT(head); + struct linklist_node * tmp_node = NULL; + struct id_table_data * ret = NULL; + int i = 0; + + TAILQ_FOREACH(tmp_node, index_data->listhead, listentry) + { + struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node)); + node_data->size = tmp_node->size; + node_data->position = (short *)calloc(node_data->size, sizeof(short)); + for(i = 0;i < tmp_node->index;i++) + { + node_data->position[i] = tmp_node->position[i]; + } + ret = (struct id_table_data *)MESA_htable_search(htable_copied_para->para, (const uchar *)(&(tmp_node->basicinfo->id)), sizeof(tmp_node->basicinfo->id)); + if(ret == NULL) + { + //printf("copy id %u not exist\n",tmp_node->basicinfo->id); + free(node_data->position); + node_data->position = NULL; + free(node_data); + node_data = NULL; + continue; + } + node_data->basicinfo = ret; + node_data->index = tmp_node->index; + node_data->blocksize = tmp_node->blocksize; + TAILQ_INSERT_TAIL(head, node_data, listentry); + } + MESA_htable_add(htable_copied_para->runtime_table, key, size, (const void *)index_data_copy); +} +//TODO: Using the orginal value instead of make a duplication to be faster. +void copy_idtable_item_iterate(const uchar * key, uint size, void * data, void * user) +{ + struct id_table_data * id_data = (struct id_table_data *)data; + struct htable_handle * htable_para = (struct htable_handle *)user; + struct id_table_data * id_data_copy = (struct id_table_data *)calloc(1, sizeof(struct id_table_data)); + assert(id_data->tag!=NULL); + memcpy(id_data_copy,id_data,sizeof(struct id_table_data)); + id_data_copy->sfh = (char *)calloc(id_data_copy->sfh_length,sizeof(char)); + memcpy(id_data_copy->sfh,id_data->sfh,id_data_copy->sfh_length); + + MESA_htable_add(htable_para->runtime_table, (const uchar *)(&(id_data_copy->id)), sizeof(id_data_copy->id), (const void *)id_data_copy); +} + + + + +int GIE_insert_indextable(MESA_htable_handle htable_copy, struct id_table_data * info, char * key, unsigned int index, unsigned long long blocksize) +{ + int key_length = strnlen(key,KEY_MAX_LENGTH); + struct linklist_node * node_data = (struct linklist_node *)calloc(1,sizeof(struct linklist_node)); + node_data->size = GRAM_CNT_MAX; + node_data->position = (short *)calloc(node_data->size, sizeof(short)); + node_data->basicinfo = info; + node_data->index = 0; + node_data->position[(node_data->index)++] = index; + node_data->blocksize = blocksize; + + //_handle->mem_occupy += sizeof(struct linklist_node) + sizeof(short)*(node_data->size); + + struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable_copy, \ + (const uchar *)(key), key_length)); + + + if(ret != NULL) + { + struct linklist_node * tmp = NULL; + TAILQ_FOREACH(tmp, ret->listhead, listentry) + { + if(tmp->basicinfo->id > node_data->basicinfo->id) + { + TAILQ_INSERT_BEFORE(tmp, node_data, listentry); + ret->cnt ++; + if(ret->cnt >= CNT_MAX) + { + indextable_delete_with_threshold(htable_copy,ret,key); + } + return 0; + } + if(tmp->basicinfo->id == node_data->basicinfo->id && tmp->blocksize == blocksize) + { + if(tmp->index >= tmp->size) + { + tmp->size *= 2; + tmp->position = realloc(tmp->position, (tmp->size)*sizeof(short)); + } + tmp->position[(tmp->index)++] = index; + //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(node_data->size)); + free(node_data->position); + 
node_data->position = NULL; + free(node_data); + node_data = NULL; + return 0; + } + } + TAILQ_INSERT_TAIL(ret->listhead, node_data, listentry); + ret->cnt ++; + if(ret->cnt >= CNT_MAX) + { + indextable_delete_with_threshold(htable_copy,ret,key); + } + } + + else + { + struct index_table_data * index_data = (struct index_table_data *)calloc(1, sizeof(struct index_table_data)); + struct TQ * head = (struct TQ *)calloc(1, sizeof(struct TQ)); + //_handle->mem_occupy += sizeof(struct index_table_data) + sizeof(struct TQ); + + index_data->listhead = head; + index_data->cnt = 0; + + TAILQ_INIT(head); + TAILQ_INSERT_TAIL(head, node_data, listentry); + index_data->cnt++; + //_handle->hash_cnt++; + if(MESA_htable_add(htable_copy, (const uchar *)(key), key_length, (const void *)index_data) < 0) + { + printf("add index_table failed!\n"); + assert(0); + return -1; + } + } + return 0; + +} + + + +int GIE_delete(GIE_handle_inner_t * _handle, GIE_digest_t * digest) +{ + int success_cnt = 0; + struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(_handle->id_table, \ + (const uchar *)(&(digest->id)), sizeof(digest->id)); + if(ret == NULL) + { + printf("del %d doesn't exist!\n",digest->id); + return -1; + } + else + { + int gram_value = _handle->user_gram_value; + char key[gram_value+1]; + char * tmp_gram = ret->sfh; + while(*tmp_gram != '\0') + { + if(*tmp_gram == ':') + { + tmp_gram++; + break; + } + tmp_gram++; + } + unsigned int real_length = get_real_length(tmp_gram, ret->sfh_length); + int gram_cnt = real_length - gram_value + 1; + int k = 0; + for(k = 0; k < gram_cnt; k++) + { + memset(key, '\0', gram_value+1); + memcpy(key, tmp_gram++, gram_value); + if(GIE_delete_from_indextable_by_key(_handle, key, digest->id) < 0) + { + printf("delete %d indextable failed!\n",digest->id); + continue; + } + } + success_cnt++; + } + + return success_cnt; +} + + + +int GIE_delete_from_indextable_by_key(MESA_htable_handle htable, char * key, unsigned int id) +{ + int key_length = strnlen(key,KEY_MAX_LENGTH); + struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search(htable, \ + (const uchar *)(key), key_length)); + if(ret == NULL) + { + return 0; + } + + + struct linklist_node * tmp = TAILQ_FIRST(ret->listhead); + while(tmp != NULL) + { + struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp, listentry); + if(tmp->basicinfo->id != id) + { + tmp=linklist_tmp; + continue; + } + TAILQ_REMOVE(ret->listhead, tmp, listentry); + ret->cnt--; + //_handle->mem_occupy -= (sizeof(struct linklist_node) + sizeof(short)*(tmp->size)); + free(tmp->position); + tmp->position = NULL; + free(tmp); + tmp = NULL; + if(TAILQ_EMPTY(ret->listhead) == 1) + { + //_handle->mem_occupy -= (sizeof(struct index_table_data) + sizeof(struct TQ)); + int ret = MESA_htable_del(htable, (const uchar *)(key), key_length, indextable_free); + if(ret < 0) + { + printf("indextable backtrack delete error!\n"); + assert(0); + return -1; + } + + } + } + return 0; +} + + + + +int GIE_cmp(const void * a, const void * b) +{ + unsigned int tmp_a = *(unsigned int *)a; + unsigned int tmp_b = *(unsigned int *)b; + if(before(tmp_a, tmp_b)) + { + return -1; + } + else if(after(tmp_a, tmp_b)) + { + return 1; + } + else + { + return 0; + } +} + + +inline unsigned int get_real_length(const char * string, unsigned int length) +{ + unsigned int ret = 0; + const char * tmp_str = string; + while(*tmp_str != '\0') + { + if(*tmp_str == '[') + { + break; + } + tmp_str++; + ret ++; + } + return ret; +} + + +inline int 
GIE_part_query(GIE_handle_inner_t * _handle, const char * query_string, int index_begin, int part_query_len,unsigned int ** id_union, unsigned int * union_index, unsigned int * union_size, unsigned long long blocksize) +{ + unsigned int gram_value = _handle->user_gram_value; + + unsigned int real_length = part_query_len; + unsigned int chunk_count_max = 0; + if(real_length < gram_value) + { + return 0; + } + else + { + chunk_count_max = real_length/gram_value; + } + char key[gram_value+1]; + struct index_table_data * ret = NULL; + struct linklist_node * tmp_node_t = NULL; + + unsigned int position_accuracy = _handle->user_position_accuracy; + + int i=0,j=0,k=0; + unsigned int tmp_min = 0; + int sum = 0, htable_index = 0; + for(i = index_begin; i < chunk_count_max + index_begin; i++) + { + sum = 0; + memset(key,'\0',gram_value+1); + memcpy(key, query_string, gram_value); + for(k = 0; k < gram_value; k++) + { + sum += key[k]; + } + htable_index = sum%HTABLE_NUM; + ret = (struct index_table_data *) MESA_htable_search(_handle->index_table[htable_index], \ + (const uchar *)(key), strnlen(key,gram_value)); + query_string = query_string + gram_value; + + if(ret ==NULL) + { + break; + } + + tmp_node_t = NULL; + TAILQ_FOREACH(tmp_node_t, ret->listhead, listentry) + { + tmp_min = 0; + if(i*gram_value >= position_accuracy) + { + tmp_min = i*gram_value - position_accuracy; + } + for(j = 0; j < tmp_node_t->index; j++) + { + if((blocksize == tmp_node_t->basicinfo->blocksize) && (tmp_node_t->position[j] >= tmp_min) && (tmp_node_t->position[j] <= i*gram_value + position_accuracy)) + //if(blocksize == tmp_node_t->basicinfo->blocksize) + { + if((*union_index) >= (*union_size)) + { + *union_size = (*union_size) * 2; + *id_union = (unsigned int *)realloc(*id_union, (*union_size)*sizeof(unsigned int)); + } + (*id_union)[(*union_index)] = tmp_node_t->basicinfo->id; + (*union_index)++; + break; + } + } + } + } + return chunk_count_max; +} + +inline int GIE_gram_with_position(GIE_handle_inner_t * _handle, unsigned long long query_blocksize, const char * fuzzy_string, unsigned int ** id_union, + unsigned int * union_index,unsigned int * union_size, unsigned int * chunk_cnt) +{ + const char * tmpstr = fuzzy_string; + const char * query_string_begin; + unsigned long long blocksize = query_blocksize; + int part_query_len = 0; + int query_actual_len = 0; + while(*tmpstr != ':'&& *tmpstr != '\0') + { + tmpstr ++; + } + if(*tmpstr == ':') + { + tmpstr ++; + } + else + { + return 0; + } + query_string_begin = tmpstr; + char *p = NULL; + + while((*query_string_begin) != '\0') + { + int left = 0; + int right = 0; + p=strchr(query_string_begin,'['); + if(p!=NULL) + { + part_query_len = p-query_string_begin; + int ret = sscanf(p,"[%d:%d]",&left,&right); + if(ret != 2) + { + break; + } + p=strchr(p,']'); + if(p != NULL && (*p) != '\0') + { + int index_begin = (left/blocksize - TOLERENCE_SIZE > 0 ? 
(left/blocksize - TOLERENCE_SIZE) : 0); + (*chunk_cnt) += GIE_part_query(_handle,query_string_begin,index_begin, part_query_len, + id_union, union_index, union_size, blocksize); + query_actual_len += part_query_len; + query_string_begin = p+1; + } + else + { + break; + } + } + else + { + break; + } + } + return query_actual_len; +} + +inline unsigned long long calc_fh_blocksize(unsigned long long orilen) +{ + double tmp = orilen/(64 * BLOCKSIZE_MIN); + double index = floor(log(tmp)/log(2)); + double tmp_t = pow(2,index); + unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN); + return blocksize; +} + +inline unsigned long long get_blocksize_from_head(const char * fuzzy_string, unsigned int str_len) +{ + const char * tmp_str = fuzzy_string; + char blk[100]; + memset(blk,'\0',sizeof(blk)); + unsigned long long blocksize = 0; + int i = 0; + while(*tmp_str != '\0' && *tmp_str != ':' && str_len != 0 && i < 100) + { + blk[i++] = *tmp_str; + tmp_str++; + str_len--; + } + blocksize = (unsigned long long)atoi(blk); + return blocksize; +} +int GIE_string_similiarity(const char *str1, int len1, const char *str2, int len2) +{ + int edit_distance=0; + int conf=0; + edit_distance = edit_distn(str1, len1,str2,len2); + conf = 100-(edit_distance*100)/(len1 + len2); + return conf; +} + +int GIE_sfh_similiarity(const char *sfh1, int len1, const char *sfh2, int len2) +{ + int j = 0, t = 0; + unsigned long long query_blocksize = 0, index_blocksize = 0; + unsigned int query_real_length = 0, index_real_length = 0; + const char *query_gram_begin = sfh1; + const char *index_gram_begin = sfh2; + char *splice_str = (char *)malloc(sizeof(char)*len1); + memset(splice_str,'\0',len1); + char *spli_str_begin = splice_str; + int edit_distance = 0; + int ret = 0; + char *p = NULL; + int splice_len = 0; + + for(j = 0; j < 2; j++) + { + index_blocksize = get_blocksize_from_head(index_gram_begin, len2); + while((*index_gram_begin) != '\0') + { + if((*index_gram_begin) == ':') + { + index_gram_begin++; + break; + } + index_gram_begin++; + } + index_real_length = get_real_length(index_gram_begin, len2); + query_gram_begin = sfh1; + for(t = 0; t < 2; t++) + { + query_blocksize = get_blocksize_from_head(query_gram_begin, len1); + //printf("gram_begin:%c\n",*index_gram_begin); + //printf("gram_str:%s\n",index_gram_begin); + while((*query_gram_begin) != '\0') + { + if((*query_gram_begin) == ':') + { + query_gram_begin++; + break; + } + query_gram_begin++; + } + //printf("query_blocksize:%lld, index_blocksize:%lld\n",query_blocksize,index_blocksize); + //index_real_length = get_real_length(index_gram_begin, len1); + if(query_blocksize == index_blocksize) + { + while((*query_gram_begin) != '#' && (*query_gram_begin) != '\0') + { + p=strchr(query_gram_begin,'['); + if(p!=NULL) + { + query_real_length = p-query_gram_begin; + p=strchr(p,']'); + if(p != NULL && (*p) != '\0') + { + + memcpy(spli_str_begin,query_gram_begin,query_real_length); + spli_str_begin += query_real_length; + //edit_distance += edit_distn(query_gram_begin, query_real_length, index_gram_begin, index_real_length); + query_gram_begin = p+1; + } + else + { + break; + } + } + else + { + break; + } + } + splice_len = strnlen(splice_str,len1); + edit_distance = edit_distn(index_gram_begin, index_real_length, splice_str, splice_len); + //printf("query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance); + ret = 100-(edit_distance*100)/(index_real_length + splice_len); + //ret = (100*ret)/SPAM_LENGTH; + //ret 
= 100-ret; + //ret = 100 - (100*edit_distance)/(query_real_length); + free(splice_str); + return ret; + } + while(*query_gram_begin != '\0') + { + if(*query_gram_begin == '#') + { + query_gram_begin++; + break; + } + query_gram_begin++; + } + + } + while(*index_gram_begin != '\0') + { + if(*index_gram_begin == '#') + { + index_gram_begin++; + break; + } + index_gram_begin++; + } + } + //printf("no blocksize:query_real_length:%d splice_length:%d edit_distance:%d\n",query_real_length,splice_len,edit_distance); + free(splice_str); + return 0; +} + + + + +int GIE_query(GIE_handle_t * handle, const char * data, int data_len, GIE_result_t * results, int result_size) +{ + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *) handle; + int i = 0, j = 0; + unsigned int union_index = 0; + unsigned int gram_value = _handle->user_gram_value; + unsigned int query_actual_len = 0; + unsigned int union_size = UNION_INIT_SIZE; + unsigned int chunk_cnt = 0; + const char *fuzzy_string_begin = data; + unsigned int * id_union =(unsigned int *)calloc(union_size, sizeof(unsigned int)); + unsigned long long query_blocksize = 0; + unsigned int fuzzy_string_len = (unsigned int)data_len; + + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + for(j = 0;j < 2;j++) + { + query_blocksize = get_blocksize_from_head(fuzzy_string_begin, fuzzy_string_len); + if(query_blocksize == 0) + { + return 0; + } + query_actual_len += GIE_gram_with_position(_handle, query_blocksize, fuzzy_string_begin, &id_union, &union_index, &union_size, &chunk_cnt); + while(*fuzzy_string_begin != '#' && *fuzzy_string_begin != '\0') + { + fuzzy_string_begin++; + } + if(*fuzzy_string_begin == '#') + { + fuzzy_string_begin++; + } + } + } + else if(_handle->input_format == GIE_INPUT_FORMAT_PLAIN) + { + query_actual_len = fuzzy_string_len; + chunk_cnt = GIE_part_query(_handle, fuzzy_string_begin, 0, query_actual_len, &id_union, &union_index, &union_size, 0); + } + + if(union_index == 0) + { + free(id_union); + id_union = NULL; + return 0; + } + + qsort(id_union, union_index, sizeof(id_union[0]), GIE_cmp); + + unsigned int current_id = id_union[0]; + unsigned int * tmp_id = id_union; + unsigned int count = 0; + struct id_table_data * ret_tmp = NULL; + short conf = 0; + int ret_size = 0; + for(i = 0; i <= union_index; i++) + { + if( i == union_index || *tmp_id != current_id ) + { + ret_tmp = (struct id_table_data *) MESA_htable_search(_handle->id_table, \ + (const uchar *)(&(current_id)), sizeof(current_id)); + + if(ret_tmp == NULL) + { + break; + } + char * tmp_gram = ret_tmp->sfh; + int length = ret_tmp->sfh_length; + if(ret_tmp->gram_cnt == 0||chunk_cnt == 0) + { + conf = 0; + } + else + { + conf = (count*(query_actual_len-gram_value+1)*10)/(chunk_cnt*(ret_tmp->gram_cnt)); + } + + if(_handle->ED_reexamine == 1) + { + if(_handle->input_format == GIE_INPUT_FORMAT_SFH) + { + conf = GIE_sfh_similiarity(data, fuzzy_string_len, tmp_gram, length); + } + else + { + conf=GIE_string_similiarity(data, fuzzy_string_len, tmp_gram, length); + } + } + + if(conf >= ret_tmp->cfds_lvl) + { + results[ret_size].cfds_lvl = conf; + results[ret_size].id = current_id; + /*results[ret_size].tag = (char *)malloc((ret_tmp->sfh_length + 1)*sizeof(char)); + memset(results[ret_size].tag,'\0',(ret_tmp->sfh_length+1)); + memcpy(results[ret_size].tag, ret_tmp->sfh,ret_tmp->sfh_length);*/ + results[ret_size].tag = ret_tmp->tag; + ret_size++; + } + + if(ret_size == result_size) + { + break; + } + + current_id = *tmp_id; + count = 1; + + } + else + { + count++; + } + + tmp_id ++; + } 
+ + free(id_union); + id_union = NULL; + return ret_size; +} + + +unsigned long long GIE_status(GIE_handle_t * handle, int type) +{ + unsigned long long length; + GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)handle; + switch(type) + { + case MEM_OCCUPY: + length = _handle->mem_occupy; + break; + default: + return 0; + } + return length; +} + diff --git a/src/get_td_mistake_lost/new_TD.conf b/src/get_td_mistake_lost/new_TD.conf new file mode 100644 index 0000000..be9301e --- /dev/null +++ b/src/get_td_mistake_lost/new_TD.conf @@ -0,0 +1,3 @@ +[file] +ripe_files_address = ../data/ripe_data/td_data_20171207/new_TD.txt +raw_file_address = ../data/ripe_data/td_data_20171207/all_av_digest diff --git a/src/get_td_mistake_lost/new_TD.py b/src/get_td_mistake_lost/new_TD.py new file mode 100644 index 0000000..5b7269f --- /dev/null +++ b/src/get_td_mistake_lost/new_TD.py @@ -0,0 +1,34 @@ +#-*-coding:utf-8-*- +import re +import random +import ConfigParser +import bisect +import commands +import os +import hashlib + +config = ConfigParser.RawConfigParser() +config.read("file_digest.conf") +raw_file_address=config.get("new_td","raw_file_address") +ripe_files_address=config.get("new_td","ripe_files_address") +print ("%s %s" %(raw_file_address,ripe_files_address)) + +def get_md5_value(td_string): + my_md5 = hashlib.md5() + my_md5.update(td_string) + my_md5_string=str(my_md5.hexdigest()) + return my_md5_string + +i=0 +with open(raw_file_address,'r') as infile: + with open(ripe_files_address,'w')as outfile: + for line in infile: + i+=1 + if(i%100000==0): + print i; + data_line_val = re.split(r';',line) + data_set = re.split(r"URL:|ServerIP:|MediaType:|MediaLen:|Etag:|LastModify:",data_line_val[4]) + td_string=str("url"+data_set[1]+"MediaType:"+data_set[3]+"MediaLen:"+data_set[4] \ + +"Etag:"+data_set[5]+"LastModify:"+data_set[6]+"td_data_md5_32k:"+data_line_val[16]) + new_td=get_md5_value(td_string) + outfile.write(td_string+";"+new_td+";"+data_line_val[19]+"\n")
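A standalone restatement of the scoring done by edit_distn() and GIE_string_similiarity() in gram_index_engine.c above: a two-row edit-distance DP with insert/delete cost 1 and replace cost 2, mapped to a 0-100 similarity as 100 - 100*ED/(len1+len2). This is an illustrative sketch, not code from the patch:

/* Sketch only: same cost model and score formula as edit_distn() /
 * GIE_string_similiarity() above, written as a self-contained program. */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

static int min3(int a, int b, int c) { return a < b ? (a < c ? a : c) : (b < c ? b : c); }

static int edit_distance(const char *s1, int l1, const char *s2, int l2)
{
    int *prev = malloc((l2 + 1) * sizeof(int));
    int *cur  = malloc((l2 + 1) * sizeof(int));
    for (int j = 0; j <= l2; j++)
        prev[j] = j;                                  /* delete cost 1 per char */
    for (int i = 0; i < l1; i++) {
        cur[0] = i + 1;                               /* insert cost 1 per char */
        for (int j = 0; j < l2; j++)
            cur[j + 1] = min3(prev[j + 1] + 1,        /* insert  */
                              cur[j] + 1,             /* delete  */
                              prev[j] + (s1[i] == s2[j] ? 0 : 2)); /* replace cost 2 */
        int *t = prev; prev = cur; cur = t;           /* two-row rotation */
    }
    int d = prev[l2];
    free(prev);
    free(cur);
    return d;
}

int main(void)
{
    const char *a = "hello gram index engine";
    const char *b = "hello gram index enginx";
    int ed = edit_distance(a, (int)strlen(a), b, (int)strlen(b));
    int score = 100 - (ed * 100) / ((int)strlen(a) + (int)strlen(b));
    printf("edit distance = %d, similarity = %d\n", ed, score);
    return 0;
}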
\ No newline at end of file diff --git a/src/rssb_statistics/all_len_st.py b/src/rssb_statistics/all_len_st.py new file mode 100644 index 0000000..6e32518 --- /dev/null +++ b/src/rssb_statistics/all_len_st.py @@ -0,0 +1,17 @@ +import os +import re +import csv +import bisect +# cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" +# cmd2 = "cat media_expire.log.2018* > media_expire.log" +# os.system(cmd1) +# os.system(cmd2) +breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] +st_num = [0,0,0,0,0,0,0,0] + +with open("media_expire.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|media_len:",line) + j = bisect.bisect(breakpoints,int(line_result[6])) + st_num[j]+=1 +print st_num
\ No newline at end of file diff --git a/src/rssb_statistics/delete_len_st.py b/src/rssb_statistics/delete_len_st.py new file mode 100644 index 0000000..010dc84 --- /dev/null +++ b/src/rssb_statistics/delete_len_st.py @@ -0,0 +1,48 @@ +import os +import re +import csv +import bisect +cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" +cmd2 = "cat ../12.log/media_expire.log.2018* > ../12.log/media_expire.log" +cmd3 = "cat media_expire.log.2018* > media_expire.log" +os.system(cmd1) +os.system(cmd2) +os.system(cmd3) +breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] +st_num = [0,0,0,0,0,0,0,0] +mid_13_set=set() +mid_12_set=set() +mid_12_file = "../12.log/media_expire.log" +mid_13_file = "media_expire.log" + +i=0 +with open(mid_13_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + line_result = re.split(r",|MID:|media_len:",line) + mid_13_set.add(int(line_result[3])) + +i=0 +with open(mid_12_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + line_result = re.split(r",|MID:|media_len:",line) + mid_12_set.add(int(line_result[3])) + +different = mid_12_set.difference(mid_13_set) + +i=0 +with open(mid_12_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + line_result = re.split(r",|MID:|media_len:",line) + if((int(line_result[3]) in different)==True): + j = bisect.bisect(breakpoints,int(line_result[6])) + st_num[j]+=1 +print st_num
\ No newline at end of file diff --git a/src/rssb_statistics/find_lost_td.conf b/src/rssb_statistics/find_lost_td.conf new file mode 100644 index 0000000..6ab40d9 --- /dev/null +++ b/src/rssb_statistics/find_lost_td.conf @@ -0,0 +1,14 @@ +[file] +raw_survey_file_13 = ../data/data_20180423/14.log/survey.recv.log +raw_deup_file = ../data/data_20180423/13.log/dedup.log +raw_survey_file_12 = ../data/data_20180423/12.log/survey.recv.log +run_time_file = ../data/data_20180423/runtime_log +lost_td_line = ../data/data_20180423/ripe_file/lost_td_line +mid_12_file = ../data/data_20180423/ripe_file/mid_12_file +mid_13_file = ../data/data_20180423/ripe_file/mid_14_file +list_12_file = ../data/data_20180423/ripe_file/list_12_file +list_13_file = ../data/data_20180423/ripe_file/list_14_file +different_mid_file_13 = ../data/data_20180423/ripe_file/different_mid_file_14 +different_mid_file_12 = ../data/data_20180423/ripe_file/different_mid_file_12 +different_list_file_13 = ../data/data_20180423/ripe_file/different_list_file_14 +different_list_file_12 = ../data/data_20180423/ripe_file/different_list_file_12
\ No newline at end of file diff --git a/src/rssb_statistics/find_lost_td.py b/src/rssb_statistics/find_lost_td.py new file mode 100644 index 0000000..50f3fab --- /dev/null +++ b/src/rssb_statistics/find_lost_td.py @@ -0,0 +1,147 @@ +import ConfigParser +import re + +config = ConfigParser.RawConfigParser() +config.read("find_lost_td.conf") +raw_survey_file_13 = config.get("file","raw_survey_file_13") +raw_deup_file = config.get("file","raw_deup_file") +# run_time_file = config.get("file","run_time_file") +raw_survey_file_12 = config.get("file","raw_survey_file_12") +# lost_td_line = config.get("file","lost_td_line") +mid_12_file = config.get("file","mid_12_file") +mid_13_file = config.get("file","mid_13_file") +list_12_file = config.get("file","list_12_file") +list_13_file = config.get("file","list_13_file") +different_mid_file_13 = config.get("file","different_mid_file_13") +different_mid_file_12 = config.get("file","different_mid_file_12") +different_list_file_13 = config.get("file","different_list_file_13") +different_list_file_12 = config.get("file","different_list_file_12") + +term = {'td_len':(lambda x : len(x)==32), + 'data_num':(lambda x : len(x)>7), + 'url':(lambda x : x.find['NUll']), + 'sfh_len':(lambda x : len(x)>20), + 'not_null':(lambda x : len(x)!=0)} + +mid_13=dict() +with open(raw_survey_file_13,'r') as infile: + for line in infile: + data_line_val = re.split(r',',line) + if(len(data_line_val)==8): + mid_string = (re.split(r"MID:",data_line_val[2]))[1] + mid_13[mid_string]=list() + + +with open(mid_13_file,'w') as outfile: + for key in mid_13: + outfile.write(key+'\n') + +mid_12=dict() +with open(raw_survey_file_12,'r') as infile: + for line in infile: + data_line_val = re.split(r',',line) + if(len(data_line_val)==8): + mid_string = (re.split(r"MID:",data_line_val[2]))[1] + mid_12[mid_string]=list() + +with open(mid_12_file,'w') as outfile: + for key in mid_12: + outfile.write(key+'\n') + +different_mid_13 = list() +with open(different_mid_file_13,'w') as outfile: + for key in mid_13: + if(mid_12.has_key(key)==False): + different_mid_13.append(key) + outfile.write(key+'\n') + +different_mid_12 = list() +with open(different_mid_file_12,'w') as outfile: + for key in mid_12: + if(mid_13.has_key(key)==False): + different_mid_12.append(key) + outfile.write(key+'\n') + +i=0 +with open(raw_deup_file,'r') as infile: + for line in infile: + i+=1 + if(i%100000==0): + print i + data_line_val = re.split(r",|MID:|TD:",line) + if(term['data_num'](data_line_val) and \ + mid_13.has_key(str(data_line_val[4])) == True): + mid_13[data_line_val[4]].append(data_line_val[6]) + if(term['data_num'](data_line_val) and \ + mid_12.has_key(str(data_line_val[4])) == True): + mid_12[data_line_val[4]].append(data_line_val[6]) + +td_list_13 =list() +with open(list_13_file,'w') as outfile: + for key in mid_13.keys(): + for td in mid_13[key]: + if(term['not_null'](td) and td_list_13.count(td)==0): + td_list_13.append(td) + outfile.write(td+'\n') + +td_list_12 =list() +with open(list_12_file,'w') as outfile: + for key in mid_12.keys(): + for td in mid_12[key]: + if(term['not_null'](td) and td_list_12.count(td)==0): + td_list_12.append(td) + outfile.write(td+'\n') + +different_list_12 = list() +with open(different_list_file_12,'w') as outfile: + for x in td_list_12: + if(td_list_13.count(x)==0): + different_list_12.append(x) + outfile.write(x+'\n') + +different_list_13 = list() +with open(different_list_file_13,'w') as outfile: + for x in td_list_13: + if(td_list_12.count(x)==0): + 
different_list_13.append(x) + outfile.write(x+'\n') + +td_dict=dict() +for i in different_list_12: + td_dict[i]=list() + +# i=0 +# with open(run_time_file,'r') as infile: +# for line in infile: +# i+=1 +# if(i%100000==0): +# print i +# if(line.find("NCHK_QUREY__KNOW")!=-1): +# data_line_val = re.split(r',|TD:',line) +# if(td_dict.has_key(data_line_val[6]) == True): +# td_dict[data_line_val[6]].insert(0,"NCHK_QUREY__KNOW"+'\n') +# td_dict[data_line_val[6]].append(line) +# elif(line.find("NCHK_QUREY__UNKNOW")!=-1): +# data_line_val = re.split(r',|TD:',line) +# if(td_dict.has_key(data_line_val[6]) == True): +# td_dict[data_line_val[6]].append(line) +# elif(line.find("NCHK_REPORT__SUCC")!=-1): +# data_line_val = re.split(r',|TD:',line) +# if(td_dict.has_key(data_line_val[6]) == True): +# td_dict[data_line_val[6]].append(line) + +# else: +# continue + + +print len(different_list_12),len(different_list_13),\ +len(td_list_12),len(td_list_13),\ +len(mid_12),len(mid_13),len(different_mid_13),len(different_mid_12) + +# with open(lost_td_line,'w') as outfile: +# for key in td_dict.keys(): +# if(len(td_dict[key])>2 and td_dict[key][0]=="NCHK_QUREY__KNOW"): +# outfile.write(key+':\n') +# for i in td_dict[key]: +# outfile.write(i) + diff --git a/src/rssb_statistics/harm_len_st.py b/src/rssb_statistics/harm_len_st.py new file mode 100644 index 0000000..8a372b3 --- /dev/null +++ b/src/rssb_statistics/harm_len_st.py @@ -0,0 +1,29 @@ +import os +import re +import csv +import bisect +cmd1 = "cat survey.log.2018* | grep \"recv survey\" > survey.recv_survey.log" +cmd2 = "cat media_expire.log.2018* > media_expire.log" +os.system(cmd1) +os.system(cmd2) +breakpoints = [1048576,1310720,1572864,1835008,2097152,3145728,4194304] +st_num = [0,0,0,0,0,0,0,0] +harm_mid_dic=dict() +with open("survey.recv_survey.log",'r') as infile: + for line in infile: + data_line_val = re.split(r',',line) + if(len(data_line_val)==8): + mid_string = (re.split(r"MID:",data_line_val[2]))[1] + harm_mid_dic[mid_string]=list() + +with open("media_expire.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|media_len:",line) + if(harm_mid_dic.has_key(str(line_result[3]))==True): + # print int(line_result[6]) + j = bisect.bisect(breakpoints,int(line_result[6])) + st_num[j]+=1 +print st_num +# with open("un_recv_list.csv",'w') as csvfile: +# writer = csv.writer(csvfile) +# writer.writerow(un_recv_list)
\ No newline at end of file diff --git a/src/rssb_statistics/recv_survey_mid_st.py b/src/rssb_statistics/recv_survey_mid_st.py new file mode 100644 index 0000000..f7faaeb --- /dev/null +++ b/src/rssb_statistics/recv_survey_mid_st.py @@ -0,0 +1,3 @@ +import os +cmd = "cat survey.log.2018* | grep \"recv survey\" | wc -l" +os.system(cmd)
\ No newline at end of file diff --git a/src/rssb_statistics/service_st.py b/src/rssb_statistics/service_st.py new file mode 100644 index 0000000..7fecd33 --- /dev/null +++ b/src/rssb_statistics/service_st.py @@ -0,0 +1,29 @@ +import re + +different_mid_12=set() +different_mid_14=set() +service_dic=dict() + +with open("different_mid_file_12",'r') as infile: + for line in infile: + different_mid_12.add(long(line)) +with open("different_mid_file_14",'r') as infile: + for line in infile: + different_mid_14.add(long(line)) +with open("../12.log/survey.recv_survey.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|service:",line) + if((long(line_result[3]) in different_mid_12)==True): + if(service_dic.has_key(line_result[5])==True): + service_dic[line_result[5]]+=1 + else: + service_dic[line_result[5]]=0 +with open("../14.log/survey.recv.log",'r') as infile: + for line in infile: + line_result = re.split(r",|MID:|service:",line) + if((long(line_result[3]) in different_mid_14)==True): + if(service_dic.has_key(line_result[5])==True): + service_dic[line_result[5]]+=1 + else: + service_dic[line_result[5]]=0 +print service_dic
\ No newline at end of file diff --git a/src/rssb_statistics/un_recv_st.py b/src/rssb_statistics/un_recv_st.py new file mode 100644 index 0000000..9d3d234 --- /dev/null +++ b/src/rssb_statistics/un_recv_st.py @@ -0,0 +1,36 @@ +import os +import re +import csv +cmd = "cat rssb_stat.log.2018-04-16 | grep \"/home/audiorecognition/aufilter/un_recv\" > rssb_stat.log.un_recv" +os.system(cmd) +i=0 +last_len = 0 +add_len = 0 +num = 0 +un_recv_list = list() +with open("rssb_stat.log.un_recv",'r') as infile: + for line in infile: + line_result = re.split(r'\t',line) + if(i==0): + last_len = long(line_result[2]) + break + +with open("rssb_stat.log.un_recv",'r') as infile: + for line in infile: + line_result = re.split(r'\t',line) + if(last_len <= long(line_result[2])): + add_len = add_len+long(line_result[2])-last_len + last_len = long(line_result[2]) + else: + add_len = add_len+long(line_result[2]) + last_len = long(line_result[2]) + i+=1 + if(i>=120): + i=0 + un_recv_list.append(str(add_len)) + add_len=0 + +with open("un_recv_list.csv",'w') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(un_recv_list) + diff --git a/src/rssb_statistics/un_recv_survey_mid_st.py b/src/rssb_statistics/un_recv_survey_mid_st.py new file mode 100644 index 0000000..e1e970f --- /dev/null +++ b/src/rssb_statistics/un_recv_survey_mid_st.py @@ -0,0 +1,3 @@ +import os +cmd = "cat survey.log.2018* | grep \"sync_audio\" | wc -l" +os.system(cmd)
\ No newline at end of file diff --git a/src/sfh_integrate/SFH_function.c b/src/sfh_integrate/SFH_function.c new file mode 100644 index 0000000..a311f9c --- /dev/null +++ b/src/sfh_integrate/SFH_function.c @@ -0,0 +1,177 @@ +/* +gcc -g SFH_function.c -o SFH_function -lmaatframe -lMESA_htable -I../include +*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include "gram_index_engine.h" +#include <MESA/MESA_htable.h> +#include <assert.h> +#include <ctype.h> +#define SLOT_SIZE (1024*1024*16) +#define THREAD_SAFE 0 +#define BUFFER_LEN (10*1024) +#define SFH_LEN (10*1024) +#define TD_LEN 33 + +typedef struct sfh_link +{ + char *sfh_str; + int similiar; + int all_similiar; + long hash_len; + struct sfh_link *next; +}sfh_link; + +typedef struct top_similiar_sfh +{ + int all_num; + int all_similiar; + char *sfh_str; + long hash_len; + sfh_link *sfh_link_items; +}top_similiar_sfh; + +long get_hashed_len(const char* sfh) +{ + char *data=(char*)malloc(strlen(sfh)+1); + memcpy(data,sfh, strlen(sfh)); + data[strlen(sfh)]='\0'; + char *token=NULL,*sub_token=NULL,*saveptr; + long left_offset=0,right_offset=0,hashed_length=0; + int ret=0,first=0; + for (token = data; ; token= NULL) + { + sub_token= strtok_r(token,"[", &saveptr); + if (sub_token == NULL) + { + break; + } + if(first==0)//jump over the first sub string. + { + first=1; + continue; + } + ret=sscanf(sub_token,"%ld:%ld",&left_offset,&right_offset); + if(ret!=2) + { + return 0; + } + assert(ret==2); + hashed_length+=right_offset-left_offset+1; + } + //printf("hashed length=%ld\n",hashed_length); + free(data); + return hashed_length/2; +} + +void print_mistake_td(const uchar *key,uint size,void *data,void *arg) +{ + FILE *ripe_file = (FILE*)arg; + top_similiar_sfh *temp_top_similiar_sfh=(top_similiar_sfh*)data; + fprintf(ripe_file,"%s,%s\n",key,temp_top_similiar_sfh->sfh_str); + sfh_link *temp_sfh_link=temp_top_similiar_sfh->sfh_link_items; + for(;;temp_sfh_link=temp_sfh_link->next) + { + if(temp_sfh_link==NULL) + { + break; + } + fprintf(ripe_file,"%d;%s;%d\n",temp_sfh_link->similiar,temp_sfh_link->sfh_str,temp_sfh_link->hash_len); + } + fprintf(ripe_file,"\n"); +} +int main() +{ + FILE *raw_file; + FILE *ripe_file; + char *raw_file_dir="../data/td_data_set/td_data_20171207/video_id_raw_data"; + char *ripe_file_dir="../data/ripe_data/td_data_20171207/all_av_digest_mistake_level_2"; + char *sfh_str=NULL,*td=NULL,*buffer=NULL,*td_str=NULL; + raw_file = fopen(raw_file_dir,"r+"); + ripe_file = fopen(ripe_file_dir,"w+"); + long temp_hash_len=0; + unsigned int slot_size=SLOT_SIZE; + int i=0,thread_safe=THREAD_SAFE,ret=0,temp_similiar=0,temp_all_similiar=0; + top_similiar_sfh *temp_top_similiar_sfh=NULL; + sfh_link *temp_sfh_link=NULL; + MESA_htable_handle htable=NULL; + if(raw_file==NULL) + { + printf("open all_av_digest error\n"); + return -1; + } + + if(ripe_file==NULL) + { + printf("open all_av_digest_mistake_level error"); + return -1; + } + buffer = (char*)calloc(BUFFER_LEN,sizeof(char)); + sfh_str = (char*)calloc(SFH_LEN,sizeof(char)); + td = (char*)calloc(TD_LEN,sizeof(char)); + td[32]='\0'; + htable=MESA_htable_born(); + MESA_htable_set_opt(htable,MHO_SCREEN_PRINT_CTRL,&thread_safe,sizeof(unsigned int)); + MESA_htable_set_opt(htable,MHO_HASH_SLOT_SIZE,&slot_size,sizeof(unsigned int)); + MESA_htable_mature(htable); + while(feof(raw_file)==0) + { + i++; + if(i%10000==0) + { + printf("%d\n",i); + } + fgets(buffer,BUFFER_LEN-1,raw_file); + buffer[BUFFER_LEN-1]='\0'; + ret=sscanf(buffer,"%[^;];%[^;];%[^;]",td_str,td,sfh_str); + 
assert(ret==2); + td[32]='\0'; + if((temp_top_similiar_sfh=MESA_htable_search(htable,td,TD_LEN))==NULL) + { + temp_top_similiar_sfh=(top_similiar_sfh*)calloc(1,sizeof(top_similiar_sfh)); + temp_top_similiar_sfh->all_num=1; + temp_top_similiar_sfh->all_similiar=0; + temp_top_similiar_sfh->hash_len=get_hashed_len(sfh_str); + temp_top_similiar_sfh->sfh_str=strdup(sfh_str); + temp_top_similiar_sfh->sfh_link_items=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_top_similiar_sfh->sfh_link_items->sfh_str=strdup(sfh_str); + temp_top_similiar_sfh->sfh_link_items->similiar=0; + temp_top_similiar_sfh->sfh_link_items->all_similiar=0; + temp_top_similiar_sfh->sfh_link_items->next=NULL; + ret=MESA_htable_add(htable,td,TD_LEN,(void *)temp_top_similiar_sfh); + assert(ret>0); + } + else + { + temp_similiar=GIE_sfh_similiarity(temp_top_similiar_sfh->sfh_str,(int)strlen(temp_top_similiar_sfh->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_top_similiar_sfh->all_similiar+=temp_similiar; + temp_sfh_link=temp_top_similiar_sfh->sfh_link_items; + for(temp_all_similiar=0;;temp_sfh_link=temp_sfh_link->next) + { + temp_similiar=GIE_sfh_similiarity(temp_sfh_link->sfh_str,(int)strlen(temp_sfh_link->sfh_str),sfh_str,(int)strlen(sfh_str)); + temp_sfh_link->all_similiar+=temp_similiar; + temp_all_similiar+=temp_similiar; + if(temp_sfh_link->all_similiar>temp_top_similiar_sfh->all_similiar) + { + free(temp_top_similiar_sfh->sfh_str); + temp_top_similiar_sfh->sfh_str=strdup(temp_sfh_link->sfh_str); + temp_top_similiar_sfh->all_similiar=temp_sfh_link->all_similiar; + } + if(temp_sfh_link->next==NULL) + { + break; + } + } + temp_sfh_link->next=(sfh_link*)calloc(1,sizeof(sfh_link)); + temp_sfh_link->next->sfh_str=strdup(sfh_str); + temp_sfh_link->next->hash_len=get_hashed_len(sfh_str); + temp_sfh_link->next->similiar=0; + temp_sfh_link->next->all_similiar=temp_all_similiar; + temp_sfh_link->next->next=NULL; + temp_top_similiar_sfh->all_num+=1; + } + } + fclose(raw_file); + MESA_htable_iterate(htable,print_mistake_td,ripe_file); +}
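SFH_function.c above uses only GIE_sfh_similiarity() from the index engine, but gram_index_engine.c also exposes a full create/update/query lifecycle. Below is a minimal sketch of driving that lifecycle for plain (non-SFH) strings. gram_index_engine.h is not part of this diff, so the GIE_create_para_t, GIE_digest_t and GIE_result_t field names are inferred from how gram_index_engine.c dereferences them and should be read as assumptions:

/* Sketch only, not code from the patch: create -> insert -> query -> destroy.
 * Struct field names are guessed from gram_index_engine.c because the header
 * is not included in this commit. */
#include <stdio.h>
#include <string.h>
#include "gram_index_engine.h"

int main(void)
{
    GIE_create_para_t para;
    memset(&para, 0, sizeof(para));
    para.gram_value = 4;                  /* n-gram length used to key the index tables */
    para.position_accuracy = 8;           /* positional tolerance checked in GIE_part_query */
    para.format = GIE_INPUT_FORMAT_PLAIN; /* plain strings, no "blocksize:hash" SFH head */
    para.ED_reexamine = 1;                /* rescore candidates with edit distance */

    GIE_handle_t *handle = GIE_create(&para);

    char sample[] = "hello gram index engine";
    GIE_digest_t digest;
    memset(&digest, 0, sizeof(digest));
    digest.operation = GIE_INSERT_OPT;
    digest.id = 1;
    digest.sfh = sample;
    digest.sfh_length = (short)strlen(sample);
    digest.cfds_lvl = 50;                 /* minimum confidence worth reporting */
    digest.tag = (void *)"sample-tag";    /* GIE_update asserts a non-NULL tag */

    GIE_digest_t *batch[1] = { &digest };
    GIE_update(handle, batch, 1);

    GIE_result_t results[8];
    const char *query = "hello gram index enginx";
    int n = GIE_query(handle, query, (int)strlen(query), results, 8);
    for (int i = 0; i < n; i++)
        printf("id=%u conf=%d\n", results[i].id, (int)results[i].cfds_lvl);

    GIE_destory(handle);                  /* spelling as declared in gram_index_engine.c */
    return 0;
}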
\ No newline at end of file diff --git a/src/test/digest_temp b/src/test/digest_temp new file mode 100644 index 0000000..3cde5ae --- /dev/null +++ b/src/test/digest_temp @@ -0,0 +1,8 @@ +with open('./temp1','r') as file_sfh: + with open('./temp2','w') as out_file: + for line in file_sfh: + line=line.replace("\n","").replace("\"","").replace("\t","").replace(",","").strip() + if(line.find("mid")>=0): + out_file.write(line[4:]+",") + else: + out_file.write(line[7:]+"\n")
\ No newline at end of file |