#include #include #include #include #include #include #include #include #include #include #include #include #include #include struct translation_structure{ char *series; int times; //int len; }; //struct translation_structure table[39679]; struct pat_length{ int highest_times; struct translation_structure member[39679]; }*table; //pat_table=malloc( sizeof(struct pat_length) ); //pat_table[0].member[]. //[39679]; /* struct one_bit{ //unsigned c : 1; char c; }; */ struct one_index{ int len; int table_members; int empty; int search_num; std::bitset<65536> c; //std::bitset<65536> d; //struct one_bit c[256][256]; }; struct one_index *pat_index; int file_bytes=0; char *mem_file; //int table_members=1; static void progress_func(int max_searchs, int pat_len); void *match_search(void *set); static int empty_search(int num1, char *linestring); static int full_search(int num1, char *linestring); static void progress_func(int max_searchs, int pat_len){ static float a = 0; static float b = 0; if(max_searchs == 0){ b++; printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len ); }else{ a = max_searchs; printf("progress: %.02lf%%\n",(double)((b/a)*100)); } //printf("progress: %.02lf%% %d length patterns done.\n",(double)((b/a)*100),pat_len ); } void *match_search(void *set){ //returns 0 for match found and written //returns 1 for no matches found int num4; int num2; int num3; int return_val = 1; int num1 = pat_index[*(int *)set].search_num; int stop_byte = file_bytes-pat_index[num1].len; int member_length = pat_index[num1].len; //int buff1; //int buff2; //__m128* pSrc1 = (__m128*) pArray1; //__m128 a; //__m128 b; //__m128 c; //register int f[4]; //char mem_buffer1; //char mem_buffer2; for(num2=0; num2< stop_byte; num2++){ if( pat_index[num1].c[ (unsigned char)mem_file[num2]*256+(unsigned char)mem_file[num2+1] ] ){ //if(member_length>3){ // if( !pat_index[num1].d[ (unsigned char)mem_file[num2+(member_length-2)]*256+(unsigned char)mem_file[num2+(member_length-1)] ] ) // goto second; //} for(num4=0; num4-1; num4--){ /* a = _mm_setr_ps(table[num1].member[num4].series[0], table[num1].member[num4].series[1], table[num1].member[num4].series[2], table[num1].member[num4].series[3]); b = _mm_setr_ps(mem_file[num2+0], mem_file[num2+1], mem_file[num2+2], mem_file[num2+3]); b = _mm_cmpeq_ps( b, a ); _mm_comieq_ss( _mm_storeu_ps((float*) f, b); //for( num3=0; num3 < 4; num3++){ if(f[0] == 0 || f[1] == 0 || f[2] == 0 || f[3] == 0) goto next; //} */ for( num3=0; num3 < member_length; num3++){ //for( num3=member_length-1; num3 > -1; num3--){ //mem_buffer1 = table[num1].member[num4].series[num3]; //mem_buffer2 = mem_file[num2+num3]; //num3++; //}while(mem_buffer1 == mem_buffer2); //s = x != 0.0 ? Math.Sin(x)/x : 1.0; if(table[num1].member[num4].series[num3] != mem_file[num2+num3]){ //if(mem_buffer1 != mem_buffer2){ goto next; } } //if(num3==pat_index[num1].len-1){ //match found table[num1].member[num4].times++; if(table[num1].member[num4].times>table[num1].highest_times){ table[num1].highest_times=table[num1].member[num4].times; } //return_val = 0; //num4=-1; goto found; //num3 = pat_index[num1].len; //} //}else{ //goto next; //num3 = pat_index[num1].len; //while(mem_buffer1 == mem_buffer2); next: ; } }else{ //second: return_val = 1; } if(return_val==1){ return_val = empty_search(num1, &mem_file[num2]); } if(return_val==1){ return_val = full_search(num1, &mem_file[num2]); } if(return_val==1){ printf("error!\n"); exit(1); } found: ; } /* for(num4=pat_index[num1].table_members-2; num4>-1; num4--){ //if(table[num4].len == num1){ num3=0; while(table[num1].member[num4].series[num3] == linestring[num3]){ num3++; if(num3==pat_index[num1].len){ table[num1].member[num4].times++; return(0); printf("here 3\n"); } } //} //} } return (1); */ progress_func(0, member_length); //printf("\tpatterns %d done.\n", member_length); pthread_exit(0); //return 0; //void * pthread_exit(NULL); } static int empty_search(int num1, char *linestring){ //return 0 if empty spot found and wrote to //return 1 if no empty spots found //static int empty=0; int num4; //int num5=0; if(pat_index[num1].table_members == 39680){ return (1); } //int member_length = pat_index[num1].len; for(num4=pat_index[num1].empty; num4<=pat_index[num1].table_members; num4++){ if(table[num1].member[num4].times==0){ //array is not full table[num1].member[num4].series=linestring; table[num1].member[num4].times=1; //table[num4].len = num1; pat_index[num1].empty=num4; if(pat_index[num1].table_members < 39680){ pat_index[num1].table_members++; //printf("tbl mems %d\n", pat_index[num1].table_members); } pat_index[num1].c[ (unsigned char)linestring[0]*256+(unsigned char)linestring[1] ]=1; //if(member_length > 3){ // pat_index[num1].d[ (unsigned char)linestring[member_length-2]*256+(unsigned char)linestring[member_length-1] ]=1; //} //table[num1].highest_times++; return(0); //num5=1; //num4=39679; } } return(1); } static int full_search(int num1, char *linestring){ //return 0 for found and replaced array string of 1x //return 1 for error (no 1x strings found, for now) int num4; //int member_length = pat_index[num1].len; for(num4=0; num4<39679; num4++){ if(table[num1].member[num4].times==1){ //replace it //table[num4].series=realloc (table[num4].series, sizeof(unsigned char)*num1); //table[num4].series=(unsigned char *)malloc(sizeof(unsigned char)*num1); table[num1].member[num4].series=linestring; //for(num3=0; num3 3){ // pat_index[num1].d[ (unsigned char)linestring[member_length-2]*256+(unsigned char)linestring[member_length-1] ]=1; //} return(0); } } return(1); } void usage(void){ printf("\tUsage: patterns \n"); exit(1); } int main(int argc, char **argv){ FILE *output; int input; struct stat buffer; int status; //char *mem_file; int pat_size; //unsigned long file_bytes=0; //unsigned char *linestring; //int character; int num1, num2, num3, num4; //, num4, num5; int progress=0; double progress2=0; int max_searchs=0; int max_pat; int half_file; //int return_val; int frequency=4; int thread_count; //void *pth_return; //int pth_rval; //void *pth_return = &pth_rval; //void **pth_return2 = &pth_return; //struct timespec ts; struct timeval starttime,endtime; double time1; //if(argc == 6){ //frequency = atoi(argv[5]); //thread_count = atoi(argv[6]); if(argc != 7){ usage(); } // for(num1=0; num1<39679; num1++){ // //table[num1].series = NULL; // table[num1].times = 0; // table[num1].len = 0; // } frequency = atoi(argv[5]); thread_count = atoi(argv[6]); pat_size = atoi(argv[3]); max_pat = atoi(argv[4]); input = open(argv[2], O_RDONLY); if(input == -1){ fprintf( stderr, "Error opening %s\n", argv[2] ); exit( 1 ); } status = fstat(input, &buffer); file_bytes=buffer.st_size; mem_file=(char *)mmap(0, file_bytes, PROT_READ, MAP_SHARED, input, 0); if (mem_file == MAP_FAILED) { close(input); perror("Error mmapping the file"); exit(EXIT_FAILURE); } if( ( output = fopen( argv[1], "w" ) ) == NULL ) { fprintf( stderr, "Error opening %s\n", argv[1] ); exit( 1 ); } printf("input: %s\n", argv[2]); printf("output: %s\n", argv[1]); printf("pat_size: %d\n", pat_size); printf("bytes: %d\n", file_bytes); if(max_pat > file_bytes/2){ max_pat = floor(file_bytes/2); printf("max_pat: %d (reduced to limit!)\n", max_pat); }else{ printf("max_pat: %d\n", max_pat); } half_file = sizeof(unsigned char)*ceil(file_bytes/2); for(num1=pat_size; num1 num2){ num2=table[num1].highest_times; } //find highest table count //printf("tc: %d\n", pat_index[num1].table_members); if(pat_index[num1].table_members > highest_tbl_count){ highest_tbl_count=pat_index[num1].table_members; } //} } //printf("num2: %d frequency: %d tbl_count: %d max_s: %d\n", num2, frequency, highest_tbl_count, max_searchs); int print_max=0; int next_highest=0; //printf("here 1\n"); //6 1 for(num2=num2; num2>=frequency; num2--){ for(num4=0; num4 next_highest){ next_highest = table[num1].member[num4].times; } } } //num2 must be set to next highest //num2 = next_highest; //next_highest = frequency; } num2 = next_highest+1; next_highest = 0; //printf("num2: %d frequency: %d\n", num2, frequency); //if(print_max == 1){ // num2 = frequency-1; //} } max_reached: ; gettimeofday(&endtime, NULL); time1=((double)(endtime.tv_sec*1000000-starttime.tv_sec*1000000+endtime.tv_usec-starttime.tv_usec))/1000000; printf("time: %.02lfs\n", time1); if (munmap(mem_file, file_bytes) == -1) { perror("Error un-mmapping the file"); } close(input); fclose(output); printf("done.\n"); return 0; }