#include #include #include #include #include #include #include #define DICTIONARY "/usr/share/dict/words" #define READ_SIZE (1024*1024) #define HASH_BUCKETS 10485760 #define INPUT_BUFFER_SIZE 10485760 struct HashEntry { char *word; int count; void *next; }; inline char isStopChar( char c ) { switch( c ) { case ' ': case '\n': case '\r': case '!': case '@': case '#': case '$': case '%': case '^': case '&': case '*': case '(': case ')': case '\t': case ':': case '.': case '/': case ',': case '?': case '>': case '<': case ';': case '\'': case '"': case ']': case '[': case '{': case '}': case '\\': case '|': case '=': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': return 1; default: return 0; } } unsigned int hash( char *word ) { unsigned int hash = 0; unsigned int i = 0; for( ; word[i] != 0; ++i ) { hash += word[i]; hash += ( hash << 10 ); hash ^= ( hash >> 6 ); } //hash += ( hash << 3 ); //hash ^= ( hash >> 11 ); //hash += ( hash << 15 ); return hash % HASH_BUCKETS; } int main( int argc, char **argv ) { if( argc != 2 ) return 1; int i; struct stat statBuf; stat( DICTIONARY, &statBuf ); int dictionarySize = statBuf.st_size; char *dictionaryBuffer = (char*)malloc( dictionarySize * sizeof(char) ); FILE *dictionaryFile = fopen( DICTIONARY, "r" ); while( fread( dictionaryBuffer, READ_SIZE, 1, dictionaryFile ) ); struct HashEntry *hashEntries = (struct HashEntry*)malloc( HASH_BUCKETS * sizeof(struct HashEntry) ); memset( hashEntries, 0, sizeof(struct HashEntry) ); char *dictionaryWord = dictionaryBuffer; for( i=0; iword == 0 ) { newEntry->word = dictionaryWord; newEntry->count = 0; newEntry->next = 0; } else { newEntry = (struct HashEntry*) malloc( sizeof( struct HashEntry ) ); newEntry->word = dictionaryWord; newEntry->count = 0; newEntry->next = 0; struct HashEntry *existingEntry = &( hashEntries[hashVal] ); while( existingEntry->next != 0 ) { existingEntry = existingEntry->next; } existingEntry->next = newEntry; } dictionaryWord = dictionaryBuffer + i + 1; } } stat( argv[1], &statBuf ); int inputFileSize = statBuf.st_size; FILE *inputFile = fopen( argv[1], "r" ); char *inputBuffer = (char*)malloc( inputFileSize ); int counter=0; while( fread( inputBuffer+(READ_SIZE*counter++), READ_SIZE, 1, inputFile ) ); // For every word in the input file, count those that are also in the dictionary char *inputWord = inputBuffer; for( i=0; iword ) { if( !strcmp( entry->word, inputWord ) ) { entry->count++; } else { while( (entry = entry->next) ) { if( !strcmp( entry->word, inputWord ) ) { entry->count++; break; } } } } inputWord = inputBuffer + i; i--; } } // Print results: for( i=0; iword ) { if( entry->count > 0 ) printf( "%s: %d\n", entry->word, entry->count ); while( entry->next ) { entry = entry->next; if( entry->count > 0 ) printf( "%s: %d\n", entry->word, entry->count ); } } } return 0; }