Find Words Source Code

This is David Rainey’s source code, which searches runs of random characters for words by comparing them against this lexicon.

Click image to enlarge.

 

#pragma hdrstop
#include <condefs.h>
#include <conio.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <time.h>
// this is the structure that stores each word in the dictionary
// One dictionary entry: a node in a singly linked list of words.
struct WordClass {
    char *pWord;           // text of the word; NULL on the node that terminates a list
    WordClass *pNextWord;  // the following node in the same list
};

// One list head per letter of the alphabet: index 0 is 'a', 1 is 'b', ... 25 is 'z'.
/*
When the dictionary is loaded it consists of 26 singly linked lists, one for
each starting letter. It will look something like:
BaseAlphabet[0] -> apple -> argon -> abate -> acute ...
BaseAlphabet[1] -> barn -> bumper -> bell -> bargain -> burn ...

The last node of every list has pWord == NULL. Because this array has static
storage it starts out zeroed, so a letter with no words is just a head node
whose pWord is NULL.
*/
WordClass BaseAlphabet[26];

char szDictionary[256]; // name of dictionary file
char szData[256];       // name of input/data file

long liWordsFound = 0;      // number of words found
long liDictionaryScans = 0; // how many dictionary words have been compared against the input
long liWordCount = 0;       // number of words in dictionary
bool bShowWords = false;    // should we display each word as it is found

time_t timeStart, timeEnd; // beginning and ending time of run

FILE *pLogFile;                      // log file handle; NULL when no log could be opened
char szLogFile[] = "findwords.log";  // log file name

// counters for words in the dictionary, indexed by (word length - 1)
long liDictionaryWordCountByLength[100];
// counters for words discovered in the input, indexed by (word length - 1)
long liFoundWordCountByLength[100];
int iLongestDictionaryWord = 1; // length of the longest word loaded from the dictionary
int iLongestFoundWord = 1;      // length of the longest word discovered in the input
int LoadDictionary()
{
WordClass *(CurrentItem[26]); // each letter can have a current word so we create this array to store them
WordClass *pPreviousItem; // pointer to the previous word structure/class

int iWordLength; // length of current word

char szWord[128], szLowerWord[128];

int iArrayOffset;
WordClass *pTempWord;

// set the current pointers to the address of the base alphabet pointers — make sure we have a starting point
for ( int i=0; i < 26; i++)
{
CurrentItem[i] = &(BaseAlphabet[i]);
}

FILE *pFile = fopen(szDictionary, “r”); // open the dictionary file
if ( ! pFile ) return (1); // if we can’t open, let’s exit

while ( ( fgets(szWord, sizeof(szWord)-1, pFile) ) ) // read a word
{
// remove the trailing cr/lf
if ( szWord[strlen(szWord)-1] == ‘\n’ ) szWord[strlen(szWord)-1] = ‘\0′;

strcpy(szLowerWord, strlwr(szWord)); // make sure the word is stored as lower-case
if ( (szLowerWord[0] >= ‘a’) && (szLowerWord[0] <= ‘z’) ) // make sure it starts with a letter
{
if ( strlen(szLowerWord) == 1 ) continue; // let’s don’t mess with single letter words

liWordCount++; // increment the word counter

iArrayOffset = szLowerWord[0]-’a'; // determine which array element we should work with — a=0, b=1, c=2, etc.

pPreviousItem = CurrentItem[iArrayOffset]; // store the current item with the same beginning letter
pTempWord = (WordClass *) new char (sizeof(WordClass)); // allocated a new structure for the new word
CurrentItem[iArrayOffset] = pTempWord; // make the newly allocated structure the current one
CurrentItem[iArrayOffset]->pWord = NULL; // indicate this is the last word beginning with this letter
pPreviousItem->pNextWord = CurrentItem[iArrayOffset]; // point the previous word to the new one

pPreviousItem->pWord = (char *) malloc (strlen(szLowerWord)+1); // allocated a block for the word itself
if ( !pPreviousItem->pWord ) // see if the allocation failed
{
printf(“\n\nError allocating memory!!!”);
return (1);
}

strcpy(pPreviousItem->pWord, szLowerWord); // copy the word to the structure
// let’s get the word length and increment the appropriate counter
iWordLength = strlen(szLowerWord);
liDictionaryWordCountByLength[iWordLength-1]++;
// find the longest dictionary word and store it
if ( iWordLength > iLongestDictionaryWord ) iLongestDictionaryWord = iWordLength;

}
}

fclose(pFile);

return(0);
}

void SearchBatch(char *pszBatch)
{
int iWordLength;
WordClass tempWord;

if ( strlen(pszBatch) == 0 ) return; // if the block of letters passed happens to be no block at all, just exit

for ( int i=0; i<strlen(pszBatch); i++) // loop through every letter in the block
{
tempWord = BaseAlphabet[*(pszBatch+i)-'a']; // find the first word in the dictionary starting with the current letter in the block
do
{

liDictionaryScans++; // increment the number of dictionary words scanned

if ( !strncmp(tempWord.pWord, pszBatch+i, strlen(tempWord.pWord) ) ) // compare the current word in dictionary with block
{
if ( bShowWords ) printf(“\nWord found (%d): %s”, liWordsFound+1, tempWord.pWord); // found a word so echo to screen if desired
liWordsFound++; // increment the # of words found
if ( pLogFile ) // log to the file if desired
fprintf(pLogFile, “%s\n”, tempWord.pWord);

// increment the size counters
iWordLength = strlen(tempWord.pWord);
liFoundWordCountByLength[iWordLength-1]++;
// find the longest ‘found’ word
if ( iWordLength > iLongestFoundWord ) iLongestFoundWord = iWordLength;

}

tempWord = *tempWord.pNextWord; // point to the next word in the dictionary with this starting letter
} while ( tempWord.pWord != NULL ); // keep comparing to all words in the dictionary as long as it isn’t the last word

} // end for
}

void FindWords()
{
char szBatch[2048]; // stores a block of contiguous a-z characters
int iIndex=0; // index of current character in the file

// get start time
timeStart = time(NULL);

FILE *pFile = fopen(szData, “r”); // open the input file
if ( !pFile ) return; // exit if we can’t open it
// open the log file
pLogFile = fopen(szLogFile, “w+”);
if ( !pLogFile )
{
printf(“\n\nError, unable to create log file : %s\n\n”, szLogFile);
}

while ( ( szBatch[iIndex] = fgetc(pFile) ) != EOF ) // get the next character in the input file and loop until end of file
{
if ( ( szBatch[iIndex] >= ‘A’ ) && ( szBatch[iIndex] <= ‘Z’ ) ) // if we read an upper case letter convert to lower case
{
szBatch[iIndex] = szBatch[iIndex] + (‘a’ – ‘A’); // convert character to lower-case
}

// now see if we read a character
if ( ( szBatch[iIndex] >= ‘a’ ) && ( szBatch[iIndex] <= ‘z’ ) )
{
szBatch[++iIndex] = ‘\0′; // make sure the next character is the end of the string
} else
{
szBatch[iIndex] = ‘\0′; // we didn’t read an a-z character so let’s terminate the block here
SearchBatch(szBatch); // let’s search for all words in this block of letters
iIndex = 0; // reset the index into the block
szBatch[iIndex] = ‘\0′;
}

}

fclose(pFile);

// close the log
if ( pLogFile )
{
fclose(pLogFile);
printf(“\n\nA log of all the words found : %s\n”, szLogFile);
}

// get end time
timeEnd = time(NULL);

// display the results
printf(“\n%ld words loaded into dictionary\n”, liWordCount);
printf(“\nLongest word in dictionary was %d characters\n”, iLongestDictionaryWord);
printf(“\nWord distribution in dictionary by character length:\n”);
printf(“\nLength # Length # Length # Length # Length #\n”);
for ( int i=1; i<=iLongestDictionaryWord-1; i+=5 )
{
printf(“\n%6d %5d %6d %5d %6d %5d %6d %5d %6d %5d”,
i+1, liDictionaryWordCountByLength[i],i+2, liDictionaryWordCountByLength[i+1],i+3, liDictionaryWordCountByLength[i+2],i+4, liDictionaryWordCountByLength[i+3], i+5, liDictionaryWordCountByLength[i+4]);
}

printf(“\n\n%ld words found\n”, liWordsFound);
printf(“\nLongest word discovered was %d characters\n”, iLongestFoundWord);
printf(“\nDistribution of discovered words by character length:\n”);
printf(“\nLength # Length # Length # Length # Length #\n”);
for ( int i=1; i<=iLongestFoundWord-1; i+=5 )
{
printf(“\n%6d %5d %6d %5d %6d %5d %6d %5d %6d %5d”,
i+1, liFoundWordCountByLength[i],i+2, liFoundWordCountByLength[i+1],i+3, liFoundWordCountByLength[i+2],i+4, liFoundWordCountByLength[i+3], i+5, liFoundWordCountByLength[i+4]);
}

printf(“\n\n%ld scans of dictionary words\n”, liDictionaryScans);
printf(“\nApproximate scan duration : %f seconds or %f minutes”, difftime(timeEnd, timeStart),
difftime(timeEnd, timeStart)/60);

}

//—————————————————————————
#pragma argsused
int main(int argc, char **argv)
{

bShowWords = 0;

// check the parameters
if ( argc == 1 ) // prompt for paths
{
printf(“\n\nDictionary filename:”);
gets(szDictionary);
printf(“\n\nData filename:”);
gets(szData);
printf(“\n\nWould you like to display each word as it is found <y/n>?”);
int charJournal = getch();
if ( ( charJournal == ‘y’ ) || ( charJournal == ‘Y’ ) ) bShowWords = 1;

} else if ( argc == 3 ) // parameters were passed
{
strcpy(szDictionary, argv[1]);
strcpy(szData, argv[2]);
} else if ( argc == 4 ) // parameters were passed
{
strcpy(szDictionary, argv[1]);
strcpy(szData, argv[2]);
if ( *argv[3] == ’1′ ) bShowWords = 1;
} else // display information
{
printf(“\n\nIf you pass no parameters you will be prompted for the necessary information.”);
printf(“\nYou may also pass the file containing the dictionary and the file to process.”);
printf(“\nIf you want to display each word as it is found, pass 1 as the last parameter.”);
return(1);
}

// zero all the counters
for ( int i=0; i<99; i++ ) liDictionaryWordCountByLength[i] = 0;
for ( int i=0; i<99; i++ ) liFoundWordCountByLength[i] = 0;

// load the dictionary
printf(“\nLoading dictionary”);
if ( LoadDictionary() ) return(1);

// begin processing the data file
printf(“\nSearching for words”);

FindWords();

 

return 0;
}