import java.io.*; import java.util.LinkedList; import java.util.StringTokenizer; public class SpellChecker { private LinkedList []ht; /* Hash table -- array of linked lists */ private DataReader dictionaryReader; private DataReader documentReader; private int dictSize; /* * Below are three different has functions, please * test them out one at a time. Take a look at the * distribution of keys, as well as the size of * the buckets. */ /* * Java built in hash function * hashCode(String s) = s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1] private int getAddr(String word) { word = word.toLowerCase(); int addr = word.hashCode(); addr = (addr < 0 ? -1 * addr : addr) % ht.length; return addr; } */ /* * Add and modulus hash function * */ private int getAddr(String word) { int addr = 0; word = word.toLowerCase(); for (int index=0; index < word.length(); index++) addr += Character.getNumericValue(word.charAt(index)); return addr % ht.length; } /* * Multiply, add, and modulus hash function */ /* private int getAddr(String word) { int addr = 0; word = word.toLowerCase(); for (int index=0; index < word.length(); index++) { addr = addr *11 % ht.length; addr += Character.getNumericValue(word.charAt(index)); } return addr % ht.length; } */ /* * Backward Multiply, add, and modulus hash function */ /* private int getAddr(String word) { int addr = 0; word = word.toLowerCase(); for (int index= word.length()-1; index >=0; index--) { addr = addr * 11 % ht.length; addr += Character.getNumericValue(word.charAt(index)); } return addr % ht.length; } */ private int searchWord (String word) { /* * This function searches for words in the hash table. * It also modifies the word and tries to guess its root * word, since the dictionary only holds root forms. */ int count=0; String tryWord; // A number try { Float.valueOf(word); return 0; } catch (NumberFormatException e) { } // Not a number, charge forward and try something else // straight up tryWord = word; if (ht[getAddr(tryWord)].contains(tryWord)) return ht[getAddr(tryWord)].indexOf(tryWord) + 1; else count += (ht[getAddr(tryWord)].size()); if (word.endsWith("ing")) { // remove -ing tryWord = word.substring(0, word.length() - 3); if (ht[getAddr(tryWord)].contains(tryWord)) return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1; else count +=ht[getAddr(tryWord)].size(); // remove -ing and add -e tryWord = word.substring(0, word.length() - 3) + "e"; if (ht[getAddr(tryWord)].contains(tryWord)) return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1; else count +=ht[getAddr(tryWord)].size(); } if (word.endsWith("ies")) { // remove -ies and add -y tryWord = word.substring(0, word.length() - 3) + "y"; if (ht[getAddr(tryWord)].contains(tryWord)) return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1; else count +=ht[getAddr(tryWord)].size(); } if ( word.endsWith("es") || word.endsWith("ly") || word.endsWith("ed")) { // remove last 2 tryWord = word.substring(0, word.length() - 2); if (ht[getAddr(tryWord)].contains(tryWord)) return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1; else count +=ht[getAddr(tryWord)].size(); } if (word.endsWith("es") || word.endsWith("ed")) { // just remove last one tryWord = word.substring(0, word.length() - 1); if (ht[getAddr(tryWord)].contains(tryWord)) return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1; else count +=ht[getAddr(tryWord)].size(); } if (word.endsWith("s")) { // just remove last one tryWord = word.substring(0, word.length() - 1); if (ht[getAddr(tryWord)].contains(tryWord)) return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1; else count +=ht[getAddr(tryWord)].size(); } return -1*count; } public SpellChecker (String dictionaryFile, String documentFile, int tableSize) { try { dictSize = 0; dictionaryReader = new DataReader (dictionaryFile); documentReader = new DataReader (documentFile); } catch (Exception e) { // Bad things happened -- bail. System.out.println (e); System.exit (-1); } ht = new LinkedList[tableSize]; for (int listNumber=0; listNumber < tableSize; ht[listNumber++] = new LinkedList()); try { while (true) { String word = dictionaryReader.readWord().toLowerCase(); ht[getAddr(word)].add(word); dictSize++; } } catch (EOFException e) { } // Normal exit catch (Exception e) { // Bad things happened -- bail. System.out.println (e); System.exit (-2); } } public void printHistogramAndAverage() { /* * You should write this. It should show a histogram of bucket * fullness. E.g. How many buckets are empty? Have 1 item? 2 items? * Etc? * * You don't need to make an ascii art chart, a simple, well-formatted * count will do. * * You should also print out the average number of items per bucket * as a sanity test. */ } public void spellCheck() { String line = ""; int lineNumber = 0; long searchCount = 0; long wordCount = 0; try { while (true) { line = documentReader.readLine(); lineNumber++; StringTokenizer lineTokenizer = new StringTokenizer (line, " -\t\n\r\f,.?\":;!(){}/+*=|[]<>%\\"); while (lineTokenizer.hasMoreTokens()) { String word = lineTokenizer.nextToken(); wordCount++; int thisCount = searchWord(word.toLowerCase()); searchCount += (thisCount > 0 ? thisCount : -1*thisCount); if (thisCount < 0) { System.out.println (line); System.out.println (word + "\n"); } } } } catch (IOException e) { } // End of document file catch (Exception e) { // Bad things happened -- bail. System.out.println (e); System.exit (-3); } System.out.println ("Average list search was " + (float)searchCount/(float)wordCount); } String toString() { /* * You need to write this method. It should return the count * for each bucket one-at-a time. Perhaps a simple * laundry list like below: 0: 5 1: 2 2: 6 4: 2 */ } public static void main (String []args) { if(args.length<3) { System.out.println("usage: java SpellChecker [dictionary] [document] [table size]"); System.exit(-4); } SpellChecker ht = new SpellChecker (args[0], args[1], Integer.parseInt(args[2])); System.out.println(ht); ht.printHistogramAndAverage(); ht.spellCheck(); } }