import java.io.*;
import java.util.LinkedList;
import java.util.StringTokenizer;

public class SpellChecker
{
  private LinkedList []ht; /* Hash table -- array of linked lists */
  private DataReader dictionaryReader;
  private DataReader documentReader;
  private int dictSize;

/*
 * Below are three different has functions, please
 * test them out one at a time. Take a look at the
 * distribution of keys, as well as the size of
 * the buckets.
 */

/*
 * Java built in hash function
 * hashCode(String s) = s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
  private int getAddr(String word)
  {
    word = word.toLowerCase();
    int addr = word.hashCode();
    addr = (addr < 0 ? -1 * addr : addr) % ht.length;

    return addr;
  }
*/

/* 
 * Add and modulus hash function
 *
*/
  private int getAddr(String word)
  {
    int addr = 0;

    word = word.toLowerCase();
    for (int index=0; index < word.length(); index++)
      addr += Character.getNumericValue(word.charAt(index));

    return addr % ht.length;
  }

/* 
 * Multiply, add, and modulus hash function
 */
/*
  private int getAddr(String word)
  {
    int addr = 0;

    word = word.toLowerCase();
    for (int index=0; index < word.length(); index++)
    {
      addr = addr *11 % ht.length;
      addr += Character.getNumericValue(word.charAt(index));
    }

    return addr % ht.length;
  }
*/

/* 
 * Backward Multiply, add, and modulus hash function
 */
/*
  private int getAddr(String word)
  {
    int addr = 0;

    word = word.toLowerCase();
    for (int index= word.length()-1; index >=0; index--)
    {
      addr = addr * 11 % ht.length;
      addr += Character.getNumericValue(word.charAt(index));
    }

    return addr % ht.length;
    }
*/


  private int searchWord (String word)
  {
    /*
     * This function searches for words in the hash table. 
     * It also modifies the word and tries to guess its root
     * word, since the dictionary only holds root forms.
     */
    int count=0;
    String tryWord;

    // A number
    try
    {
      Float.valueOf(word);
      return 0;
    }
    catch (NumberFormatException e)
    { } // Not a number, charge forward and try something else

    // straight up
    tryWord = word;
    if (ht[getAddr(tryWord)].contains(tryWord))
      return ht[getAddr(tryWord)].indexOf(tryWord) + 1;
    else
        count += (ht[getAddr(tryWord)].size());

    if (word.endsWith("ing"))
    {
      // remove -ing
      tryWord = word.substring(0, word.length() - 3);  
      if (ht[getAddr(tryWord)].contains(tryWord))
        return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1;
      else
        count +=ht[getAddr(tryWord)].size();

      // remove -ing and add -e
      tryWord = word.substring(0, word.length() - 3) + "e";  
      if (ht[getAddr(tryWord)].contains(tryWord))
        return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1;
      else
        count +=ht[getAddr(tryWord)].size();
    }

    if (word.endsWith("ies"))
    {
      // remove -ies and add -y
      tryWord = word.substring(0, word.length() - 3) + "y";
      if (ht[getAddr(tryWord)].contains(tryWord))
        return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1;
      else
        count +=ht[getAddr(tryWord)].size();
    }


    if ( word.endsWith("es") || word.endsWith("ly") || word.endsWith("ed"))
    {
      // remove last 2
      tryWord = word.substring(0, word.length() - 2);  
      if (ht[getAddr(tryWord)].contains(tryWord))
        return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1;
      else
        count +=ht[getAddr(tryWord)].size();
    }

    if (word.endsWith("es") || word.endsWith("ed"))
    {
      // just remove last one
      tryWord = word.substring(0, word.length() - 1);  
      if (ht[getAddr(tryWord)].contains(tryWord))
        return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1;
      else
        count +=ht[getAddr(tryWord)].size();
    }

    if (word.endsWith("s"))
    {
      // just remove last one
      tryWord = word.substring(0, word.length() - 1);  
      if (ht[getAddr(tryWord)].contains(tryWord))
        return count + ht[getAddr(tryWord)].indexOf(tryWord) + 1;
      else
        count +=ht[getAddr(tryWord)].size();
    }

    return -1*count;
  }

  public SpellChecker (String dictionaryFile, 
                       String documentFile,
                       int tableSize)
  {
    try
    {
      dictSize = 0;
      dictionaryReader = new DataReader (dictionaryFile);
      documentReader = new DataReader (documentFile);
    }
    catch (Exception e)
    {
      // Bad things happened -- bail.
      System.out.println (e);
      System.exit (-1);
    }

    ht = new LinkedList[tableSize];
    for (int listNumber=0; listNumber < tableSize; 
         ht[listNumber++] = new LinkedList());

    try
    {
      while (true)
      {
        String word = dictionaryReader.readWord().toLowerCase();
        ht[getAddr(word)].add(word);
        dictSize++;
      }
    }
    catch (EOFException e)
    { } // Normal exit

    catch (Exception e)
    { 
      // Bad things happened -- bail.
      System.out.println (e);
      System.exit (-2);
    } 
  }

  public void printHistogramAndAverage()
  {
    /*
     * You should write this. It should show a histogram of bucket
     * fullness. E.g. How many buckets are empty? Have 1 item? 2 items?
     * Etc?
     *
     * You don't need to make an ascii art chart, a simple, well-formatted
     * count will do.
     *
     * You should also print out the average number of items per bucket
     * as a sanity test. 
     */
  }

  public void spellCheck()
  {
    String line = ""; 
    int lineNumber = 0;
    long searchCount = 0;
    long wordCount = 0;

    try
    {
      while (true)
      {
        line = documentReader.readLine();
        lineNumber++;

        StringTokenizer lineTokenizer = 
          new StringTokenizer (line, " -\t\n\r\f,.?\":;!(){}/+*=|[]<>%\\");
             
        while (lineTokenizer.hasMoreTokens()) 
        {
          String word = lineTokenizer.nextToken();
          wordCount++;

          int thisCount = searchWord(word.toLowerCase());
          searchCount += (thisCount > 0 ? thisCount : -1*thisCount);

          if (thisCount < 0)
          {
            System.out.println (line); 
            System.out.println (word + "\n"); 
          }
        }
      }
    }
    catch (IOException e)
    { } // End of document file
    catch (Exception e)
    {
      // Bad things happened -- bail.
      System.out.println (e);
      System.exit (-3);
    }

    System.out.println ("Average list search was " + 
                        (float)searchCount/(float)wordCount);
  }

  String toString()
  {
    /*
     * You need to write this method. It should return the count
     * for each bucket one-at-a time. Perhaps a simple 
     * laundry list like below:

       0: 5
       1: 2 
       2: 6
       4: 2
     */
  }
    
  public static void main (String []args)
  {
    if(args.length<3)
    {
      System.out.println("usage: java SpellChecker [dictionary] [document]
                          [table size]");
      System.exit(-4);
    }

    SpellChecker ht = new SpellChecker (args[0], args[1], 
                                        Integer.parseInt(args[2]));
    System.out.println(ht);
    ht.printHistogramAndAverage();
    ht.spellCheck();
  }
}