/** * Title: ATN

* Description: Implements a simple ATN parser that uses WordNet data * Copyright: Copyright (c) by Mark Watson, 2000

* @author Mark Watson * @version 1.2 */ import java.io.*; import java.util.*; public class ATN { public ATN() { try { // the following code will read either a local file of a // resource in a JAR file: InputStream ins = ClassLoader.getSystemResourceAsStream("wncache.dat"); if (ins==null) { System.out.println("Failed to open 'wncache.dat'"); System.exit(1); } else { ObjectInputStream p = new ObjectInputStream(ins); adj = (Hashtable)p.readObject(); adv = (Hashtable)p.readObject(); noun = (Hashtable)p.readObject(); verb = (Hashtable)p.readObject(); ins.close(); } // Augment the WordNet 1.6 entries: art = new Hashtable(); addWords(art, ARTS); conj = new Hashtable(); addWords(conj, CONJS); det = new Hashtable(); addWords(det, DETS); pron = new Hashtable(); addWords(pron, PRONS); prep = new Hashtable(); addWords(prep, PREPS); // fill in a few common verbs that are not in Wordnet 1.6: verb.put("ran", b); } catch (Exception e) { e.printStackTrace(); } } private Boolean b = new Boolean(true); private void addWords(Hashtable h, String [] ws) { for (int i=0; i 2 && str.endsWith(",")) { str = str.substring(0, str.length() - 1); } if (str.length() > 2 && str.endsWith(".")) { str = str.substring(0, str.length() - 1); } if (str.length() > 2 && str.endsWith(":")) { str = str.substring(0, str.length() - 1); } if (str.length() > 2 && str.endsWith(";")) { str = str.substring(0, str.length() - 1); } v.addElement(str.toLowerCase()); } // It is easier to work with an array, so convert the Vector // to an array of Java strings: int size = v.size(); if (size == 0) return null; words = new String[size]; partsOfSpeech = new int[size]; num_words = size; for (int i=0; i= num_words) return word_index; // test ATN transitions --> if (checkWord(words[word_index], PREP)) { partsOfSpeech[start_word_index + word_index] = PREP; int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { return ii; } } return -1; } int parseNP(int start_word_index, int word_index) { if (word_index >= 
num_words) return word_index; // test ATN transitions --> --> if (word_index < num_words - 2 && checkWord(words[word_index], NOUN)) { if (checkWord(words[word_index + 1], CONJ)) { int ii = parseNP(start_word_index, word_index + 2); if (ii > -1) { partsOfSpeech[start_word_index + word_index] = NOUN; partsOfSpeech[start_word_index + word_index + 1] = CONJ; return ii; } } } // test ATN transitions --> if (word_index < num_words - 1 && checkWord(words[word_index], ART)) { int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { partsOfSpeech[start_word_index + word_index] = ART; return ii; } } // test ATN transitions --> if (word_index < num_words - 1 && checkWord(words[word_index], ADJ)) { int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { partsOfSpeech[start_word_index + word_index] = ADJ; return ii; } } // test ATN transitions --> if (checkWord(words[word_index], ADJ)) { int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { partsOfSpeech[start_word_index + word_index] = ADJ; return ii; } } // test ATN transitions --> if (word_index < num_words - 1 && checkWord(words[word_index], ADV)) { int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { partsOfSpeech[start_word_index + word_index] = ADV; return ii; } } // test ATN transitions --> if (word_index < num_words - 1 && checkWord(words[word_index], NOUN)) { if (checkWord(words[word_index + 1], NOUN)) { partsOfSpeech[start_word_index + word_index] = NOUN; partsOfSpeech[start_word_index + word_index + 1] = NOUN; return word_index + 2; } } if (checkWord(words[word_index], NOUN)) { partsOfSpeech[start_word_index + word_index] = NOUN; return word_index + 1; } if (checkWord(words[word_index], PRON)) { int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { partsOfSpeech[start_word_index + word_index] = PRON; return ii; } } if (checkWord(words[word_index], PRON)) { partsOfSpeech[start_word_index + word_index] = PRON; return word_index + 1; } return -1; } int 
parseVP(int start_word_index, int word_index) { if (word_index >= num_words) return word_index; // test ATN transitions --> --> if (checkWord(words[word_index], VERB)) { partsOfSpeech[start_word_index + word_index] = VERB; int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { int jj = parsePP(start_word_index, ii); if (jj > -1) { return jj; } } } // test ATN transitions --> if (checkWord(words[word_index], VERB)) { partsOfSpeech[start_word_index + word_index] = VERB; int ii = parseNP(start_word_index, word_index + 1); if (ii > -1) { return ii; } } // test ATN transitions --> if (checkWord(words[word_index], VERB)) { partsOfSpeech[start_word_index + word_index] = VERB; int ii = parsePP(start_word_index, word_index + 1); if (ii > -1) { return ii; } } if (checkWord(words[word_index], VERB)) { partsOfSpeech[start_word_index + word_index] = VERB; return word_index + 1; } return -1; } int parseHelper(int [] atn, int start_word_index) { int word_index = 0; int len_atn = atn.length; int last_word_index = word_index; for (int i=0; i max_val) { max_val = k; max_word_index = i; } } System.out.println("Best ATN at word_index " + max_word_index); parseHelper(ALL_S[max_word_index], start_word_index); for (int i=0; i