Диссертация (1137241), страница 23
Текст из файла (страница 23)
Пакет communicative_actions, файл CommunicativeActionsArcBuilder.

/**
 * Holds a table of communicative actions, each described by a lemma and a
 * vector of five integer attributes taking the values -1, 0 or 1, and offers
 * lookup of these actions in a sentence as well as generalization of two
 * actions.
 */
public class CommunicativeActionsArcBuilder implements IGeneralizer<Pair<String, Integer[]>> {

    /** Known communicative actions: (lemma, attribute vector), in declaration order. */
    private final List<Pair<String, Integer[]>> commActionsAttr =
            new ArrayList<Pair<String, Integer[]>>();

    /** Fills the table of communicative actions. */
    public CommunicativeActionsArcBuilder() {
        addAction("agree", 1, -1, -1, 1, -1);
        addAction("accept", 1, -1, -1, 1, 1);
        addAction("explain", 0, -1, 1, 1, -1);

        addAction("suggest", 1, 0, 1, -1, -1);
        addAction("claim", 1, 0, 1, -1, -1);

        // bring-attention
        addAction("bring_attention", 1, 1, 1, 1, 1);
        addAction("remind", -1, 0, 1, 1, 1);
        addAction("allow", 1, -1, -1, -1, -1);
        addAction("try", 1, 0, -1, -1, -1);
        addAction("request", 0, 1, -1, 1, 1);
        addAction("understand", 0, -1, -1, 1, -1);

        addAction("inform", 0, 0, 1, 1, -1);
        addAction("notify", 0, 0, 1, 1, -1);
        addAction("report", 0, 0, 1, 1, -1);
        addAction("confirm", 0, -1, 1, 1, 1);
        addAction("ask", 0, 1, -1, -1, -1);
        addAction("check", -1, 1, -1, -1, 1);

        addAction("ignore", -1, -1, -1, -1, 1);
        addAction("wait", -1, -1, -1, -1, 1);

        addAction("convince", 0, 1, 1, 1, -1);
        addAction("disagree", -1, -1, -1, 1, -1);
        addAction("appeal", -1, 1, 1, 1, 1);
        addAction("deny", -1, -1, -1, 1, 1);
        addAction("threaten", -1, 1, -1, 1, 1);

        addAction("concern", 1, -1, -1, 1, 1);
        addAction("afraid", 1, -1, -1, 1, 1);
        addAction("worri", 1, -1, -1, 1, 1);
        addAction("scare", 1, -1, -1, 1, 1);

        addAction("want", 1, 0, -1, -1, -1);
        addAction("know", 0, -1, -1, 1, -1);
        addAction("believe", 0, -1, -1, 1, -1);
    }

    /** Appends one (lemma, attributes) entry to the table. */
    private void addAction(String lemma, Integer... attributes) {
        commActionsAttr.add(new Pair<String, Integer[]>(lemma, attributes));
    }

    /**
     * Finds the first communicative action occurring in a sentence.
     *
     * @param sentence nodes of the sentence, scanned from the first one
     * @return the table entry whose lemma is a prefix of the lower-cased word
     *         of some node (first node wins, then table order), or
     *         {@code null} when no node matches
     */
    public Pair<String, Integer[]> findCAInSentence(List<ParseTreeNode> sentence) {
        for (ParseTreeNode node : sentence) {
            // canonical form lemma is a sub-string (prefix) of an actual form in the node
            String word = node.getWord().toLowerCase();
            for (Pair<String, Integer[]> ca : commActionsAttr) {
                if (word.startsWith(ca.getFirst())) {
                    return ca;
                }
            }
        }
        return null;
    }

    /**
     * Finds the position of the first communicative action in a sentence.
     * Lemmas containing {@code '_'} (such as {@code bring_attention}) are
     * treated as multiword actions: every part has to be a prefix of the
     * corresponding consecutive word.
     *
     * @param sentence nodes of the sentence
     * @return index of the node where the action starts, or -1 if none is found
     */
    public int findCAIndexInSentence(List<ParseTreeNode> sentence) {
        // NOTE(review): the scan starts at position 1, exactly as in the original
        // listing, so the node at position 0 is never reported - confirm that
        // this is intended.
        for (int index = 1; index < sentence.size(); index++) {
            for (Pair<String, Integer[]> ca : commActionsAttr) {
                String[] lemmas = ca.getFirst().split("_");
                if (matchesAt(sentence, index, lemmas)) {
                    return index;
                }
            }
        }
        return -1;
    }

    /** Tells whether the words starting at {@code start} begin with the given lemma parts, one per word. */
    private static boolean matchesAt(List<ParseTreeNode> sentence, int start, String[] lemmas) {
        if (start + lemmas.length > sentence.size()) {
            return false;
        }
        for (int offset = 0; offset < lemmas.length; offset++) {
            String word = sentence.get(start + offset).getWord().toLowerCase();
            if (!word.startsWith(lemmas[offset])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Generalizes two communicative actions.
     *
     * @param o1 first action, given either as a lemma ({@code String}) or as a
     *           {@code Pair} whose first element is the lemma
     * @param o2 second action, in either of the same two forms
     * @return a single-element list: the table entry itself when both lemmas
     *         are equal, otherwise a pair with an empty name and the
     *         generalized attribute vector; an empty list when either action
     *         is not present in the table
     */
    public List<Pair<String, Integer[]>> generalize(Object o1, Object o2) {
        List<Pair<String, Integer[]>> results = new ArrayList<Pair<String, Integer[]>>();

        String ca1 = actionName(o1);
        String ca2 = actionName(o2);
        Pair<String, Integer[]> caP1 = findEntry(ca1);
        Pair<String, Integer[]> caP2 = findEntry(ca2);
        if (caP1 == null || caP2 == null) {
            // unknown action: nothing to generalize (previously an NPE or a null element)
            return results;
        }

        if (ca1.equals(ca2)) {
            results.add(caP1);
        } else {
            // generalization of int arrays also implements IGeneralizer;
            // we take Integer[] which is the first element of the resultant list
            Integer[] res = new CommunicativeActionsAttribute()
                    .generalize(caP1.getSecond(), caP2.getSecond()).get(0);
            results.add(new Pair<String, Integer[]>("", res));
        }
        return results;
    }

    /** Extracts the lemma from a String or from the first element of a Pair; null for anything else. */
    private static String actionName(Object action) {
        if (action instanceof String) {
            return (String) action;
        }
        if (action instanceof Pair) {
            Object first = ((Pair<?, ?>) action).getFirst();
            return (first instanceof String) ? (String) first : null;
        }
        return null;
    }

    /** Returns the table entry whose lemma equals {@code name}, or null when there is none. */
    private Pair<String, Integer[]> findEntry(String name) {
        if (name == null) {
            return null;
        }
        for (Pair<String, Integer[]> ca : commActionsAttr) {
            if (ca.getFirst().equals(name)) {
                return ca;
            }
        }
        return null;
    }
}

Приложение 3

В данном приложении приведены основные фрагменты кода (на языке Java), предназначенного для реализации операции сходства на графах, соответствующих чащам разбора, а также на их лингвистических проекциях.

Вычисление сходства для деревьев, входящих в чащу.
Пакет matching, файл ParseTreePathMatcher.java.

/**
 * Computes a generalization (common sub-pattern) of two parse tree paths by
 * aligning their part-of-speech and lemma sequences.
 */
public class ParseTreePathMatcher {

    /** Number of randomized alignment attempts made in each scan direction. */
    private static final int NUMBER_OF_ITERATIONS = 2;

    /** Prepositions for which {@link #prepositionalNNSTransform} rearranges the phrase. */
    private static final List<String> SHIFTABLE_PREPOSITIONS =
            java.util.Arrays.asList("to", "on", "in", "of", "with", "by", "from");

    private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
    private POSManager posManager = new POSManager();
    private LemmaFormManager lemmaFormManager = new LemmaFormManager();

    public ParseTreePathMatcher() {
    }

    /**
     * Deterministic alignment of two paths. Positions with similar POS are
     * added to the result; on a mismatch the two cursors are advanced in turn.
     *
     * @param chunk1 first path
     * @param chunk2 second path
     * @return path built from the common POS tags, with the shared lemma at
     *         each position or {@code "*"} where the lemmas differ or are missing
     */
    public ParseTreePath generalizeTwoGroupedPhrasesOLD(ParseTreePath chunk1, ParseTreePath chunk2) {
        List<String> pos1 = chunk1.getPOSs();
        // fixed: the second POS list used to be read from chunk1 as well
        List<String> pos2 = chunk2.getPOSs();
        List<String> lemmas1 = chunk1.getLemmas();
        List<String> lemmas2 = chunk2.getLemmas();

        List<String> commonPOS = new ArrayList<String>();
        List<String> commonLemmas = new ArrayList<String>();
        int k1 = 0, k2 = 0;
        boolean incrFirst = true;
        while (k1 < pos1.size() && k2 < pos2.size()) {
            // first check if the same POS
            String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));
            if (sim != null) {
                commonPOS.add(pos1.get(k1));
                boolean sameLemma = lemmas1.size() > k1 && lemmas2.size() > k2
                        && lemmas1.get(k1).equals(lemmas2.get(k2));
                commonLemmas.add(sameLemma ? lemmas1.get(k1) : "*");
                k1++;
                k2++;
            } else if (incrFirst) {
                k1++;
            } else {
                k2++;
            }
            // the flag flips on every step, including steps where a match was found
            incrFirst = !incrFirst;
        }
        return new ParseTreePath(commonLemmas, commonPOS, 0, 0);
    }

    /**
     * "A for B" =&gt; "B have A": transforms { A B C prep X Y } into { A B X Y C }.
     * The word right before the last preposition is moved to the end and the
     * preposition itself is dropped. Should only be applied to a noun phrase.
     *
     * @param ch path to transform
     * @return the transformed path, or {@code ch} itself when the last "IN"
     *         tag is absent, stands at position 0 or 1, has no lemma, or its
     *         lemma is not one of to/on/in/of/with/by/from
     */
    public ParseTreePath prepositionalNNSTransform(ParseTreePath ch) {
        List<String> pos = ch.getPOSs();
        List<String> lemmas = ch.getLemmas();

        int indexIN = pos.lastIndexOf("IN");
        // -1 (no preposition) and positions 0/1 are left alone; the last check
        // protects against a lemma list shorter than the POS list
        if (indexIN < 2 || indexIN >= lemmas.size()) {
            return ch;
        }
        if (!SHIFTABLE_PREPOSITIONS.contains(lemmas.get(indexIN))) {
            return ch;
        }

        int headIndex = indexIN - 1;

        List<String> transfPOS = new ArrayList<String>(pos.subList(0, headIndex));
        transfPOS.addAll(pos.subList(indexIN + 1, pos.size()));
        transfPOS.add(pos.get(headIndex));

        List<String> transfLemmas = new ArrayList<String>(lemmas.subList(0, headIndex));
        transfLemmas.addAll(lemmas.subList(indexIN + 1, lemmas.size()));
        transfLemmas.add(lemmas.get(headIndex));

        return new ParseTreePath(transfLemmas, transfPOS, 0, 0);
    }

    /**
     * Generalizes the two paths as they are and with the prepositional
     * transform applied to either of them, and keeps the best-scoring result.
     *
     * @param chunk1 first path
     * @param chunk2 second path
     * @return the highest-scoring of the three generalizations; on ties the
     *         later candidate is preferred, as in the original comparison chain
     */
    public ParseTreePath generalizeTwoGroupedPhrasesRandomSelectHighestScoreWithTransforms(
            ParseTreePath chunk1, ParseTreePath chunk2) {
        ParseTreePath chRes1 = generalizeTwoGroupedPhrasesRandomSelectHighestScore(chunk1, chunk2);
        ParseTreePath chRes2 = generalizeTwoGroupedPhrasesRandomSelectHighestScore(
                prepositionalNNSTransform(chunk1), chunk2);
        ParseTreePath chRes3 = generalizeTwoGroupedPhrasesRandomSelectHighestScore(
                prepositionalNNSTransform(chunk2), chunk1);

        double score1 = parseTreeChunkListScorer.getScore(chRes1);
        double score2 = parseTreeChunkListScorer.getScore(chRes2);
        double score3 = parseTreeChunkListScorer.getScore(chRes3);

        if (score1 > score2) {
            return (score1 > score3) ? chRes1 : chRes3;
        }
        return (score2 > score3) ? chRes2 : chRes3;
    }

    /**
     * Runs several randomized alignments of the two paths, first scanning from
     * the beginning and then from the end, and returns the alignment with the
     * highest score.
     *
     * @param chunk1 first path
     * @param chunk2 second path
     * @return best-scoring generalization found; {@code null} only if no
     *         attempt scored above -1
     */
    public ParseTreePath generalizeTwoGroupedPhrasesRandomSelectHighestScore(
            ParseTreePath chunk1, ParseTreePath chunk2) {
        double globalScore = -1.0;
        ParseTreePath result = null;

        // forward attempts first, then backward ones - same order as before
        for (int pass = 0; pass < 2; pass++) {
            boolean fromEnd = (pass == 1);
            for (int timesRun = 0; timesRun < NUMBER_OF_ITERATIONS; timesRun++) {
                ParseTreePath currResult = alignOnce(chunk1, chunk2, fromEnd);
                double score = parseTreeChunkListScorer.getScore(currResult);
                if (score > globalScore) {
                    result = currResult;
                    globalScore = score;
                }
            }
        }
        return result;
    }

    /**
     * Performs one randomized alignment. On a mismatch one of the two cursors,
     * chosen at random, is moved.
     *
     * @param fromEnd {@code false} to scan left-to-right, {@code true} to scan
     *                right-to-left (the collected lists are reversed at the end)
     */
    private ParseTreePath alignOnce(ParseTreePath chunk1, ParseTreePath chunk2, boolean fromEnd) {
        List<String> pos1 = chunk1.getPOSs();
        List<String> pos2 = chunk2.getPOSs();
        List<String> lemmas1 = chunk1.getLemmas();
        List<String> lemmas2 = chunk2.getLemmas();

        List<String> commonPOS = new ArrayList<String>();
        List<String> commonLemmas = new ArrayList<String>();

        int step = fromEnd ? -1 : 1;
        int k1 = fromEnd ? pos1.size() - 1 : 0;
        int k2 = fromEnd ? pos2.size() - 1 : 0;

        while (k1 >= 0 && k1 < pos1.size() && k2 >= 0 && k2 < pos2.size()) {
            // first check if the same POS
            String sim = posManager.similarPOS(pos1.get(k1), pos2.get(k2));

            // lemmas are compared only when both lists reach this position;
            // previously get(k) was called before the size check and could throw
            String lemmaMatch = null;
            if (k1 < lemmas1.size() && k2 < lemmas2.size()) {
                lemmaMatch = lemmaFormManager.matchLemmas(null, lemmas1.get(k1), lemmas2.get(k2), sim);
            }

            if (acceptableLemmaAndPOS(sim, lemmaMatch)) {
                commonPOS.add(pos1.get(k1));
                commonLemmas.add(lemmaMatch != null ? lemmaMatch : "*");
                k1 += step;
                k2 += step;
            } else if (Math.random() > 0.5) {
                k1 += step;
            } else {
                k2 += step;
            }
        }

        if (fromEnd) {
            Collections.reverse(commonLemmas);
            Collections.reverse(commonPOS);
        }
        return new ParseTreePath(commonLemmas, commonPOS, 0, 0);
    }

    /**
     * Decides whether an aligned position may enter the generalization.
     *
     * @param sim        result of the POS similarity check, {@code null} when
     *                   the POS tags are not similar
     * @param lemmaMatch result of the lemma comparison; may be {@code null}
     * @return {@code false} when {@code sim} is {@code null} or
     *         {@code lemmaMatch} equals {@code "fail"}; {@code true}
     *         otherwise, even if {@code lemmaMatch} is {@code null}
     */
    public Boolean acceptableLemmaAndPOS(String sim, String lemmaMatch) {
        if (sim == null) {
            return false;
        }
        // fixed: the check used to be inverted (it rejected real matches and let "fail" through)
        return !"fail".equals(lemmaMatch);
    }
}

Преобразование чащи в граф.