Диссертация (1137218), страница 26
Текст из файла (страница 26)
Пакет thicket2graph, файл GraphFromPTreeBuilder.java.

/**
 * Converts the first sentence tree of a parse thicket into an undirected
 * graph whose vertices are {@code ParseGraphNode}s.
 */
public class GraphFromPTreeBuilder {

  /**
   * Prints the first sentence tree of {@code pt} to standard output and
   * builds the graph for that tree.
   *
   * @param pt parse thicket; only {@code pt.getSentences().get(0)} is used
   * @return graph built from the first sentence tree
   */
  public Graph<ParseGraphNode, DefaultEdge> buildGraphFromPT(ParseThicket pt) {
    PrintWriter out = new PrintWriter(System.out);
    List<Tree> ts = pt.getSentences();
    ts.get(0).pennPrint(out);
    Graph<ParseGraphNode, DefaultEdge> gfragment = buildGGraphFromTree(ts.get(0));
    // ParseTreeVisualizer applet = new ParseTreeVisualizer();
    // applet.showGraph(gfragment);
    return gfragment;
  }

  /**
   * Creates a graph with a root vertex labelled {@code "S 0"} and fills it by
   * walking {@code tree}.
   */
  private Graph<ParseGraphNode, DefaultEdge> buildGGraphFromTree(Tree tree) {
    Graph<ParseGraphNode, DefaultEdge> g =
        new SimpleGraph<ParseGraphNode, DefaultEdge>(DefaultEdge.class);
    ParseGraphNode root = new ParseGraphNode(tree, "S 0");
    g.addVertex(root);
    navigate(tree, g, 0, root);
    return g;
  }

  /**
   * Recursively adds one vertex per non-leaf child of {@code tree} and links
   * it to {@code currParent}.
   *
   * @param tree       subtree whose children are visited
   * @param g          graph being populated
   * @param l          current depth, appended to every vertex label after '#'
   * @param currParent vertex the children are attached to
   */
  private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l,
      ParseGraphNode currParent) {
    // String currParent = tree.label().value()+" $"+Integer.toString(l);
    // g.addVertex(currParent);
    if (tree.getChildrenAsList().size() == 1) {
      // NOTE(review): the single child is descended into here AND visited
      // again by the loop below - confirm this double visit is intended.
      navigate(tree.getChildrenAsList().get(0), g, l + 1, currParent);
    } else if (tree.getChildrenAsList().size() == 0) {
      return;
    }

    for (Tree child : tree.getChildrenAsList()) {
      String currChild = null;
      ParseGraphNode currChildNode = null;
      try {
        if (child.isLeaf()) {
          continue;
        }
        if (child.label().value().startsWith("S")) {
          navigate(child.getChildrenAsList().get(0), g, l + 1, currParent);
        }
        // Non-phrasal or pre-terminal nodes are labelled with their full
        // string form, phrasal nodes only with their category label.
        if (!child.isPhrasal() || child.isPreTerminal()) {
          currChild = child.toString() + "#" + Integer.toString(l);
        } else {
          currChild = child.label().value() + "#" + Integer.toString(l);
        }
        currChildNode = new ParseGraphNode(child, currChild);
        g.addVertex(currChildNode);
        g.addEdge(currParent, currChildNode);
      } catch (Exception e) {
        // Best effort: a failure on one node must not abort the traversal.
        e.printStackTrace();
      }
      navigate(child, g, l + 1, currChildNode);
    }
  }
}

Вычисление сходства на графах, представляющих чащи разбора.
Пакет thicket2graph, файл EdgeProductBuilder.java.

/**
 * Builds the edge product of two parse graphs and extracts its biggest
 * maximal cliques.
 */
public class EdgeProductBuilder {
  private Matcher matcher = new Matcher();
  private ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance();
  private GraphFromPTreeBuilder graphBuilder = new GraphFromPTreeBuilder();

  /**
   * Builds the product graph of {@code g1} and {@code g2}.
   *
   * <p>A vertex {@code {s1, t1, s2, t2}} is created for every pair of edges
   * (one from each graph) whose sources generalize to a non-empty result and
   * whose targets generalize to a non-empty result. Two product vertices are
   * connected when {@link #bothAjacentOrNeitherAdjacent} accepts them.
   *
   * @param g1 first parse graph
   * @param g2 second parse graph
   * @return the product graph
   */
  public Graph<ParseGraphNode[], DefaultEdge> buildEdgeProduct(
      Graph<ParseGraphNode, DefaultEdge> g1, Graph<ParseGraphNode, DefaultEdge> g2) {
    Graph<ParseGraphNode[], DefaultEdge> gp =
        new SimpleGraph<ParseGraphNode[], DefaultEdge>(DefaultEdge.class);

    Set<DefaultEdge> edges1 = g1.edgeSet();
    Set<DefaultEdge> edges2 = g2.edgeSet();
    // build nodes of product graph
    for (DefaultEdge e1 : edges1) {
      for (DefaultEdge e2 : edges2) {
        ParseGraphNode sourceE1s = g1.getEdgeSource(e1), sourceE1t = g1.getEdgeTarget(e1);
        ParseGraphNode sourceE2s = g2.getEdgeSource(e2), sourceE2t = g2.getEdgeTarget(e2);
        if (isNotEmpty(matcher.generalize(sourceE1s.getPtNodes(), sourceE2s.getPtNodes()))
            && isNotEmpty(matcher.generalize(sourceE1t.getPtNodes(), sourceE2t.getPtNodes()))) {
          gp.addVertex(new ParseGraphNode[] { sourceE1s, sourceE1t, sourceE2s, sourceE2t });
        }
      }
    }

    Set<ParseGraphNode[]> productVerticesSet = gp.vertexSet();
    List<ParseGraphNode[]> productVerticesList =
        new ArrayList<ParseGraphNode[]>(productVerticesSet);
    for (int i = 0; i < productVerticesList.size(); i++) {
      for (int j = i + 1; j < productVerticesList.size(); j++) {
        ParseGraphNode[] prodVertexI = productVerticesList.get(i);
        ParseGraphNode[] prodVertexJ = productVerticesList.get(j);
        if (bothAjacentOrNeitherAdjacent(prodVertexI, prodVertexJ)) {
          gp.addEdge(prodVertexI, prodVertexJ);
        }
      }
    }
    return gp;
  }

  /**
   * Finding the maximal clique is the slowest part.
   *
   * @param g product graph
   * @return result of {@code BronKerboschCliqueFinder.getBiggestMaximalCliques()} on {@code g}
   */
  public Collection<Set<ParseGraphNode[]>> getMaximalCommonSubgraphs(
      Graph<ParseGraphNode[], DefaultEdge> g) {
    BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge> finder =
        new BronKerboschCliqueFinder<ParseGraphNode[], DefaultEdge>(g);
    Collection<Set<ParseGraphNode[]>> cliques = finder.getBiggestMaximalCliques();
    return cliques;
  }

  /**
   * Returns true when exactly 2 or exactly 4 entries of {@code prodVertexI}
   * also occur in {@code prodVertexJ}.
   */
  private boolean bothAjacentOrNeitherAdjacent(ParseGraphNode[] prodVertexI,
      ParseGraphNode[] prodVertexJ) {
    List<ParseGraphNode> prodVertexIlist =
        new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexI));
    List<ParseGraphNode> prodVertexJlist =
        new ArrayList<ParseGraphNode>(Arrays.asList(prodVertexJ));
    prodVertexIlist.retainAll(prodVertexJlist);
    return (prodVertexIlist.size() == 2 || prodVertexIlist.size() == 4);
  }

  /**
   * Returns true when the generalization result has a non-empty first group.
   * A null or empty outer list counts as empty instead of throwing.
   */
  private boolean isNotEmpty(List<List<ParseTreeChunk>> generalize) {
    return generalize != null
        && !generalize.isEmpty()
        && generalize.get(0) != null
        && generalize.get(0).size() > 0;
  }

  /**
   * Builds parse thickets and graphs for both paragraphs, forms their edge
   * product and returns its biggest maximal cliques.
   *
   * @param para1 first text
   * @param para2 second text
   * @return cliques of the product graph
   */
  public Collection<Set<ParseGraphNode[]>> assessRelevanceViaMaximalCommonSubgraphs(
      String para1, String para2) {
    // first build PTs for each text
    ParseThicket pt1 = ptBuilder.buildParseThicket(para1);
    ParseThicket pt2 = ptBuilder.buildParseThicket(para2);
    // then build phrases and rst arcs
    Graph<ParseGraphNode, DefaultEdge> g1 = graphBuilder.buildGraphFromPT(pt1);
    Graph<ParseGraphNode, DefaultEdge> g2 = graphBuilder.buildGraphFromPT(pt2);

    Graph<ParseGraphNode[], DefaultEdge> gp = buildEdgeProduct(g1, g2);
    Collection<Set<ParseGraphNode[]>> col = getMaximalCommonSubgraphs(gp);
    return col;
  }
}

Приложение 4
В данном приложении приведены основные фрагменты кода (на языке Java), предназначенного для реализации поиска ответа на сложные вопросы с помощью вычисления сходства чащ разбора и их проекций для вопроса и потенциальных ответов.
Оценка итогового значения релевантности (score) на основе результатов операции сходства текстовых абзацев.
Пакет textsimilarity, файл ParseTreeChunkListScorer.java.

/**
 * Turns a generalization result (chunks grouped by phrase type) into a
 * numeric relevance score.
 */
public class ParseTreeChunkListScorer {

  /**
   * Finds the single chunk with the highest score.
   *
   * @param matchResult chunks grouped by phrase type
   * @return the maximum of {@link #getScore} over all chunks, or 0.0 when
   *         there are no chunks or none scores above zero
   */
  public double getParseTreeChunkListScore(List<List<ParseTreeChunk>> matchResult) {
    double currScore = 0.0;
    for (List<ParseTreeChunk> chunksGivenPhraseType : matchResult) {
      for (ParseTreeChunk chunk : chunksGivenPhraseType) {
        double score = getScore(chunk);
        if (score > currScore) {
          currScore = score;
        }
      }
    }
    return currScore;
  }

  /**
   * Takes the maximum score per phrase type and sums those maxima.
   *
   * @param matchResult chunks grouped by phrase type
   * @return sum of the per-group maxima that exceed 0.5
   */
  public double getParseTreeChunkListScoreAggregPhraseType(
      List<List<ParseTreeChunk>> matchResult) {
    double currScoreTotal = 0.0;
    for (List<ParseTreeChunk> chunksGivenPhraseType : matchResult) {
      double currScorePT = 0.0;
      for (ParseTreeChunk chunk : chunksGivenPhraseType) {
        double score = getScore(chunk);
        if (score > currScorePT) {
          currScorePT = score;
        }
      }
      // count the group only if substantial for the given phrase type
      if (currScorePT > 0.5) {
        currScoreTotal += currScorePT;
      }
    }
    return currScoreTotal;
  }

  /**
   * Scores one chunk by summing a weight per lemma, chosen from the lemma
   * and the POS tag at the same index. The score is meaningful only for
   * chunks which are results of generalization.
   *
   * @param chunk chunk whose lemma and POS lists are read in parallel
   * @return the accumulated weight
   */
  public double getScore(ParseTreeChunk chunk) {
    double score = 0.0;
    int i = 0;
    for (String l : chunk.getLemmas()) {
      String pos = chunk.getPOSs().get(i);
      if (l.equals("*")) {
        if (pos.startsWith("CD")) {
          // number vs number gives high score although different numbers
          score += 0.7;
        } else if (pos.endsWith("_high")) {
          // if query modification adds 'high'
          score += 1.0;
        } else {
          score += 0.1;
        }
      } else {
        if (pos.startsWith("NN") || pos.startsWith("NP")
            || pos.startsWith("CD") || pos.startsWith("RB")) {
          score += 1.0;
        } else if (pos.startsWith("VB") || pos.startsWith("JJ")) {
          if (l.equals("get")) {
            // 'common' verbs are not that important
            score += 0.3;
          } else {
            score += 0.5;
          }
        } else {
          score += 0.3;
        }
      }
      i++;
    }
    return score;
  }
}

Переупорядочивание результатов поиска.
Пакет textsimilarity,файл SearchResultsProcessor.java.208public class SearchResultsProcessor extends BingQueryRunner {private static Logger LOG = Logger.getLogger("opennlp.tools.similarity.apps.SearchResultsProcessor");private ParseTreeChunkListScorerParseTreeChunkListScorer();parseTreeChunkListScorer=newParserChunker2MatcherProcessor sm;WebSearchEngineResultsScraperWebSearchEngineResultsScraper();scraper=new/** Takes a search engine API (or scraped) search results and calculates theparse tree similarity* between the question and each snippet.
Ranks those snippets with higher* similarity score up*/private List<HitBase> calculateMatchScoreResortHits(List<HitBase> hits,String searchQuery) {List<HitBase> newHitList = new ArrayList<HitBase>();sm = ParserChunker2MatcherProcessor.getInstance();for (HitBase hit : hits) {Stringsnapshot=hit.getAbstractText().replace("<b>...</b>",".").replace("<span class='best-phrase'>", " ").replace("<span>", " ").replace("<span>", "").replace("<b>", "").replace("</b>", "");snapshot = snapshot.replace("</B>", "").replace("<B>", "").replace("<br>", "").replace("</br>", "").replace("...", ".
").replace("|", " ").replace(">", " ");snapshot += " . " + hit.getTitle();Double score = 0.0;try {SentencePairMatchResult matchRes = sm.assessRelevance(snapshot,searchQuery);List<List<ParseTreeChunk>> match = matchRes.getMatchResult();209score = parseTreeChunkListScorer.getParseTreeChunkListScore(match);LOG.finest(score + " | " + snapshot);} catch (Exception e) {LOG.severe("Problem processing snapshot " + snapshot);e.printStackTrace();}hit.setGenerWithQueryScore(score);newHitList.add(hit);}Collections.sort(newHitList, new HitBaseComparable());LOG.info("\n\n ============= NEW ORDER ================= ");for (HitBase hit : newHitList) {LOG.info(hit.toString());}return newHitList;}public void close() {sm.close();}public List<HitBase> runSearch(String query) {List<HitBase> hits = scraper.runSearch(query);hits = calculateMatchScoreResortHits(hits, query);return hits;}public List<HitBase> runSearchViaAPI(String query) {List<HitBase> hits = null;try {List<HitBase> resultList = runSearch(query);210// now we apply our own relevance filterhits = calculateMatchScoreResortHits(resultList, query);} catch (Exception e) {// e.printStackTrace();LOG.info("No search results for query '" + query);return null;}return hits;}}Приложение 5В данном приложении приведены основные фрагменты кода (наязыке Java), предназначенного для построения узорных структур и ихпроекций на чащах разбора и реализации алгоритма кластеризациитекстов.Построение проекции узорной структуры на чащах разбора,алгоритмAddIntent.Пакетpattern_structure,файлPhrasePatternStructure.public class PhrasePatternStructure {int objectCount;int attributeCount;ArrayList<PhraseConcept> conceptList;ParseTreeMatcherDeterministic md;public PhrasePatternStructure(int objectCounts, int attributeCounts) {objectCount = objectCounts;attributeCount = attributeCounts;conceptList = new ArrayList<PhraseConcept>();PhraseConcept bottom = new PhraseConcept();md = new ParseTreeMatcherDeterministic();/*Set<Integer> b_intent = new 
HashSet<Integer>();for (int index = 0; index < attributeCount; ++index) {b_intent.add(index);211}bottom.setIntent(b_intent);*/bottom.setPosition(0);conceptList.add(bottom);}public int GetMaximalConcept(List<List<ParseTreeChunk>> intent, intGenerator) {boolean parentIsMaximal = true;while(parentIsMaximal) {parentIsMaximal = false;for (int parent : conceptList.get(Generator).parents) {if(conceptList.get(parent).intent.containsAll(intent)) {Generator = parent;parentIsMaximal = true;break;}}}return Generator;}public int AddIntent(List<List<ParseTreeChunk>> intent, int generator){System.out.println("debug");System.out.println("called for " + intent);//printLattice();int generator_tmp = GetMaximalConcept(intent, generator);generator = generator_tmp;if (conceptList.get(generator).intent.equals(intent)) {System.out.println("atconceptList.get(generator).intent);generator:"System.out.println("to add:" + intent);System.out.println("already generated");return generator;}+212Set<Integer>conceptList.get(generator).parents;generatorParents=Set<Integer> newParents = new HashSet<Integer>();for (int candidate : generatorParents) {if (!intent.containsAll(conceptList.get(candidate).intent)){//if (!conceptList.get(candidate).intent.containsAll(intent)){//Set<Integer>HashSet<Integer>(conceptList.get(candidate).intent);intersection=new//List<List<ParseTreeChunk>> intersection = newArrayList<List<ParseTreeChunk>>(conceptList.get(candidate).intent);//intersection.retainAll(intent);List<List<ParseTreeChunk>> intersection = md.matchTwoSentencesGroupedChunksDeterministic(intent,conceptList.get(candidate).intent);System.out.println("recursive call (inclusion)");candidate = AddIntent(intersection, candidate);}boolean addParents = true;System.out.println("now iterating over parents");Iterator<Integer> iterator = newParents.iterator();while (iterator.hasNext()) {Integer parent = iterator.next();if(conceptList.get(parent).intent.containsAll(conceptList.get(candidate).intent)) {addParents = 
false;break;}else {if(conceptList.get(candidate).intent.containsAll(conceptList.get(parent).intent)) {iterator.remove();}}}/*for (int parent : newParents) {213System.out.println("parent = " + parent);System.out.println("candidateintent:"+conceptList.get(candidate).intent);System.out.println("parentintent:"+conceptList.get(parent).intent);if(conceptList.get(parent).intent.containsAll(conceptList.get(candidate).intent)) {addParents = false;break;}else {if(conceptList.get(candidate).intent.containsAll(conceptList.get(parent).intent)) {newParents.remove(parent);}}}*/if (addParents) {newParents.add(candidate);}}System.out.println("size of lattice: " + conceptList.size());PhraseConcept newConcept = new PhraseConcept();newConcept.setIntent(intent);newConcept.setPosition(conceptList.size());conceptList.add(newConcept);conceptList.get(generator).parents.add(newConcept.position);for (int newParent: newParents) {if(conceptList.get(generator).parents.contains(newParent)) {conceptList.get(generator).parents.remove(newParent);}conceptList.get(newConcept.position).parents.add(newParent);}214return newConcept.position;}public void printLatticeStats() {System.out.println("Lattice stats");System.out.println("max_object_index = " + objectCount);System.out.println("max_attribute_index = " + attributeCount);System.out.println("Currentconceptcount="+conceptList.size());}public void printLattice() {for (int i = 0; i < conceptList.size(); ++i) {printConceptByPosition(i);}}public void printConceptByPosition(int index) {System.out.println("Concept at position " + index);conceptList.get(index).printConcept();}publicformGroupedPhrasesFromChunksForPara(List<List<ParseTreeChunk>>List<List<ParseTreeNode>> phrs) {List<List<ParseTreeChunk>>ArrayList<List<ParseTreeChunk>>();results=List<ParseTreeChunk>nps=ArrayList<ParseTreeChunk>(), vps = new ArrayList<ParseTreeChunk>(),pps = new ArrayList<ParseTreeChunk>();for(List<ParseTreeNode> ps:phrs){ParseTreeChunk ch = convertNodeListIntoChunk(ps);String 
ptype = ps.get(0).getPhraseType();if (ptype.equals("NP")){nps.add(ch);} else if (ptype.equals("VP")){vps.add(ch);} else if (ptype.equals("PP")){pps.add(ch);}newnew215}results.add(nps); results.add(vps); results.add(pps);return results;}privateconvertNodeListIntoChunk(List<ParseTreeNode> ps) {ParseTreeChunkList<String> lemmas = new ArrayList<String>(), poss = newArrayList<String>();for(ParseTreeNode n: ps){lemmas.add(n.getWord());poss.add(n.getPos());}ParseTreeChunk ch = new ParseTreeChunk(lemmas, poss, 0, 0);ch.setMainPOS(ps.get(0).getPhraseType());return ch;}}Построение и фильтрация узорной структуры на чащах разбора.Пакет pattern_structure, файл LinguisticPhrasePatternStructure, классLinguisticPhrasePatternStructure(наследуетклассуPhrasePatternStructure).public class LinguisticPatternStructure extends PhrasePatternStructure {public LinguisticPatternStructure(int objectCounts, int attributeCounts) {super(objectCounts, attributeCounts);ConceptLattice cl = null;}216public void AddExtentToAncestors(LinkedHashSet<Integer>extent, intcurNode) {//if (conceptList.get(curNode).parents.size()>0){for (int parent : conceptList.get(curNode).parents){conceptList.get(parent).addExtents(extent);AddExtentToAncestors(extent, parent);}}}publicintAddIntent(List<List<ParseTreeChunk>>intent,LinkedHashSet<Integer>extent,int generator) {System.out.println("debug");System.out.println("called for " + intent);//printLattice();int generator_tmp = GetMaximalConcept(intent, generator);generator = generator_tmp;if (conceptList.get(generator).intent.equals(intent)) {System.out.println("atgenerator:"conceptList.get(generator).intent);System.out.println("to add:" + intent);System.out.println("already generated");+217AddExtentToAncestors(extent, generator);return generator;}Set<Integer>generatorParents=conceptList.get(generator).parents;Set<Integer> newParents = new HashSet<Integer>();for (int candidate : generatorParents) {if 
(!intent.containsAll(conceptList.get(candidate).intent)){List<List<ParseTreeChunk>> intersection = md.matchTwoSentencesGroupedChunksDeterministic(intent,conceptList.get(candidate).intent);LinkedHashSet<Integer>new_extent=newLinkedHashSet<Integer>();new_extent.addAll(conceptList.get(candidate).extent);new_extent.addAll(extent);if (intent.size()!=intersection.size()){System.out.println("recursivecall(inclusion)");System.out.println(intent+"----"+intersection);candidateAddIntent(intersection,new_extent, candidate);=218}}boolean addParents = true;System.out.println("now iterating over parents");Iterator<Integer> iterator = newParents.iterator();while (iterator.hasNext()) {Integer parent = iterator.next();if(conceptList.get(parent).intent.containsAll(conceptList.get(candidate).intent)) {addParents = false;break;}else {if(conceptList.get(candidate).intent.containsAll(conceptList.get(parent).intent)) {iterator.remove();}}}if (addParents) {newParents.add(candidate);219}}System.out.println("size of lattice: " + conceptList.size());PhraseConcept newConcept = new PhraseConcept();newConcept.setIntent(intent);LinkedHashSet<Integer>new_extent=newLinkedHashSet<Integer>();new_extent.addAll(conceptList.get(generator).extent);new_extent.addAll(extent);newConcept.addExtents(new_extent);newConcept.setPosition(conceptList.size());conceptList.add(newConcept);conceptList.get(generator).parents.add(newConcept.position);conceptList.get(newConcept.position).childs.add(generator);for (int newParent: newParents) {if(conceptList.get(generator).parents.contains(newParent)) {conceptList.get(generator).parents.remove(newParent);conceptList.get(newParent).childs.remove(generator);220}conceptList.get(newConcept.position).parents.add(newParent);conceptList.get(newParent).addExtents(new_extent);AddExtentToAncestors(new_extent, newParent);conceptList.get(newParent).childs.add(newConcept.position);}return newConcept.position;}public void printLatticeExtended() {for (int i = 0; i < conceptList.size(); 
++i) {printConceptByPositionExtended(i);}}public void printConceptByPositionExtended(int index) {System.out.println("Concept at position " + index);conceptList.get(index).printConceptExtended();}221public int [][] toContext(int extentCardinality){int newAttrCount = conceptList.size();ArrayList<PhraseConcept>cList=newArrayList<PhraseConcept>();cList.addAll(conceptList);boolean run = true;int k=0;while (run && k<conceptList.size()){if (conceptList.get(k).intent.size() == attributeCount){if (conceptList.get(k).extent.size() == 0)for (Integer i:conceptList.get(k).parents)cList.remove(i);cList.remove(k);run=false;}elsek+=1;}run = true;k=0;222while (run && k<=newAttrCount){if (cList.get(k).extent.size()==0)k++;run = false;}newAttrCount = cList.size();Set<Integer> nodeExtend;int[][]binaryContextint[extentCardinality][newAttrCount];for (int j = 0; j<newAttrCount; j++){nodeExtend = cList.get(j).extent;for (Integer i: nodeExtend){binaryContext[i][j]=1;}}return binaryContext;}public void logStability(){int min_delta = -1, delta = -1;float sum = 0;for (int i = 0; i < conceptList.size(); ++i) {=new223min_delta = Integer.MAX_VALUE;sum = 0;PhraseConcept pc = conceptList.get(i);Set<Integer> childs = pc.childs;for (Integer j: childs) {delta=pc.extent.size()-conceptList.get(j).extent.size();if (delta<min_delta)min_delta = delta;sum += Math.pow(2, -delta);}pc.intLogStabilityBottom=(Math.log(sum)/Math.log(2.0));pc.intLogStabilityUp = min_delta;}}}Приложение 6В данном приложении приведены основные фрагменты кода (наязыке Java), применявшегося для обучения на текстовых абзацах.Обучение и классификация на лесе регулярных деревьевразбора.Пакетkernel_interface,файлMultiSentenceKernelBasedSearchResultsProcessor.public class MultiSentenceKernelBasedSearchResultsProcessorMultiSentenceSearchResultsProcessor{extends224private static Logger LOG = 
Logger.getLogger("opennlp.tools.similarity.apps.MultiSentenceKernelBasedSearchResultsProcessor");privateWebSearchEngineResultsScraperWebSearchEngineResultsScraper();scraper=newprotected Matcher matcher = new Matcher();private ParseTreeChunkListScorer parseTreeChunkListScorer = newParseTreeChunkListScorer();protected BingQueryRunnerMultipageSearchResults bingSearcher = newBingQueryRunnerMultipageSearchResults();private SnippetToParagraph snp = new SnippetToParagraph();private TreeKernelRunner tkRunner = new TreeKernelRunner();protected final float lower_threshold = (float) 0.2;protected final float upper_threshold = (float) 0.8;protected final float ratio = (float) 0.4; //соотношение обучающей итестовой выборкиprivate String path;public void setKernelPath (String path){this.path=path;}protected static final String modelFileName = "model.txt";protected static final String trainingFileName = "training.txt";protected static final String unknownToBeClassified = "unknown.txt";protected static final String classifierOutput = "classifier_output.txt";protectedstatic"\\Answers\\answers_test.csv";protectedstatic"\\Answers\\answers_learn.csv";finalfinalStringStringdetailedOutputdetailedLearningOutput==225public List<HitBase> runSearchViaAPI(String query) {List<HitBase> hits = null;List<String[]> output = new ArrayList<String[]>();String[] sent;String[] fullQuery = query.split("!!!");try {List<HitBase>bingSearcher.runSearch(fullQuery[2], 100);//100resultList=// now we apply our own relevance filter//hits = calculateMatchScoreResortHits(resultList, query);hits = resultList;//once we applied our re-ranking, we set highly ranked aspositive set, low-rated as negative set//and classify search results from the middle//training set is formed from original documents for thesearch results,// and 10 of these search results from the middle areclassified//true for snippetshits = filterOutIrrelevantHitsByTreeKernelLearning(hits,fullQuery[2], false);//true for snippets//copying results to 
the List<String[]>sent = new String[2];sent[1] = fullQuery[0] + " " + fullQuery[1];output.add(new String[] {""});output.add(new String[] {""});output.add(sent);sent = new String[2];sent[1] = fullQuery[2];output.add(sent);output.add(new String[] {""});for(HitBase h : hits){226sent = new String[2];sent[0]h.getGenerWithQueryScore().toString() + " ";=""+output.add(sent);sent = new String[1];sent[0] = "page content = " + h.getPageContent();output.add(sent);sent = new String[1];sent[0]="origsent="+h.getOriginalSentences().toString();output.add(sent);sent = new String[1];sent[0] = "title = " + h.getTitle();output.add(sent);sent = new String[1];sent[0] = "abstract = " + h.getAbstractText();output.add(sent);output.add(new String[] {""});}//appending results to the reportProfileReaderWriter.appendReport(output,path+detailedOutput);} catch (Exception e) {e.printStackTrace();LOG.info("No search results for query '" + fullQuery[2]);return null;}return hits;}private List<HitBase> filterOutIrrelevantHitsByTreeKernelLearning(227List<HitBase> hits, String query, Boolean onlySnippets){List<HitBase> newHitList = new ArrayList<HitBase>();List<HitBase> newHitListTraining = new ArrayList<HitBase>();// form the training set from original documents.