/** * Recursively makes conjunctions by iterating through features at each * offset * * @param iters * iterate over the PropertyLists at each offset * @param currIndex * which offset we're currently on, e..g 1 in the list [0,1,2] * @param conjunctions * list of conjunctions * @param j * which offset list we're currently on, e.g. [0,1,2] in the list * [[0,1],[0,1,2]] * @param tsSize * size of token sequence * @param newfs * new features * @param tsi * token sequence index * @param oldfs * old features * @param iterIndices * counter to keep track how far in each iterator in "iters" * @return new features */ private PropertyList makeConjunctions(PropertyList.Iterator[] iters, int currIndex, int[][] conjunctions, int j, int tsSize, PropertyList newfs, int tsi, PropertyList[] oldfs, int[] iterIndices) { if (iters.length == currIndex) { // base case: add feature for current // conjunction of iters // avoid redundant doubling of feature space; include only upper // triangle if (redundant(conjunctions, j, iterIndices)) { return newfs; } String newFeature = ""; double newValue = 1.0; for (int i = 0; i < iters.length; i++) { String s = iters[i].getKey(); if (featureRegex != null && !featureRegex.matcher(s).matches()) return newfs; newFeature += (i == 0 ? "" : "_&_") + s + (conjunctions[j][i] == 0 ? "" : ("@" + conjunctions[j][i])); newValue *= iters[i].getNumericValue(); } System.err.println("Adding new feature " + newFeature); newfs = PropertyList.add(newFeature, newValue, newfs); } else { // recursive step while (iters[currIndex].hasNext()) { iters[currIndex].next(); iterIndices[currIndex]++; newfs = makeConjunctions(iters, currIndex + 1, conjunctions, j, tsSize, newfs, tsi, oldfs, iterIndices); } // reset iterator at currIndex iters[currIndex] = getOffsetIter(conjunctions, j, currIndex, tsSize, tsi, oldfs); iterIndices[currIndex] = -1; } return newfs; }
public Instance pipe(Instance carrier) { TokenSequence ts = (TokenSequence) carrier.getData(); int tsSize = ts.size(); PropertyList[] oldfs = null; PropertyList[] newfs = null; try { oldfs = new PropertyList[ts.size()]; } catch (Exception e) { System.err.println("Exception allocating oldfs: " + e); } try { newfs = new PropertyList[ts.size()]; } catch (Exception e) { System.err.println("Exception allocating newfs: " + e); } for (int i = 0; i < tsSize; i++) oldfs[i] = ts.get(i).getFeatures(); if (includeOriginalSingletons) for (int i = 0; i < tsSize; i++) newfs[i] = ts.get(i).getFeatures(); for (int i = 0; i < tsSize; i++) { for (int j = 0; j < conjunctions.length; j++) { // allow conjunction offsets of length n - awc PropertyList.Iterator[] iters = getOffsetIters(conjunctions, j, tsSize, i, oldfs); if (iters == null) continue; int[] iterIndices = new int[iters.length]; for (int ii = 0; ii < iterIndices.length; ii++) iterIndices[ii] = -1; newfs[i] = makeConjunctions(iters, 0, conjunctions, j, tsSize, newfs[i], i, oldfs, iterIndices); } } // Put the new PropertyLists in place for (int i = 0; i < ts.size(); i++) ts.get(i).setFeatures(newfs[i]); return carrier; }