luncen 查询条件不能大于1024
package org.apache.lucene.search; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents matching boolean combinations of other * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other * BooleanQuerys. */ public class BooleanQuery extends Query implements Iterable<BooleanClause> { private static int maxClauseCount = 10240; /** Thrown when an attempt is made to add more than {@link * #getMaxClauseCount()} clauses. This typically happens if * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery * is expanded to many terms during search. */ public static class TooManyClauses extends RuntimeException { public TooManyClauses() { super("maxClauseCount is set to " + maxClauseCount); } } /** Return the maximum number of clauses permitted, 1024 by default. * Attempts to add more than the permitted number of clauses cause {@link * TooManyClauses} to be thrown. * @see #setMaxClauseCount(int) */ public static int getMaxClauseCount() { return maxClauseCount; } /** * Set the maximum number of clauses permitted per BooleanQuery. * Default value is 1024. */ public static void setMaxClauseCount(int maxClauseCount) { if (maxClauseCount < 1) { throw new IllegalArgumentException("maxClauseCount must be >= 1"); } BooleanQuery.maxClauseCount = maxClauseCount; } private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>(); private final boolean disableCoord; /** Constructs an empty boolean query. */ public BooleanQuery() { disableCoord = false; } /** Constructs an empty boolean query. * * {@link Similarity#coord(int,int)} may be disabled in scoring, as * appropriate. For example, this score factor does not make sense for most * automatically generated queries, like {@link WildcardQuery} and {@link * FuzzyQuery}. * * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring. */ public BooleanQuery(boolean disableCoord) { this.disableCoord = disableCoord; } /** Returns true iff {@link Similarity#coord(int,int)} is disabled in * scoring for this query instance. * @see #BooleanQuery(boolean) */ public boolean isCoordDisabled() { return disableCoord; } /** * Specifies a minimum number of the optional BooleanClauses * which must be satisfied. * * <p> * By default no optional clauses are necessary for a match * (unless there are no required clauses). If this method is used, * then the specified number of clauses is required. * </p> * <p> * Use of this method is totally independent of specifying that * any specific clauses are required (or prohibited). This number will * only be compared against the number of matching optional clauses. * </p> * * @param min the number of optional clauses that must match */ public void setMinimumNumberShouldMatch(int min) { this.minNrShouldMatch = min; } protected int minNrShouldMatch = 0; /** * Gets the minimum number of the optional BooleanClauses * which must be satisfied. */ public int getMinimumNumberShouldMatch() { return minNrShouldMatch; } /** Adds a clause to a boolean query. * * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number * @see #getMaxClauseCount() */ public void add(Query query, BooleanClause.Occur occur) { add(new BooleanClause(query, occur)); } /** Adds a clause to a boolean query. * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number * @see #getMaxClauseCount() */ public void add(BooleanClause clause) { if (clauses.size() >= maxClauseCount) { throw new TooManyClauses(); } clauses.add(clause); } /** Returns the set of clauses in this query. */ public BooleanClause[] getClauses() { return clauses.toArray(new BooleanClause[clauses.size()]); } /** Returns the list of clauses in this query. */ public List<BooleanClause> clauses() { return clauses; } /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to * make it possible to do: * <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre> */ @Override public final Iterator<BooleanClause> iterator() { return clauses().iterator(); } /** * Expert: the Weight for BooleanQuery, used to * normalize, score and explain these queries. * * <p>NOTE: this API and implementation is subject to * change suddenly in the next release.</p> */ protected class BooleanWeight extends Weight { /** The Similarity implementation. */ protected Similarity similarity; protected ArrayList<Weight> weights; protected int maxCoord; // num optional + num required private final boolean disableCoord; public BooleanWeight(IndexSearcher searcher, boolean disableCoord) throws IOException { this.similarity = searcher.getSimilarity(); this.disableCoord = disableCoord; weights = new ArrayList<Weight>(clauses.size()); for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); Weight w = c.getQuery().createWeight(searcher); weights.add(w); if (!c.isProhibited()) { maxCoord++; } } } @Override public Query getQuery() { return BooleanQuery.this; } @Override public float getValueForNormalization() throws IOException { float sum = 0.0f; for (int i = 0 ; i < weights.size(); i++) { // call sumOfSquaredWeights for all clauses in case of side effects float s = weights.get(i).getValueForNormalization(); // sum sub weights if (!clauses.get(i).isProhibited()) { // only add to sum for non-prohibited clauses sum += s; } } sum *= getBoost() * getBoost(); // boost each sub-weight return sum ; } public float coord(int overlap, int maxOverlap) { // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away, // so coord() is not applied. But when BQ cannot optimize itself away // for a single clause (minNrShouldMatch, prohibited clauses, etc), its // important not to apply coord(1,1) for consistency, it might not be 1.0F return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap); } @Override public void normalize(float norm, float topLevelBoost) { topLevelBoost *= getBoost(); // incorporate boost for (Weight w : weights) { // normalize all clauses, (even if prohibited in case of side affects) w.normalize(norm, topLevelBoost); } } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { final int minShouldMatch = BooleanQuery.this.getMinimumNumberShouldMatch(); ComplexExplanation sumExpl = new ComplexExplanation(); sumExpl.setDescription("sum of:"); int coord = 0; float sum = 0.0f; boolean fail = false; int shouldMatchCount = 0; Iterator<BooleanClause> cIter = clauses.iterator(); for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) { Weight w = wIter.next(); BooleanClause c = cIter.next(); if (w.scorer(context, true, true, context.reader().getLiveDocs()) == null) { if (c.isRequired()) { fail = true; Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); sumExpl.addDetail(r); } continue; } Explanation e = w.explain(context, doc); if (e.isMatch()) { if (!c.isProhibited()) { sumExpl.addDetail(e); sum += e.getValue(); coord++; } else { Explanation r = new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")"); r.addDetail(e); sumExpl.addDetail(r); fail = true; } if (c.getOccur() == Occur.SHOULD) { shouldMatchCount++; } } else if (c.isRequired()) { Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); r.addDetail(e); sumExpl.addDetail(r); fail = true; } } if (fail) { sumExpl.setMatch(Boolean.FALSE); sumExpl.setValue(0.0f); sumExpl.setDescription ("Failure to meet condition(s) of required/prohibited clause(s)"); return sumExpl; } else if (shouldMatchCount < minShouldMatch) { sumExpl.setMatch(Boolean.FALSE); sumExpl.setValue(0.0f); sumExpl.setDescription("Failure to match minimum number "+ "of optional clauses: " + minShouldMatch); return sumExpl; } sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); sumExpl.setValue(sum); final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord); if (coordFactor == 1.0f) { return sumExpl; // eliminate wrapper } else { ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), sum*coordFactor, "product of:"); result.addDetail(sumExpl); result.addDetail(new Explanation(coordFactor, "coord("+coord+"/"+maxCoord+")")); return result; } } @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { List<Scorer> required = new ArrayList<Scorer>(); List<Scorer> prohibited = new ArrayList<Scorer>(); List<Scorer> optional = new ArrayList<Scorer>(); Iterator<BooleanClause> cIter = clauses.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); Scorer subScorer = w.scorer(context, true, false, acceptDocs); if (subScorer == null) { if (c.isRequired()) { return null; } } else if (c.isRequired()) { required.add(subScorer); } else if (c.isProhibited()) { prohibited.add(subScorer); } else { optional.add(subScorer); } } // NOTE: we could also use BooleanScorer, if we knew // this BooleanQuery was embedded in another // BooleanQuery that was also using BooleanScorer (ie, // BooleanScorer can nest). But this is hard to // detect and we never do so today... (ie, we only // return BooleanScorer for topScorer): // Check if we can and should return a BooleanScorer // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch // but the same is even true of pure conjunctions... if (!scoreDocsInOrder && topScorer && required.size() == 0 && minNrShouldMatch <= 1) { return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord); } if (required.size() == 0 && optional.size() == 0) { // no required and optional clauses. return null; } else if (optional.size() < minNrShouldMatch) { // either >1 req scorer, or there are 0 req scorers and at least 1 // optional scorer. Therefore if there are not enough optional scorers // no documents will be matched by the query return null; } // simple conjunction if (optional.size() == 0 && prohibited.size() == 0) { float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord); return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord); } // simple disjunction if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) { float coord[] = new float[optional.size()+1]; for (int i = 0; i < coord.length; i++) { coord[i] = disableCoord ? 1.0f : coord(i, maxCoord); } return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord); } // Return a BooleanScorer2 return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord); } @Override public boolean scoresDocsOutOfOrder() { for (BooleanClause c : clauses) { if (c.isRequired()) { return false; // BS2 (in-order) will be used by scorer() } } // scorer() will return an out-of-order scorer if requested. return true; } } @Override public Weight createWeight(IndexSearcher searcher) throws IOException { return new BooleanWeight(searcher, disableCoord); } @Override public Query rewrite(IndexReader reader) throws IOException { if (minNrShouldMatch == 0 && clauses.size() == 1) { // optimize 1-clause queries BooleanClause c = clauses.get(0); if (!c.isProhibited()) { // just return clause Query query = c.getQuery().rewrite(reader); // rewrite first if (getBoost() != 1.0f) { // incorporate boost if (query == c.getQuery()) { // if rewrite was no-op query = query.clone(); // then clone before boost } // Since the BooleanQuery only has 1 clause, the BooleanQuery will be // written out. Therefore the rewritten Query's boost must incorporate both // the clause's boost, and the boost of the BooleanQuery itself query.setBoost(getBoost() * query.getBoost()); } return query; } } BooleanQuery clone = null; // recursively rewrite for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); Query query = c.getQuery().rewrite(reader); if (query != c.getQuery()) { // clause rewrote: must clone if (clone == null) { // The BooleanQuery clone is lazily initialized so only initialize // it if a rewritten clause differs from the original clause (and hasn't been // initialized already). If nothing differs, the clone isn't needlessly created clone = this.clone(); } clone.clauses.set(i, new BooleanClause(query, c.getOccur())); } } if (clone != null) { return clone; // some clauses rewrote } else { return this; // no clauses rewrote } } // inherit javadoc @Override public void extractTerms(Set<Term> terms) { for (BooleanClause clause : clauses) { if (clause.getOccur() != Occur.MUST_NOT) { clause.getQuery().extractTerms(terms); } } } @Override @SuppressWarnings("unchecked") public BooleanQuery clone() { BooleanQuery clone = (BooleanQuery)super.clone(); clone.clauses = (ArrayList<BooleanClause>) this.clauses.clone(); return clone; } /** Prints a user-readable version of this query. */ @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0; if (needParens) { buffer.append("("); } for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = clauses.get(i); if (c.isProhibited()) { buffer.append("-"); } else if (c.isRequired()) { buffer.append("+"); } Query subQuery = c.getQuery(); if (subQuery != null) { if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(subQuery.toString(field)); buffer.append(")"); } else { buffer.append(subQuery.toString(field)); } } else { buffer.append("null"); } if (i != clauses.size()-1) { buffer.append(" "); } } if (needParens) { buffer.append(")"); } if (getMinimumNumberShouldMatch()>0) { buffer.append('~'); buffer.append(getMinimumNumberShouldMatch()); } if (getBoost() != 1.0f) { buffer.append(ToStringUtils.boost(getBoost())); } return buffer.toString(); } /** Returns true iff <code>o</code> is equal to this. */ @Override public boolean equals(Object o) { if (!(o instanceof BooleanQuery)) { return false; } BooleanQuery other = (BooleanQuery)o; return this.getBoost() == other.getBoost() && this.clauses.equals(other.clauses) && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() && this.disableCoord == other.disableCoord; } /** Returns a hash code value for this object.*/ @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ clauses.hashCode() + getMinimumNumberShouldMatch() + (disableCoord ? 17:0); } }
下面是在网上看到得一个解决方法:
可以通过设置:
BooleanQuery.setMaxClauseCount(10000);
来解决问题,但是这样带来的问题是会使得内存开销加大。容易出现OutOfMemory的异常所以需要非常谨慎处理。
Lucene在做大量term值查询时, 如果这值过多, 超1024个term的话, 会出现
TooManyClauses[maxClauseCount is set to 1024] 的异常,因此建议在term过多的情况下采用filter, 而不是query。
以下是该情形在ES中的测试。
- Settings defaultSettings = ImmutableSettings.settingsBuilder().put("client.transport.sniff", true).build();
- Settings finalSettings = ImmutableSettings.settingsBuilder().put(defaultSettings)
- .put("name", NetworkUtils.getLocalAddress().getHostName()).build();
- TransportClient tmp = new TransportClient(finalSettings);
- Client client = tmp.addTransportAddress(new InetSocketTransportAddress("127.0.0.1", 9300));
- //demo 100万数据
- for (int i = 0; i < 1000000; i++)
- {
- client.prepareIndex("test2", "book",String.valueOf(i)).setSource("bookid", String.valueOf(i), "booktype", String.valueOf(i%10000)).execute()
- .actionGet();
- }
- //demo 近1万个term
- String[] values = new String[10000];
- for (int i = 1; i < 10000; i++)
- {
- values[i] = String.valueOf(i);
- }
- //terms query
- //TermsQueryBuilder termQueryBuilder = new TermsQueryBuilder("booktype", values);
- TermsFilterBuilder termsFilterBuilder = new TermsFilterBuilder("booktype", values);
- // SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(termQueryBuilder)
- // .setFrom(0).setSize(100).execute().actionGet();
- //terms filter
- SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(QueryBuilders.matchAllQuery()).setFilter(termsFilterBuilder)
- .setFrom(0).setSize(100).execute().actionGet();
- SearchHits hits = searchResponse.getHits();
- System.out.println(hits.totalHits());
- for (SearchHit searchHit : hits)
- {
- System.out.println(searchHit.getId() + ":" + searchHit.getSource().get("booktype"));
- }
上述结果会发现, 用TermsQueryBuilder查询的话, 会出现TooManyClauses的异常, 因为设置了9999个term值。因此,当term过多时,建议采用filter, 而不是query.
http://lucene-group.group.iteye.com/group/topic/10555
http://maxrocray.iteye.com/blog/1860946