All of the logic that drives SearchHandler comes from its SearchComponents. If no components are explicitly declared in the request handler configuration, the defaults are used. They are:
"query" (usually QueryComponent)
"facet" (usually FacetComponent)
"mlt" (usually MoreLikeThisComponent)
"highlight" (usually HighlightComponent)
"stats" (usually StatsComponent)
"debug" (usually DebugComponent)
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
{
// int sleep = req.getParams().getInt("sleep",0);
// if (sleep > 0) {log.error("SLEEPING for " + sleep); Thread.sleep(sleep);}
if (req.getContentStreams() != null && req.getContentStreams().iterator().hasNext()) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Search requests cannot accept content streams");
}
ResponseBuilder rb = new ResponseBuilder(req, rsp, components);
if (rb.requestInfo != null) {
rb.requestInfo.setResponseBuilder(rb);
}
boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);
rb.setDebug(dbg);
if (dbg == false){//if it's true, we are doing everything anyway.
SolrPluginUtils.getDebugInterests(req.getParams().getParams(CommonParams.DEBUG), rb);
}
final RTimer timer = rb.isDebug() ? new RTimer() : null;
ShardHandler shardHandler1 = shardHandlerFactory.getShardHandler();
shardHandler1.checkDistributed(rb);
if (timer == null) {
// non-debugging prepare phase
for( SearchComponent c : components ) {
c.prepare(rb);
}
} else {
// debugging prepare phase
RTimer subt = timer.sub( "prepare" );
for( SearchComponent c : components ) {
rb.setTimer( subt.sub( c.getName() ) );
c.prepare(rb); // sets up per-component request parameters
rb.getTimer().stop();
}
subt.stop();
}
if (!rb.isDistrib) {
// a normal non-distributed request
// The semantics of debugging vs not debugging are different enough that
// it makes sense to have two control loops
if(!rb.isDebug()) {
// Process
for( SearchComponent c : components ) {
c.process(rb);
}
}
else {
// Process
RTimer subt = timer.sub( "process" );
for( SearchComponent c : components ) {
rb.setTimer( subt.sub( c.getName() ) );
c.process(rb);
rb.getTimer().stop();
}
subt.stop();
timer.stop();
// add the timing info
if (rb.isDebugTimings()) {
rb.addDebugInfo("timing", timer.asNamedList() );
}
}
} else {
// a distributed request
if (rb.outgoing == null) { // outgoing -- the list of requests that will be sent to the shards
rb.outgoing = new LinkedList<>();
}
rb.finished = new ArrayList<>();
int nextStage = 0;
do {
rb.stage = nextStage; // the loop ends when nextStage == Integer.MAX_VALUE (i.e. STAGE_DONE)
nextStage = ResponseBuilder.STAGE_DONE;
// call all components
for( SearchComponent c : components ) {
// the next stage is the minimum of what all components report
nextStage = Math.min(nextStage, c.distributedProcess(rb));
}
// check the outgoing queue and send requests
while (rb.outgoing.size() > 0) {
// keep looping while there are still requests that have not been sent
// submit all current request tasks at once
while (rb.outgoing.size() > 0) { // outgoing: the list of requests still to be sent
ShardRequest sreq = rb.outgoing.remove(0); // a request is removed from outgoing as soon as it is taken for sending
sreq.actualShards = sreq.shards;
if (sreq.actualShards==ShardRequest.ALL_SHARDS) {
sreq.actualShards = rb.shards;
}
sreq.responses = new ArrayList<>();
// TODO: map from shard to address[]
for (String shard : sreq.actualShards) {
ModifiableSolrParams params = new ModifiableSolrParams(sreq.params);
params.remove(ShardParams.SHARDS); // not a top-level request
params.set(CommonParams.DISTRIB, "false"); // not a top-level request
params.remove("indent");
params.remove(CommonParams.HEADER_ECHO_PARAMS);
params.set(ShardParams.IS_SHARD, true); // a sub (shard) request
params.set(ShardParams.SHARD_URL, shard); // so the shard knows what was asked
if (rb.requestInfo != null) {
// we could try and detect when this is needed, but it could be tricky
params.set("NOW", Long.toString(rb.requestInfo.getNOW().getTime()));
}
String shardQt = params.get(ShardParams.SHARDS_QT);
if (shardQt == null) {
params.remove(CommonParams.QT);
} else {
params.set(CommonParams.QT, shardQt);
}
// shardHandler1.submit is a thin wrapper around CompletionService.submit and is the key call here.
// Tracing into it, the implementation invoked should be the HttpShardHandler class.
// Very important: submit is called once per shard.
shardHandler1.submit(sreq, shard, params);
}
}
// now wait for replies, but if anyone puts more requests on
// the outgoing queue, send them out immediately (by exiting
// this loop)
boolean tolerant = rb.req.getParams().getBool(ShardParams.SHARDS_TOLERANT, false);
// requests are removed from outgoing once dispatched, so size()==0 means everything has been sent;
// if a component queues new requests we exit this loop, and the enclosing loop sends them out
while (rb.outgoing.size() == 0) {
ShardResponse srsp = tolerant ?
shardHandler1.takeCompletedIncludingErrors():
shardHandler1.takeCompletedOrError();
if (srsp == null) break; // no more requests to wait for
// Was there an exception?
if (srsp.getException() != null) {
// If things are not tolerant, abort everything and rethrow
if(!tolerant) {
shardHandler1.cancelAll();
if (srsp.getException() instanceof SolrException) {
throw (SolrException)srsp.getException();
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, srsp.getException());
}
} else {
if(rsp.getResponseHeader().get("partialResults") == null) {
rsp.getResponseHeader().add("partialResults", Boolean.TRUE);
}
}
}
rb.finished.add(srsp.getShardRequest());
// let the components see the responses to the request
for(SearchComponent c : components) {
c.handleResponses(rb, srsp.getShardRequest()); // the SearchComponent we care most about here is QueryComponent
}
}
}
for(SearchComponent c : components) {
c.finishStage(rb);
}
// we are done when the next stage is MAX_VALUE
} while (nextStage != Integer.MAX_VALUE);
}
// SOLR-5550: still provide shards.info if requested even for a short circuited distrib request
if(!rb.isDistrib && req.getParams().getBool(ShardParams.SHARDS_INFO, false) && rb.shortCircuitedURL != null) {
NamedList<Object> shardInfo = new SimpleOrderedMap<Object>();
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<Object>();
if (rsp.getException() != null) {
Throwable cause = rsp.getException();
if (cause instanceof SolrServerException) {
cause = ((SolrServerException)cause).getRootCause();
} else {
if (cause.getCause() != null) {
cause = cause.getCause();
}
}
nl.add("error", cause.toString() );
StringWriter trace = new StringWriter();
cause.printStackTrace(new PrintWriter(trace));
nl.add("trace", trace.toString() );
}
else {
nl.add("numFound", rb.getResults().docList.matches());
nl.add("maxScore", rb.getResults().docList.maxScore());
}
nl.add("shardAddress", rb.shortCircuitedURL);
nl.add("time", rsp.getEndTime()-req.getStartTime()); // elapsed time of this request so far
int pos = rb.shortCircuitedURL.indexOf("://");
String shardInfoName = pos != -1 ? rb.shortCircuitedURL.substring(pos+3) : rb.shortCircuitedURL;
shardInfo.add(shardInfoName, nl);
rsp.getValues().add(ShardParams.SHARDS_INFO,shardInfo);
}
}
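To make the distributed loop above easier to follow, here is a minimal, hypothetical component sketch (ExampleComponent is not a real Solr class) showing how a component plugs into it, assuming the Solr 4.x SearchComponent API used throughout this post: distributedProcess returns the next stage the component needs, shard requests are queued on rb.outgoing, and the replies come back through handleResponses.

import java.io.IOException;

import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.handler.component.ShardRequest;

public class ExampleComponent extends SearchComponent {

  @Override
  public void prepare(ResponseBuilder rb) throws IOException {
    // runs before process(); typically reads request params
  }

  @Override
  public void process(ResponseBuilder rb) throws IOException {
    // the non-distributed (single core) code path
  }

  @Override
  public int distributedProcess(ResponseBuilder rb) throws IOException {
    if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
      ShardRequest sreq = new ShardRequest();
      sreq.params = new ModifiableSolrParams(rb.req.getParams());
      sreq.shards = ShardRequest.ALL_SHARDS;   // handleRequestBody expands this to rb.shards
      rb.outgoing.add(sreq);                   // picked up by the "check the outgoing queue" loop above
      return ResponseBuilder.STAGE_GET_FIELDS; // the next stage this component wants to run in
    }
    return ResponseBuilder.STAGE_DONE;         // Integer.MAX_VALUE: nothing more to do
  }

  @Override
  public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
    // called once per completed ShardRequest; per-shard replies are in sreq.responses
  }

  @Override
  public String getDescription() { return "example component"; }

  @Override
  public String getSource() { return ""; }
}

Next, the per-shard submit call traced above lands in HttpShardHandler.submit: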
@Override
public void submit(final ShardRequest sreq, final String shard, final ModifiableSolrParams params) {
// do this outside of the callable for thread safety reasons
final List<String> urls = getURLs(shard);
Callable<ShardResponse> task = new Callable<ShardResponse>() {
@Override
public ShardResponse call() throws Exception {
ShardResponse srsp = new ShardResponse();
if (sreq.nodeName != null) {
srsp.setNodeName(sreq.nodeName);
}
srsp.setShardRequest(sreq);
srsp.setShard(shard);
SimpleSolrResponse ssr = new SimpleSolrResponse();
srsp.setSolrResponse(ssr);
long startTime = System.nanoTime();
try {
params.remove(CommonParams.WT); // use default (currently javabin)
params.remove(CommonParams.VERSION);
// SolrRequest req = new QueryRequest(SolrRequest.METHOD.POST, "/select");
// use generic request to avoid extra processing of queries
QueryRequest req = new QueryRequest(params);
req.setMethod(SolrRequest.METHOD.POST);
// no need to set the response parser as binary is the default
// req.setResponseParser(new BinaryResponseParser());
// if there are no shards available for a slice, urls.size()==0
if (urls.size()==0) {
// TODO: what's the right error code here? We should use the same thing when
// all of the servers for a shard are down.
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "no servers hosting shard: " + shard);
}
if (urls.size() <= 1) { // i.e. this shard has only a single replica (one core / one machine)
String url = urls.get(0);
srsp.setShardAddress(url);
SolrServer server = new HttpSolrServer(url, httpClient);
try {
ssr.nl = server.request(req); // nl now holds the shard's response
} finally {
server.shutdown();
}
} else { // the shard has multiple replicas: follow HttpShardHandlerFactory.makeLoadBalancedRequest, analyzed below
LBHttpSolrServer.Rsp rsp = httpShardHandlerFactory.makeLoadBalancedRequest(req, urls);
ssr.nl = rsp.getResponse();
srsp.setShardAddress(rsp.getServer());
}
}
catch( ConnectException cex ) {
srsp.setException(cex); // a connection failure is recorded on the ShardResponse instead of being rethrown
} catch (Exception th) {
srsp.setException(th);
if (th instanceof SolrException) {
srsp.setResponseCode(((SolrException)th).code());
} else {
srsp.setResponseCode(-1);
}
}
ssr.elapsedTime = TimeUnit.MILLISECONDS.convert(System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
return srsp;
}
};
pending.add( completionService.submit(task) );
}
/**
* Makes a request to one or more of the given urls, using the configured load balancer.
*
* @param req The solr search request that should be sent through the load balancer
* @param urls The list of solr server urls to load balance across
* @return The response from the request
*/
public LBHttpSolrServer.Rsp makeLoadBalancedRequest(final QueryRequest req, List<String> urls)
throws SolrServerException, IOException {
return loadbalancer.request(new LBHttpSolrServer.Req(req, urls));
}
How to use?
SolrServer lbHttpSolrServer = new LBHttpSolrServer("http://host1:8080/solr/", "http://host2:8080/solr", "http://host3:8080/solr");
// or, if you wish to pass the HttpClient, do as follows:
HttpClient httpClient = new HttpClient();
SolrServer lbHttpSolrServer = new LBHttpSolrServer(httpClient, "http://host1:8080/solr/", "http://host2:8080/solr", "http://host3:8080/solr");
This can be used like any other SolrServer implementation.

How does the load balancing happen?
It is a simple round-robin: the first request goes to 'host1', the next to 'host2', then 'host3', and then back to 'host1' again.

How does failover happen?
LBHttpSolrServer does not keep pinging the servers to check whether they are alive. If a request to a server fails with an exception, that host is taken off the list of live servers, moved to a 'dead server' list, and the request is resent to the next live server. This continues until all live servers have been tried; if at least one server is alive the request succeeds, otherwise it fails.

How does it know if a server has come back up?
LBHttpSolrServer pings the dead servers once a minute (the default) to see whether they are alive again. The interval can be changed with lbHttpSolrServer.setAliveCheckInterval(60 * 1000); // time in milliseconds
The ping is done in a separate thread.
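Putting that together, a self-contained usage sketch might look like the following (host URLs are placeholders; this assumes the SolrJ 4.x API shown above):

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.LBHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;

public class LBHttpSolrServerExample {
  public static void main(String[] args) throws Exception {
    LBHttpSolrServer lb = new LBHttpSolrServer(
        "http://host1:8080/solr/",
        "http://host2:8080/solr",
        "http://host3:8080/solr");
    lb.setAliveCheckInterval(60 * 1000); // ping dead servers once a minute (the default)

    // Requests are spread round-robin across host1, host2 and host3; a host that fails
    // with an exception is parked in the dead list and retried by the background ping thread.
    QueryResponse rsp = lb.query(new SolrQuery("*:*"));
    System.out.println("numFound=" + rsp.getResults().getNumFound());

    lb.shutdown();
  }
}

The actual failover logic lives in LBHttpSolrServer.request(Req), shown next: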
/**
* Tries to query a live server from the list provided in Req. Servers in the dead pool are skipped.
* If a request fails due to an IOException, the server is moved to the dead pool for a certain period of
* time, or until a test request on that server succeeds.
*
* Servers are queried in the exact order given (except servers currently in the dead pool are skipped).
* If no live servers from the provided list remain to be tried, a number of previously skipped dead servers will be tried.
* Req.getNumDeadServersToTry() controls how many dead servers will be tried.
*
* If no live servers are found a SolrServerException is thrown.
*
* @param req contains both the request as well as the list of servers to query
*
* @return the result of the request
*
* @throws IOException If there is a low-level I/O error.
*/
public Rsp request(Req req) throws SolrServerException, IOException {
Rsp rsp = new Rsp();
Exception ex = null;
boolean isUpdate = req.request instanceof IsUpdateRequest;
List<ServerWrapper> skipped = new ArrayList<>(req.getNumDeadServersToTry());
for (String serverStr : req.getServers()) {
serverStr = normalize(serverStr);
// if the server is currently a zombie, just skip to the next one
ServerWrapper wrapper = zombieServers.get(serverStr);
if (wrapper != null) {
// System.out.println("ZOMBIE SERVER QUERIED: " + serverStr);
if (skipped.size() < req.getNumDeadServersToTry())
skipped.add(wrapper);
continue;
}
rsp.server = serverStr;
HttpSolrServer server = makeServer(serverStr);
ex = doRequest(server, req, rsp, isUpdate, false, null); // trace into doRequest
if (ex == null) {
return rsp; // SUCCESS
}
}
// try the servers we previously skipped
for (ServerWrapper wrapper : skipped) {
ex = doRequest(wrapper.solrServer, req, rsp, isUpdate, true, wrapper.getKey());
if (ex == null) {
return rsp; // SUCCESS
}
}
if (ex == null) {
throw new SolrServerException("No live SolrServers available to handle this request");
} else {
throw new SolrServerException("No live SolrServers available to handle this request:" + zombieServers.keySet(), ex);
}
}
/**
* Tries to query a live server. A SolrServerException is thrown if all servers are dead.
* If the request failed due to IOException then the live server is moved to dead pool and the request is
* retried on another live server. After live servers are exhausted, any servers previously marked as dead
* will be tried before failing the request.
*
* @param request the SolrRequest.
*
* @return response
*
* @throws IOException If there is a low-level I/O error.
*/
@Override
public NamedList<Object> request(final SolrRequest request)
throws SolrServerException, IOException {
Exception ex = null;
ServerWrapper[] serverList = aliveServerList;
int maxTries = serverList.length;
Map<String,ServerWrapper> justFailed = null;
for (int attempts=0; attempts<maxTries; attempts++) {
int count = counter.incrementAndGet();
ServerWrapper wrapper = serverList[count % serverList.length]; // this is where the round-robin load balancing happens
wrapper.lastUsed = System.currentTimeMillis();
try {
return wrapper.solrServer.request(request);
} catch (SolrException e) {
// Server is alive but the request was malformed or invalid
throw e;
} catch (SolrServerException e) {
if (e.getRootCause() instanceof IOException) {
ex = e;
moveAliveToDead(wrapper);
if (justFailed == null) justFailed = new HashMap<>();
justFailed.put(wrapper.getKey(), wrapper);
} else {
throw e;
}
} catch (Exception e) {
throw new SolrServerException(e);
}
}
// ... (the rest of request() retries the servers collected in justFailed, as the javadoc above describes)

// Inside HttpSolrServer, each shard request ultimately comes down to a single HTTP call:
// Execute the method.
final HttpResponse response = httpClient.execute(method); // executes the request

// Back in HttpShardHandler.submit, every task is handed to the CompletionService:
pending.add( completionService.submit(task) );

// and handleRequestBody collects the results via takeCompletedIncludingErrors / takeCompletedOrError,
// both of which delegate to take(boolean bailOnError) below:
ShardResponse srsp = tolerant ?
shardHandler1.takeCompletedIncludingErrors():
shardHandler1.takeCompletedOrError();
private ShardResponse take(boolean bailOnError) {
while (pending.size() > 0) {
try {
Future<ShardResponse> future = completionService.take();
pending.remove(future);
ShardResponse rsp = future.get();
if (bailOnError && rsp.getException() != null) return rsp; // if exception, return immediately
// add response to the response list... we do this after the take() and
// not after the completion of "call" so we know when the last response
// for a request was received. Otherwise we might return the same
// request more than once.
rsp.getShardRequest().responses.add(rsp);
if (rsp.getShardRequest().responses.size() == rsp.getShardRequest().actualShards.length) {
return rsp;
}
} catch (InterruptedException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
} catch (ExecutionException e) {
// should be impossible... the problem with catching the exception
// at this level is we don't know what ShardRequest it applied to
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Impossible Exception",e);
}
}
return null;
}
/**
* Takes one dead server and checks it for aliveness. The check is done round-robin: each server is checked for
* aliveness once every 'x' millis, where x is set by setAliveCheckInterval() and defaults to 1 minute.
*
* @param zombieServer a server in the dead pool
*/
private void checkAZombieServer(ServerWrapper zombieServer) {
long currTime = System.currentTimeMillis();
try {
zombieServer.lastChecked = currTime;
QueryResponse resp = zombieServer.solrServer.query(solrQuery); // issue one query; ultimately this still goes through request(), i.e. a normal HTTP call
if (resp.getStatus() == 0) {
// server has come back up.
// make sure to remove from zombies before adding to alive to avoid a race condition
// where another thread could mark it down, move it back to zombie, and then we delete
// from zombie and lose it forever.
ServerWrapper wrapper = zombieServers.remove(zombieServer.getKey());
if (wrapper != null) {
wrapper.failedPings = 0;
if (wrapper.standard) {
addToAlive(wrapper);
}
} else {
// something else already moved the server from zombie to alive
}
}
} catch (Exception e) {
//Expected. The server is still down.
zombieServer.failedPings++;
// If the server doesn't belong in the standard set belonging to this load balancer
// then simply drop it after a certain number of failed pings.
if (!zombieServer.standard && zombieServer.failedPings >= NONSTANDARD_PING_LIMIT) {
zombieServers.remove(zombieServer.getKey());
}
}
}
static final SolrQuery solrQuery = new SolrQuery("*:*"); // the ping query used by checkAZombieServer