Solr (pronounced "solar") is an open source enterprise search platform from the Apache Lucene project. Its major features include full-text search, hit highlighting, faceted search, dynamic clustering, database integration, and rich document (e.g., Word, PDF) handling. Providing distributed search and index replication, Solr is highly scalable. Solr is the most popular enterprise search engine. Solr 4 adds NoSQL features. Solr is written in Java and runs as a standalone full-text search server within a servlet container such as Apache Tomcat or Jetty. Solr uses the Lucene Java search library at its core for full-text indexing and search, and has REST-like HTTP/XML and JSON APIs that make it usable from most popular programming languages. Solr's powerful external configuration allows it to be tailored to many types of application without Java coding, and it has a plugin architecture to support more advanced customization.
%% Test code: a small driver module for exercising the esolr client
%% against one Solr core from the Erlang shell.
-module(t).

%% Only the three driver functions are part of the interface, so they are
%% exported explicitly instead of using -compile(export_all).
-export([start/0, search/1, add/1]).

%% Base URL of the Solr core under test; the handler paths are appended to it.
-define(SOLR_CORE_URL, "http://192.168.0.160:8080/solr/hear_me").

%% @doc Starts an esolr client configured with the select, update and
%% more-like-this URLs of the core and returns its pid.
%% Crashes with badmatch if esolr:start/1 does not return {ok, Pid}.
start() ->
    SearchUrl = ?SOLR_CORE_URL ++ "/select",
    UpdateUrl = ?SOLR_CORE_URL ++ "/update",
    MltUrl = ?SOLR_CORE_URL ++ "/mlt",
    {ok, Pid} = esolr:start([
        {select_url, SearchUrl},
        {update_url, UpdateUrl},
        {morelikethis_url, MltUrl}
    ]),
    Pid.

%% @doc Runs the fixed query "10" through the given esolr client, asking
%% for the field list "*,*", and returns whatever esolr:search/3 returns.
search(SolrPid) ->
    esolr:search("10", [{fields, "*,*"}], SolrPid).

%% @doc Adds two sample documents (ids "ai234" and "a3456") with the same
%% text and then commits. The results of the two add calls are ignored;
%% the return value is that of esolr:commit/1.
add(SolrPid) ->
    esolr:add(
        [{doc, [{id, "ai234"}, {text, <<"Look me mom!, I'm searching now">>}]}],
        SolrPid
    ),
    esolr:add(
        [{doc, [{id, "a3456"}, {text, <<"Look me mom!, I'm searching now">>}]}],
        SolrPid
    ),
    esolr:commit(SolrPid).
测试结果如下:
Eshell V5.9 (abort with ^G) 1> P=t:start(). <0.34.0> 2> t:add(P). ok 3> esolr:search("searching",[{fields,"*,*"}],P). {ok,[{"numFound",2},{"start",0}], [{doc,[{"id",<<"ai234">>}, {"_version_",1440978100186775552}]}, {doc,[{"id",<<"a3456">>}, {"_version_",1440978100212989952}]}], []} 4> t:search(P). {ok,[{"numFound",9},{"start",0}], [{doc,[{"c_type",1}, {"c_tags", [<<229,165,179,228,186,186>>, <<230,148,190,229,188,131>>, <<229,174,182,229,186,173>>, <<229,165,179,229,143,139>>, <<229,165,179,229,173,169,229,173,144>>, <<229,176,143,229,173,169,229,173,144>>, <<231,166,187,229,169,154>>, <<229,135,186,230,137,139>>, <<229,133,132,229,188,159>>]}, {"c_pub_date",<<"2013-07-12T16:29:11.593Z">>}, {"id",<<"97">>}, {"_version_",1440342611812417536}]}, {doc,[{"c_type",1}, {"c_tags", [<<231,189,145,231,187,156>>, <<229,165,179,229,143,139>>, <<228,187,139,231,187,141>>, <<233,171,152,228,184,173>>, <<229,144,140,229,173,166>>, <<230,156,139,229,143,139>>, <<229,140,151,228,186,172>>, ..... ...
代码实现
%% Posts Request (an XML body) asynchronously to the update URL and stores
%% PendingInfo in the pending tree under the request id returned by httpc.
%%
%% When auto_commit is `always' and the state is flagged dirty, a commit
%% request is posted right after it (using commit_timeout), tracked under
%% {auto, auto_commit}, logged, and the dirty flag is cleared.
%%
%% Always returns {noreply, NewState}. Crashes with badmatch if httpc does
%% not return {ok, RequestId}.
make_post_request(Request, PendingInfo,
                  State = #esolr{update_url = Url,
                                 pending = Pending0,
                                 auto_commit = AutoCommit,
                                 dirty = IsDirty},
                  Timeout) ->
    {ok, ReqId} = send_async_xml_post(Url, Request, Timeout),
    Pending1 = gb_trees:insert(ReqId, PendingInfo, Pending0),
    case {AutoCommit, IsDirty} of
        {always, true} ->
            CommitBody = encode_commit(),
            {ok, CommitId} =
                send_async_xml_post(Url, CommitBody, State#esolr.commit_timeout),
            Pending2 = gb_trees:insert(CommitId, {auto, auto_commit}, Pending1),
            error_logger:info_report([{auto_commit, send}]),
            {noreply, State#esolr{pending = Pending2, dirty = false}};
        _ ->
            {noreply, State#esolr{pending = Pending1}}
    end.

%% Issues a non-blocking ({sync, false}) "text/xml" POST with a
%% "connection: close" header and returns httpc's result unchanged.
send_async_xml_post(Url, Body, Timeout) ->
    httpc:request(post,
                  {Url, [{"connection", "close"}], "text/xml", Body},
                  [{timeout, Timeout}],
                  [{sync, false}]).
%% Handles the {http, {RequestId, HttpResponse}} message for a request that
%% was issued earlier. If the request id is still in the pending tree, the
%% response is handed to handle_http_response/3 together with the stored
%% client and operation, and the entry is removed; otherwise the state is
%% returned unchanged.
%% The clause ends with `;' because further handle_info clauses follow in
%% the full module; they are not part of this excerpt.
%% @hidden
handle_info({http, {RequestId, HttpResponse}}, State = #esolr{pending = Pending}) ->
    case gb_trees:lookup(RequestId, Pending) of
        none ->
            %% the requestid isn't here, probably the request was deleted after a timeout
            {noreply, State};
        {value, {Client, RequestOp}} ->
            handle_http_response(HttpResponse, RequestOp, Client),
            Remaining = gb_trees:delete(RequestId, Pending),
            {noreply, State#esolr{pending = Remaining}}
    end;

%% Replies to Client with the result of a search.
%% Response is a key/value list that must contain a "response" entry of the
%% form {obj, Fields}, and Fields must contain a "docs" entry; a missing
%% entry crashes with badmatch. The reply is
%% {ok, FieldsWithoutDocs, [{doc, DocFields}], ResponseWithoutResponseEntry};
%% elements of the docs list that are not {obj, _} tuples are dropped.
parse_search_response(Response, Client) ->
    {value, {"response", {obj, SearchFields}}, OtherSections} =
        lists:keytake("response", 1, Response),
    {value, {"docs", RawDocs}, SummaryFields} =
        lists:keytake("docs", 1, SearchFields),
    Docs = [{doc, Fields} || {obj, Fields} <- RawDocs],
    gen_server:reply(Client, {ok, SummaryFields, Docs, OtherSections}).
Eshell V5.10.2 (abort with ^G) 1> xmerl:export_simple([{commit,[]}],xmerl_xml). ["<?xml version=\"1.0\"?>",[["<","commit","/>"]]] 2>
HTTPResponse解析还会用到xmerl_scan,xmerl_xpath
%% Handles an HTTP 200 response whose body is an XML document (a binary).
%% The body is parsed with xmerl_scan, the single
%% /response/lst[@name='responseHeader'] element is selected with
%% xmerl_xpath and passed to parse_xml_response_header/1. On {ok, QTime} the
%% whole document goes to parse_xml_response/4; on {error, Error} the error
%% goes to response_error/3.
%% Crashes with badmatch if the parser leaves trailing input or if the
%% XPath does not select exactly one node.
%% The clause ends with `;' because further clauses follow in the full
%% module; they are not part of this excerpt.
handle_http_response({{_Version, 200, _Phrase}, _RespHeaders, Body}, Op, Client) ->
    {XmlDoc, []} = xmerl_scan:string(binary_to_list(Body)),
    [HeaderNode] =
        xmerl_xpath:string("/response/lst[@name='responseHeader']", XmlDoc),
    case parse_xml_response_header(HeaderNode) of
        {error, Error} ->
            response_error(Op, Client, Error);
        {ok, QTime} ->
            parse_xml_response(Op, XmlDoc, QTime, Client)
    end;
除了XML之外,还要解析JSON,这里使用的是RFC4627.