solr主从部署

1、3台solr服务器,采用主从复制的策略实现索引文件的同步。主从是指将集群中的一台server设置为主服务器,其余为从服务器,从服务器定时从主服务器同步数据

solr主从部署_第1张图片

主服务器的solr配置(solrconfig.xml)

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<!--
  solrconfig.xml for the replication MASTER core.
  Based on the stripped down Solr example config; the part that makes this
  node a master is the "/replication" request handler with a "master" list.
-->
<config>
  <luceneMatchVersion>4.9</luceneMatchVersion>

  <!-- A 'dir' option by itself adds any files found in the directory
       to the classpath, this is useful for including all jars in a
       directory.

       When a 'regex' is specified in addition to a 'dir', only the
       files in that directory which completely match the regex
       (anchored on both ends) will be included.

       If a 'dir' option (with or without a regex) is used and nothing
       is found that matches, a warning will be logged.

       (No 'lib' directives are declared in this config.)
  -->

  <!-- The DirectoryFactory to use for indexes.
       solr.StandardDirectoryFactory, the default, is filesystem based.
       solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

  <!-- Index data directory; empty default when solr.core0.data.dir is not set. -->
  <dataDir>${solr.core0.data.dir:}</dataDir>

  <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:

       <schemaFactory class="ManagedIndexSchemaFactory">
         <bool name="mutable">true</bool>
         <str name="managedSchemaResourceName">managed-schema</str>
       </schemaFactory>

       When ManagedIndexSchemaFactory is specified, Solr will load the schema from
       the resource named in 'managedSchemaResourceName', rather than from schema.xml.
       Note that the managed schema resource CANNOT be named schema.xml.  If the managed
       schema does not exist, Solr will create it after reading schema.xml, then rename
       'schema.xml' to 'schema.xml.bak'.

       Do NOT hand edit the managed schema - external modifications will be ignored and
       overwritten as a result of schema modification REST API calls.

       When ManagedIndexSchemaFactory is specified with mutable = true, schema
       modification REST API calls will be allowed; otherwise, error responses will be
       sent back for these requests.
  -->
  <schemaFactory class="ClassicIndexSchemaFactory"/>

  <updateHandler class="solr.DirectUpdateHandler2">
    <!-- Transaction log, stored alongside the index data. -->
    <updateLog>
      <str name="dir">${solr.core0.data.dir:}</str>
    </updateLog>
  </updateHandler>

  <!-- Query-side settings. These elements are only read when nested inside
       <query>; placed directly under <config> they are ignored. -->
  <query>
    <queryResultWindowSize>1500</queryResultWindowSize>
    <queryResultMaxDocsCached>150</queryResultMaxDocsCached>
    <queryResultCache class="solr.LRUCache"
                      size="15000"
                      initialSize="15000"
                      autowarmCount="1500"/>
  </query>

  <!-- realtime get handler, guaranteed to return the latest stored fields
       of any document, without the need to commit or open a new searcher. The current
       implementation relies on the updateLog feature being enabled. -->
  <requestHandler name="/get" class="solr.RealTimeGetHandler">
    <lst name="defaults">
      <str name="omitHeader">true</str>
    </lst>
  </requestHandler>

  <!-- Master side of index replication. -->
  <requestHandler name="/replication" class="solr.ReplicationHandler">
    <lst name="master">
      <!-- Make a new index version available to slaves after startup, commit and optimize. -->
      <str name="replicateAfter">startup</str>
      <str name="replicateAfter">commit</str>
      <str name="replicateAfter">optimize</str>

      <!-- Create a backup after 'optimize'. Other values can be 'commit', 'startup'.
           It is possible to have multiple entries of this config string.
           Note that this is just for backup, replication does not require this. -->
      <!-- <str name="backupAfter">optimize</str> -->

      <!-- If configuration files need to be replicated give the names here, separated by comma -->
      <str name="confFiles">schema.xml,stopwords.txt</str>

      <!-- The default value of reservation is 10 secs.
           Normally, you should not need to specify this. -->
      <str name="commitReserveDuration">00:00:10</str>
    </lst>
  </requestHandler>

  <requestDispatcher handleSelect="true">
    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048"/>
  </requestDispatcher>

  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true"/>
  <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler"/>
  <requestHandler name="/update" class="solr.UpdateRequestHandler"/>
  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers"/>

  <!-- Ping handler is disabled on the master:
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="invariants">
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>
  -->

  <!-- config for the admin interface -->
  <admin>
    <defaultQuery>solr</defaultQuery>
  </admin>

</config>

从服务器配置( solrconfig.xml) 

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<!--
  solrconfig.xml for a replication SLAVE core.
  Based on the stripped down Solr example config; the part that makes this
  node a slave is the "/replication" request handler with a "slave" list
  pointing at the master's replication URL.
-->
<config>
  <luceneMatchVersion>4.9</luceneMatchVersion>

  <!-- A 'dir' option by itself adds any files found in the directory
       to the classpath, this is useful for including all jars in a
       directory.

       When a 'regex' is specified in addition to a 'dir', only the
       files in that directory which completely match the regex
       (anchored on both ends) will be included.

       If a 'dir' option (with or without a regex) is used and nothing
       is found that matches, a warning will be logged.

       (No 'lib' directives are declared in this config.)
  -->

  <!-- The DirectoryFactory to use for indexes.
       solr.StandardDirectoryFactory, the default, is filesystem based.
       solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

  <!-- Index data directory; empty default when solr.core0.data.dir is not set. -->
  <dataDir>${solr.core0.data.dir:}</dataDir>

  <!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:

       <schemaFactory class="ManagedIndexSchemaFactory">
         <bool name="mutable">true</bool>
         <str name="managedSchemaResourceName">managed-schema</str>
       </schemaFactory>

       When ManagedIndexSchemaFactory is specified, Solr will load the schema from
       the resource named in 'managedSchemaResourceName', rather than from schema.xml.
       Note that the managed schema resource CANNOT be named schema.xml.  If the managed
       schema does not exist, Solr will create it after reading schema.xml, then rename
       'schema.xml' to 'schema.xml.bak'.

       Do NOT hand edit the managed schema - external modifications will be ignored and
       overwritten as a result of schema modification REST API calls.

       When ManagedIndexSchemaFactory is specified with mutable = true, schema
       modification REST API calls will be allowed; otherwise, error responses will be
       sent back for these requests.
  -->
  <schemaFactory class="ClassicIndexSchemaFactory"/>

  <updateHandler class="solr.DirectUpdateHandler2">
    <!-- Transaction log, stored alongside the index data. -->
    <updateLog>
      <str name="dir">${solr.core0.data.dir:}</str>
    </updateLog>
  </updateHandler>

  <!-- Query-side settings. These elements are only read when nested inside
       <query>; placed directly under <config> they are ignored. -->
  <query>
    <queryResultWindowSize>1500</queryResultWindowSize>
    <queryResultMaxDocsCached>150</queryResultMaxDocsCached>
    <queryResultCache class="solr.LRUCache"
                      size="15000"
                      initialSize="15000"
                      autowarmCount="1500"/>
  </query>

  <!-- realtime get handler, guaranteed to return the latest stored fields
       of any document, without the need to commit or open a new searcher. The current
       implementation relies on the updateLog feature being enabled. -->
  <requestHandler name="/get" class="solr.RealTimeGetHandler">
    <lst name="defaults">
      <str name="omitHeader">true</str>
    </lst>
  </requestHandler>

  <!-- Slave side of index replication.
       NOTE(review): with startup="lazy" the handler may not be initialised until
       the first request to /replication, which could delay automatic polling.
       Confirm on your Solr version, or drop the attribute. -->
  <requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy">
    <lst name="slave">

      <!-- Fully qualified url for the replication handler of master. It is possible
           to pass on this as a request param for the fetchindex command. -->
      <str name="masterUrl">http://10.10.53.235:8080/solr/core0/replication</str>

      <!-- Interval in which the slave should poll master. Format is HH:mm:ss.
           If this is absent slave does not poll automatically,
           but a fetchindex can be triggered from the admin or the http API. -->
      <str name="pollInterval">00:10:00</str>

      <!-- THE FOLLOWING PARAMETERS ARE USUALLY NOT REQUIRED -->

      <!-- To use compression while transferring the index files. The possible values
           are internal|external.
           If the value is 'external' make sure that your master Solr has the settings
           to honour the accept-encoding header;
           see here for details http://wiki.apache.org/solr/SolrHttpCompression
           If it is 'internal' everything will be taken care of automatically.
           USE THIS ONLY IF YOUR BANDWIDTH IS LOW. THIS CAN ACTUALLY SLOWDOWN REPLICATION IN A LAN. -->
      <str name="compression">internal</str>

      <!-- The following values are used when the slave connects to the master to
           download the index files. Default values implicitly set as 5000ms and
           10000ms respectively. The user DOES NOT need to specify these unless the
           bandwidth is extremely low or if there is an extremely high latency. -->
      <str name="httpConnTimeout">5000</str>
      <str name="httpReadTimeout">10000</str>

      <!-- If HTTP Basic authentication is enabled on the master, then the slave
           can be configured with the following (placeholder values): -->
      <!--
      <str name="httpBasicAuthUser">username</str>
      <str name="httpBasicAuthPassword">password</str>
      -->
    </lst>
  </requestHandler>

  <requestDispatcher handleSelect="true">
    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048"/>
  </requestDispatcher>

  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true"/>
  <requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler"/>
  <requestHandler name="/update" class="solr.UpdateRequestHandler"/>
  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers"/>

  <!-- Ping handler: runs the fixed query "solrpingquery" and echoes all params. -->
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="invariants">
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

  <!-- config for the admin interface -->
  <admin>
    <defaultQuery>solr</defaultQuery>
  </admin>

</config>

 2、solrj实现索引创建和查询

由于采用了主从架构,所以索引的创建直接在主服务器上完成即可,从服务器会自动同步

solr主从部署_第2张图片

 

查询时前端用nginx做代理,达到负载均衡的效果

solr主从部署_第3张图片

nginx的配置如下: 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Pool of Solr nodes that proxied requests are distributed across.
# The group is named "localhost"; proxy_pass below refers to it by that name.
upstream localhost {
    server 10.10.53.177:8080;
    server 10.16.15.121:8080;
    server 10.10.53.235:8080;
}

server {
    listen      80;
    server_name 10.10.53.235;

    root    /home/html;
    charset utf-8;
    include proxy.conf;

    # Disabled rewrite rule, kept for reference:
    # location ~ ^/getAllPlaybill.json$
    # {
    #     if ($query_string ~* "playbillDate=(.*)&mdv=(.*)$") {
    #         set $playbillDate $1;
    #         rewrite ^/getAllPlaybill\.json http://10.10.53.235/getAllPlaybill_$playbillDate.txt break;
    #     }
    # }

    # Hand requests to the upstream group declared above.
    location ~ / {
        proxy_pass http://localhost;
    }

    # Expires headers for static content.
    # NOTE(review): regex locations are tried in order of appearance and the
    # first match wins; "~ /" above matches any URI, so the two blocks below
    # look unreachable as written. Confirm before relying on them.
    location ~ .*\.(gif|jpg|jpeg|png|bmp|swf)$ {
        access_log off;
        expires    10d;
    }

    location ~ .*\.(js|css)$ {
        access_log off;
        expires    1h;
    }
}

一般情况下,可以设置为主写从查,此时检索请求不再发往主服务器,配置如下:

?
1
2
3
4
5
# Query pool restricted to the two slaves (write on master, read from slaves).
upstream localhost {
    server 10.10.53.177:8080;
    server 10.16.15.121:8080;
    # Master taken out of the query pool. nginx comments start with '#';
    # a leading '//' is not comment syntax and would fail config parsing.
    # server 10.10.53.235:8080;
}

你可能感兴趣的:(Solr)