Sphinx 使用 Python 编写的 xmlpipe2 数据源生成索引

# Data source "testxml": documents are supplied as an xmlpipe2 XML stream.
source testxml
{
    type = xmlpipe2
    # Command that produces the XML stream for this source.
    # NOTE(review): presumably /tmp/testx.py is the Python script shown
    # further down in this document -- confirm the path on the indexing host.
    xmlpipe_command = python /tmp/testx.py
}

# Index "testxml": built from the "testxml" source declared above in this file.
index testxml
{
        source                                  = testxml
        # Base path of the index files.
        path                                    = /data/sphinx/testxml
        docinfo                                 = extern
        # No stemming/morphology processing; words of length 1 and up are kept.
        morphology                              = none
        min_word_len                            = 1
        charset_type                            = utf-8
        # 0 leaves prefix indexing switched off.
        min_prefix_len                          = 0
        html_strip                              = 1
        # Accepted characters: digits, ASCII letters (upper case folded to
        # lower case), underscore, and U+410..U+44F with U+410..U+42F folded
        # onto U+430..U+44F.
        charset_table                           = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
        # Characters in U+3000..U+2FA1F are indexed as 1-character n-grams.
        # NOTE(review): presumably intended for CJK text, which has no word
        # separators -- confirm against the Sphinx ngram_chars documentation.
        ngram_len                               = 1
        ngram_chars                             = U+3000..U+2FA1F
}

# Settings for the indexer program.
indexer
{
        # Memory limit for an indexing run.
        mem_limit                               = 128M
}

# Settings for the searchd daemon.
searchd
{
        # Port the daemon listens on.
        port                                    = 4412
        # Daemon log, per-query log and PID file all live in one directory.
        log                                     = /data/log/sphinxsearch/searchd.log
        query_log                               = /data/log/sphinxsearch/query.log
        # NOTE(review): read_timeout is presumably in seconds -- confirm
        # against the documentation of the Sphinx version in use.
        read_timeout                            = 5
        max_children                            = 30
        pid_file                                = /data/log/sphinxsearch/searchd.pid
        max_matches                             = 1000
        # Index rotation behaviour: seamless rotation on, indexes not
        # pre-opened, old index files unlinked.
        seamless_rotate                         = 1
        preopen_indexes                         = 0
        unlink_old                              = 1
}



Python 脚本(即上面 xmlpipe_command 调用的 /tmp/testx.py)代码如下:

# coding=utf-8

from loxun import XmlWriter
from StringIO import StringIO
import pymssql

# Export every row of MapObjectInfo as an xmlpipe2 document stream on stdout.
#
# The XML is assembled in an in-memory buffer and printed once at the end, so
# nothing is emitted if the database work fails part-way through.

# NOTE(review): the connection credentials are hard-coded in this script;
# consider loading them from configuration or the environment instead.
conn = pymssql.connect(host=r'MyServer2k', user='citymap', password='city@map@com', database='CitycomeMap',as_dict=True,charset='utf8')

out = StringIO()
xml = XmlWriter(out)

# Number of rows requested from the database per query.
PAGE_SIZE = 1000


def write_text_element(writer, tag, value):
    """Write ``<tag>value</tag>`` to *writer*.

    A NULL database column (``None``) produces an empty element instead of
    passing ``None`` on to the XML writer.
    """
    writer.startTag(tag)
    if value is not None:
        writer.text(value)
    writer.endTag()


xml.addNamespace("sphinx","http://www.beihai365.com")

# <sphinx:docset> wraps the whole stream; it is closed after the last document.
xml.startTag("sphinx:docset")

# <sphinx:schema>: three full-text fields and one integer attribute.
xml.startTag("sphinx:schema")
xml.tag("sphinx:field",{"name":"myname"})
xml.tag("sphinx:field",{"name":"myaddress"})
xml.tag("sphinx:field",{"name":"mykeyword"})
xml.tag("sphinx:attr",{"name":"AID","type":"int"})
xml.endTag()

# The connection is closed even when a query or an XML write raises.
try:
    cur = conn.cursor()

    cur.execute('SELECT COUNT(*) FROM MapObjectInfo')
    total = cur.fetchone()[0]

    # Page through the table in descending ID order. `fetched` is the number
    # of rows covered by the pages requested so far.
    # NOTE(review): this assumes the table does not change while the export
    # runs; concurrent inserts/deletes would shift the page windows.
    fetched = 0
    page = 1
    while fetched < total:
        # The last page may hold fewer than PAGE_SIZE rows.
        page_rows = min(PAGE_SIZE, total - fetched)
        window = PAGE_SIZE * page

        # Innermost query: the `window` highest IDs. Middle query: the
        # `page_rows` lowest IDs among those, i.e. the rows not yet emitted.
        # Outer query: return that page in descending ID order.
        cur.execute('SELECT * FROM (select top %d* from (select top %d* from MapObjectInfo ORDER BY ID DESC)t1 ORDER BY ID)t2 ORDER BY ID DESC' % (page_rows, window))

        row = cur.fetchone_asdict()
        while row:
            # Rows with ID 0 are skipped (Sphinx document IDs must be
            # non-zero).
            if row['ID'] != 0:
                xml.startTag("sphinx:document",{"id":row['ID']})
                write_text_element(xml, "myname", row['Name'])
                write_text_element(xml, "myaddress", row['Address'])
                write_text_element(xml, "mykeyword", row['Keyword'])
                write_text_element(xml, "AID", str(row['ID']))
                xml.endTag()
            row = cur.fetchone_asdict()

        fetched = window
        page += 1
finally:
    conn.close()

# Close <sphinx:docset> and finish the document.
xml.endTag()
xml.close()

# Single-argument call form: behaves the same as the Python 2 print statement.
print(out.getvalue())



你可能感兴趣的:(sphinx 使用 python xmlpipe2 数据源 生成索引)