高亮搜索关键字，并摘要出关键字的上下文

# -*- encoding:utf-8 -*-
import re

def get_pattern(query):
    """Build a case-insensitive regex alternation from a search query.

    The query is split on whitespace and each term becomes one alternative
    of the returned pattern. Terms are treated as regular expressions.

    Args:
        query: Whitespace-separated search terms.

    Returns:
        A pattern string of the form ``(?i)term1|term2``. When no usable
        term remains, a pattern that can never match is returned.
    """
    terms = []
    for raw_term in query.split():
        # A quantifier with nothing before it is not a valid regex, so drop
        # every leading one (not just the first).
        term = raw_term.lstrip('*?+')
        if not term:
            continue
        try:
            re.compile(term)
        except re.error:
            # Not a valid expression on its own (e.g. an unbalanced
            # parenthesis): match the term literally instead of letting
            # callers crash when they compile the combined pattern.
            term = re.escape(term)
        terms.append(term)

    if not terms:
        # An empty alternation would match the empty string at every
        # position; an empty negative lookahead never matches anything.
        return '(?i)(?!)'
    return '(?i)%s' % '|'.join(terms)

def highlight(text, query):
    """Wrap every match of the query terms in ``text`` with an ``<em>`` tag.

    Args:
        text: The string to mark up.
        query: Whitespace-separated search terms; surrounding whitespace
            is ignored.

    Returns:
        ``text`` with each non-empty match wrapped in
        ``<em class="hL">...</em>``. ``text`` is returned unchanged when
        either argument is empty or the query cannot be compiled into a
        valid pattern.
    """
    # NOTE(review): text is inserted into markup as-is; confirm that callers
    # HTML-escape it where needed.
    query = (query or '').strip()
    if not (text and query):
        return text

    def wrap_match(matchobj):
        matched = matchobj.group(0)
        # Zero-width matches (e.g. from a term like "a*") would otherwise
        # scatter empty <em></em> pairs between every character.
        if not matched:
            return matched
        return '<em class="hL">' + matched + '</em>'

    try:
        return re.sub(get_pattern(query), wrap_match, text)
    except re.error:
        # Highlighting is cosmetic: a malformed pattern should not take the
        # caller down, so fall back to the unmarked text.
        return text

def _to_text(value):
    """Return ``value`` as text, decoding bytes as UTF-8 then GB18030.

    Non-bytes values are returned unchanged; ``None`` is returned when the
    bytes cannot be decoded with either encoding.
    """
    if not isinstance(value, bytes):
        return value
    for encoding in ('utf-8', 'gb18030'):
        try:
            return value.decode(encoding)
        except UnicodeDecodeError:
            pass
    return None


def get_summary_text(obj, query, searchable_text):
    """Return a highlighted excerpt of ``searchable_text`` around the first hit.

    Args:
        obj: Not used by this function; kept so existing callers keep working.
        query: Whitespace-separated search terms.
        searchable_text: Text to summarise. Byte strings are decoded as
            UTF-8, falling back to GB18030; text strings are used as-is.

    Returns:
        ``''`` when either argument is empty, the input cannot be decoded
        or searched, or no term matches. Texts of at most 100 characters
        are returned whole, highlighted. Longer texts yield the first match
        with up to 50 characters of context on each side, highlighted, with
        ``'... '`` / ``' ...'`` marking the sides that were cut off.
    """
    if not (query and searchable_text):
        return ''

    # Decode only real byte strings: calling .decode() on text either does
    # not exist or round-trips through ASCII and fails on non-ASCII content.
    searchable_text = _to_text(searchable_text)
    query = _to_text(query)
    if searchable_text is None or query is None:
        return ''

    try:
        m = re.search(get_pattern(query), searchable_text)
    except (re.error, TypeError):
        # Malformed pattern or non-string input: no summary is available.
        return ''
    if m is None:
        return ''

    if len(searchable_text) <= 100:
        return highlight(searchable_text, query)

    context = 50
    start, end = m.span()

    parts = []
    if start - context > 0:
        parts.append('... ')
        parts.append(searchable_text[start - context:start])
    else:
        parts.append(searchable_text[:start])

    parts.append(m.group(0))

    parts.append(searchable_text[end:end + context])
    # Mark the tail as truncated only when text really remains past the
    # context window.
    if end + context < len(searchable_text):
        parts.append(' ...')

    return highlight(''.join(parts), query)

 

你可能感兴趣的:(python)