Hibernate Search索引重建源码解析

使用Hibernate Search可以很方便地实现搜索功能,一般重建全部索引时会使用下面的方法:

    Session session = dao.getSession();
    FullTextSession fullTextSession = Search.getFullTextSession(session);
    try {
     // 只执行一次即可,后续新增的记录Hibernate将自动创建索引
    } catch (InterruptedException e) {
        logger.error("buildIndex error------->", e);

由于遇到了重建索引时阻塞线程的情况,于是阅读了一下Hibernate Search建立索引的源码。
Hibernate Search版本:4.5.0.Final

  1. fullTextSession调用createIndexer()方法,创建MassIndexer的实例来给数据库的记录创建索引
public MassIndexer createIndexer(Class... types) {
        MutableSearchFactory msf = (MutableSearchFactory) getSearchFactoryImplementor();
        ServiceManager serviceManager = msf.getServiceManager();
        MassIndexerFactory service = serviceManager.requestService( MassIndexerFactoryProvider.class, null );
        return service.createMassIndexer( getSearchFactoryImplementor(), getFactory(), types );
  1. 调用MassIndexerImpl的startAndWait()方法开始创建索引,并同步等待索引创建完成
    public void startAndWait() throws InterruptedException {
        BatchCoordinator coordinator = createCoordinator();
        if ( Thread.currentThread().isInterrupted() ) {
            throw new InterruptedException();

    protected BatchCoordinator createCoordinator() {
        return new BatchCoordinator(
                rootEntities, searchFactoryImplementor, sessionFactory,
                typesToIndexInParallel, documentBuilderThreads,
                cacheMode, objectLoadingBatchSize, objectsLimit,
                optimizeAtEnd, purgeAtStart, optimizeAfterPurge,
                monitor, idFetchSize
  1. BatchCoordinator继承了ErrorHandledRunnable,run()方法代码如下
public final void run() {
        ErrorHandler errorHandler = searchFactoryImplementor.getErrorHandler();
        try {
        catch (Exception re) {
            //being this an async thread we want to make sure everything is somehow reported
            errorHandler.handleException( log.massIndexerUnexpectedErrorMessage() , re );
  1. 调用BatchCoordinator的runWithErrorHandler()方法
public void runWithErrorHandler() {
        final BatchBackend backend = searchFactoryImplementor.makeBatchBackend( monitor );
        try {
            beforeBatch( backend ); // purgeAll and pre-optimize activities
            doBatchWork( backend );//
            afterBatch( backend );//清理工作
        catch (InterruptedException e) {
        finally {
private void doBatchWork(BatchBackend backend) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool( typesToIndexInParallel, "BatchIndexingWorkspace" );//线程池大小(typesToIndexInParallel)默认为1
        for ( Class type : rootEntities ) {
                    new BatchIndexingWorkspace(
                            searchFactoryImplementor, sessionFactory, type,
                            cacheMode, objectLoadingBatchSize, endAllSignal,
                            monitor, backend, objectsLimit, idFetchSize
        endAllSignal.await(); //waits for the executor to finish
  1. BatchIndexingWorkspace继承了ErrorHandledRunnable,实际业务代码在runWithErrorHandler()方法中
public void runWithErrorHandler() {
        try {
            final ErrorHandler errorHandler = searchFactoryImplementor.getErrorHandler();
            final BatchTransactionalContext transactionalContext = new BatchTransactionalContext( searchFactoryImplementor, sessionFactory, errorHandler );
            //first start the consumers, then the producers (reverse order):
            startTransformationToLuceneWork( transactionalContext, errorHandler );
            startProducingPrimaryKeys( transactionalContext, errorHandler );
            try {
                producerEndSignal.await(); //await for all work being sent to the backend
                log.debugf( "All work for type %s has been produced", indexedType.getName() );
            catch (InterruptedException e) {
                //restore interruption signal:
                throw new SearchException( "Interrupted on batch Indexing; index will be left in unknown state!", e );
        finally {

    private void startProducingPrimaryKeys(BatchTransactionalContext transactionalContext, ErrorHandler errorHandler) {
        final Runnable primaryKeyOutputter = new OptionallyWrapInJTATransaction( transactionalContext,
                new IdentifierProducer(
                        primaryKeyStream, sessionFactory,
                        objectLoadingBatchSize, indexedType, monitor,
                        objectsLimit, errorHandler, idFetchSize
        //execIdentifiersLoader has size 1 and is not configurable: ensures the list is consistent as produced by one transaction
        final ThreadPoolExecutor execIdentifiersLoader = Executors.newFixedThreadPool( 1, "identifierloader" );
        try {
            execIdentifiersLoader.execute( primaryKeyOutputter );
        finally {

    private void startTransformationToLuceneWork(BatchTransactionalContext transactionalContext, ErrorHandler errorHandler) {
        final Runnable documentOutputter = new OptionallyWrapInJTATransaction( transactionalContext,
                new IdentifierConsumerDocumentProducer(
                        primaryKeyStream, monitor, sessionFactory, producerEndSignal,
                        cacheMode, indexedType, searchFactoryImplementor,
                        idNameOfIndexedType, backend, errorHandler
        final ThreadPoolExecutor execFirstLoader = Executors.newFixedThreadPool( documentBuilderThreads, "entityloader" );//默认是6个线程
        try {
            for ( int i = 0; i < documentBuilderThreads; i++ ) {
                execFirstLoader.execute( documentOutputter );
        finally {
  1. 生产者和消费者都是通过OptionallyWrapInJTATransaction封装
public void runWithErrorHandler() throws Exception {
        if ( wrapInTransaction ) {
            final Session session;
            final StatelessSession statelessSession;
            if ( sessionAwareRunnable != null ) {
                session = batchContext.factory.openSession();
                statelessSession = null;
            else {
                session = null;
                statelessSession = batchContext.factory.openStatelessSession();


            if ( sessionAwareRunnable != null ) {
                sessionAwareRunnable.run( session );//消费者IdentifierConsumerDocumentProducer走该分支(其run方法接收Session)
            else {
                statelessSessionAwareRunnable.run( statelessSession );//生产者IdentifierProducer走该分支(其run方法接收StatelessSession)


            if ( sessionAwareRunnable != null ) {
            else {
        else {
            if ( sessionAwareRunnable != null ) {
                sessionAwareRunnable.run( null );
            else {
                statelessSessionAwareRunnable.run( null );
  1. IdentifierProducer通过Hibernate从数据库查询主键,并放在List中,达到batchSize(默认为10,从MassIndexerImpl中的objectLoadingBatchSize属性层层传递过来)后放入到ProducerConsumerQueue中。
public void run(StatelessSession upperSession) throws Exception {
        log.trace( "started" );
        try {
            inTransactionWrapper( upperSession );
        finally {
        log.trace( "finished" );

    private void inTransactionWrapper(StatelessSession upperSession) throws Exception {
        StatelessSession session = upperSession;
        if ( upperSession == null ) {
            session = sessionFactory.openStatelessSession();
        try {
            Transaction transaction = Helper.getTransactionAndMarkForJoin( session );
            loadAllIdentifiers( session );//从数据库加载指定实体的所有主键
        catch (InterruptedException e) {
            // just quit
        finally {
            if ( upperSession == null ) {

    private void loadAllIdentifiers(final StatelessSession session) throws InterruptedException {
        Number countAsNumber = (Number) session
            .createCriteria( indexedType )
            .setProjection( Projections.rowCount() )
            .setCacheable( false )
        long totalCount = countAsNumber.longValue();
        if ( objectsLimit != 0 && objectsLimit < totalCount ) {
            totalCount = objectsLimit;
        if ( log.isDebugEnabled() ) {
            log.debugf( "going to fetch %d primary keys", totalCount);
        monitor.addToTotalCount( totalCount );

        Criteria criteria = session
            .createCriteria( indexedType )
            .setProjection( Projections.id() )
            .setCacheable( false )
            .setFetchSize( idFetchSize );//默认每次抓取100条,MassIndexerImpl中的idFetchSize属性值传递过来

        ScrollableResults results = criteria.scroll( ScrollMode.FORWARD_ONLY );
        ArrayList destinationList = new ArrayList( batchSize );
        long counter = 0;
        try {
            while ( results.next() ) {
                Serializable id = (Serializable) results.get( 0 );
                destinationList.add( id );
                if ( destinationList.size() == batchSize ) {
                    enqueueList( destinationList );
                    destinationList = new ArrayList( batchSize );
                if ( counter == totalCount ) {
        finally {
        enqueueList( destinationList );

    private void enqueueList(final List idsList) throws InterruptedException {
        if ( ! idsList.isEmpty() ) {
            destination.put( idsList );
            log.tracef( "produced a list of ids %s", idsList );
  1. 消费者IdentifierConsumerDocumentProducer从队列ProducerConsumerQueue中获取主键List,然后根据主键从数据库加载数据。
public void run(Session upperSession) throws Exception {
        log.trace( "started" );
        Session session = upperSession;
        if ( upperSession == null ) {
            session = sessionFactory.openSession();
        session.setFlushMode( FlushMode.MANUAL );
        session.setCacheMode( cacheMode );
        session.setDefaultReadOnly( true );
        try {
            Transaction transaction = Helper.getTransactionAndMarkForJoin( session );
            loadAllFromQueue( session );//从队列获取主键列表
        finally {
            if ( upperSession == null ) {
        log.trace( "finished" );

    private void loadAllFromQueue(Session session) {
        final InstanceInitializer sessionInitializer = new HibernateSessionLoadingInitializer(
                (SessionImplementor) session
        try {
            Object take;
            do {
                take = source.take();//阻塞直到队列中有一批主键可取;返回null时表示没有更多数据,循环结束
                if ( take != null ) {
                    List idList = (List) take;
                    log.tracef( "received list of ids %s", idList );
                    loadList( idList, session, sessionInitializer );//根据主键列表从数据库加载数据
            while ( take != null );
        catch (InterruptedException e) {
            // just quit

    /**
     * Loads a list of entities of defined type using their identifiers.
     * The loaded objects are then transformed into Lucene Documents
     * and forwarded to the indexing backend.
     * @param listIds the list of entity identifiers (of the indexed type)
     * @param session the session to be used
     * @param sessionInitializer
     * @throws InterruptedException
     */
    private void loadList(List listIds, Session session, InstanceInitializer sessionInitializer) throws InterruptedException {
        Criteria criteria = session
                .createCriteria( type )
                .setCacheMode( cacheMode )
                .setLockMode( LockMode.NONE )
                .setCacheable( false )
                .setFlushMode( FlushMode.MANUAL )
                .setFetchSize( listIds.size() )
                .setResultTransformer( CriteriaSpecification.DISTINCT_ROOT_ENTITY )
                .add( Restrictions.in( idName, listIds ) );
        List list = criteria.list();
        monitor.entitiesLoaded( list.size() );
        indexAllQueue( session, list, sessionInitializer );

    private void indexAllQueue(Session session, List entities, InstanceInitializer sessionInitializer) {
        try {
            ConversionContext contextualBridge = new ContextualExceptionBridgeHelper();
                if ( entities == null && entities.isEmpty() ) {
                else {
                    log.tracef( "received a list of objects to index: %s", entities );
                    for ( Object object : entities ) {
                        try {
                            index( object, session, sessionInitializer, contextualBridge );
                            monitor.documentsBuilt( 1 );
                        catch (InterruptedException ie) {
                            // rethrowing the interrupted exception
                            throw ie;
                        catch (RuntimeException e) {
                            String errorMsg = log.massIndexerUnableToIndexInstance(
                            errorHandler.handleException( errorMsg, e );
        catch (InterruptedException e) {
            // just quit
