Lucene 索引 搜索使用

 

折腾了很久,发现一个 bug:我网站的搜索只能根据 Title 查询。找了半天才发现,我用的是 5.4 版本的 MySQL,所以怎么折腾都不支持 FULLTEXT Index。

 

无奈之下,只好使用 Lucene 方案来做全文索引,代码类如下(亦可见附件):

package io.jpress.searcher;

import com.jfinal.log.Log;
import com.jfinal.plugin.activerecord.Page;
import io.jpress.Consts;
import io.jpress.model.Content;
import io.jpress.model.query.ContentQuery;
import io.jpress.plugin.search.ISearcher;
import io.jpress.plugin.search.SearcherBean;
import me.duzhi.blog.util.Functions;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;

/**
 * {@link ISearcher} implementation backed by a file-system Lucene index and the IK analyzer.
 *
 * <p>Each document carries the fields {@code sid}, {@code module}, {@code content},
 * {@code title}, {@code created}, {@code descrption} (sic) and {@code url}. {@link #init()}
 * must be called before any other operation so that the shared index directory is opened.
 *
 * <p>Originally created by 小琬 on 2017/1/3.
 */
public class LuceneSearcher implements ISearcher {

    // Analyzer slot kept for compatibility; this class never assigns or reads it and
    // builds a fresh IKAnalyzer for every writer / query parser instead.
    static Analyzer analyzer = null;
    public static Log log = Log.getLog(LuceneSearcher.class);

    public static String INDEX_PATH;

    private static Directory directory;

    /** Maximum number of hits returned by the non-paged {@link #search(String, String)}. */
    private static final int DEFAULT_MAX_RESULTS = 50;

    /** Page size used when a caller passes a non-positive page size. */
    private static final int DEFAULT_PAGE_SIZE = 10;

    static {
        INDEX_PATH = Functions.Kit.get("luceneDir");
        if (INDEX_PATH == null) {
            // NOTE(review): Java does not expand "~", so this resolves to a directory literally
            // named "~" under the working directory. Left unchanged so that indexes already
            // written there keep working; configure "luceneDir" explicitly to avoid it.
            INDEX_PATH = "~/indexes/";
        }
    }

    /**
     * Creates the index directory if necessary and opens it as the shared Lucene directory.
     * Failures are logged; in that case later operations log an error instead of running.
     */
    @Override
    public void init() {
        try {
            if (log.isWarnEnabled()) {
                log.warn("init lucene config");
            }
            File indexDir = new File(INDEX_PATH);
            if (!indexDir.exists() && !indexDir.mkdirs() && !indexDir.exists()) {
                log.error("init lucene path error, cannot create directory: " + indexDir.getAbsolutePath());
            }
            directory = NIOFSDirectory.open(indexDir);
        } catch (IOException e) {
            log.error("init lucene path error", e);
        }
    }

    /**
     * Adds one bean to the index.
     *
     * @param bean the bean to index
     */
    @Override
    public void addBean(SearcherBean bean) {
        // try-with-resources: the writer is closed only if it was actually opened, so a failed
        // open no longer ends in a NullPointerException that hides the real error.
        try (IndexWriter writer = openWriter()) {
            writer.addDocument(createDoc(bean));
        } catch (IOException e) {
            log.error("add bean to lucene error", e);
        }
    }

    /**
     * Removes every document whose {@code sid} field equals the given id.
     *
     * @param beanId the {@code sid} of the documents to delete
     */
    @Override
    public void deleteBean(String beanId) {
        try (IndexWriter writer = openWriter()) {
            writer.deleteDocuments(new Term("sid", beanId));
        } catch (IOException e) {
            log.error("delete bean to lucene error,beanId:" + beanId, e);
        }
    }

    /**
     * Replaces the indexed document of the bean (matched by {@code sid}) with a fresh one.
     *
     * @param bean the bean whose document should be replaced
     */
    @Override
    public void updateBean(SearcherBean bean) {
        // A single updateDocument call performs delete-then-add in one writer session instead
        // of opening two writers, so a failure cannot leave the document deleted but not re-added.
        try (IndexWriter writer = openWriter()) {
            writer.updateDocument(new Term("sid", bean.getSid()), createDoc(bean));
        } catch (IOException e) {
            log.error("update bean to lucene error,beanId:" + bean.getSid(), e);
        }
    }

    /**
     * Opens an index writer on the shared directory using a new IK analyzer.
     *
     * @return a writer the caller must close
     * @throws IOException if {@link #init()} has not opened the directory or the writer cannot be created
     */
    private static IndexWriter openWriter() throws IOException {
        if (directory == null) {
            throw new IOException("lucene directory is not initialised, call init() first");
        }
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, new IKAnalyzer());
        return new IndexWriter(directory, config);
    }

    /**
     * Opens an index reader on the shared directory.
     *
     * @return a reader the caller must close
     * @throws IOException if {@link #init()} has not opened the directory or the index cannot be read
     */
    private static IndexReader openReader() throws IOException {
        if (directory == null) {
            throw new IOException("lucene directory is not initialised, call init() first");
        }
        return DirectoryReader.open(directory);
    }

    /**
     * Builds the Lucene document for a bean.
     *
     * @param bean source bean; its sid, post and created date are required
     * @return the document to index
     */
    private static Document createDoc(SearcherBean bean) {
        Document doc = new Document();
        doc.add(new StringField("sid", bean.getSid(), Field.Store.YES));
        doc.add(new StringField("module", bean.getPost().getModule(), Field.Store.YES));
        // The field constructors reject null values, so optional texts are indexed as "".
        doc.add(new TextField("content", nullToEmpty(bean.getContent()), Field.Store.YES));
        doc.add(new TextField("title", nullToEmpty(bean.getTitle()), Field.Store.YES));
        doc.add(new StringField("created", DateTools.dateToString(bean.getCreated(), DateTools.Resolution.YEAR), Field.Store.NO));
        // "descrption" is misspelled but is the name already stored in existing indexes.
        doc.add(new StringField("descrption", nullToEmpty(bean.getDescription()), Field.Store.YES));
        doc.add(new StringField("url", nullToEmpty(bean.getUrl()), Field.Store.YES));
        return doc;
    }

    /** Returns the given string, or the empty string when it is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /**
     * Searches title and content within a module and returns the first
     * {@value #DEFAULT_MAX_RESULTS} hits as page 1.
     *
     * @param keyword the user query
     * @param module  the module the hits must belong to
     * @return the result page, or {@code null} if the search could not be executed
     */
    @Override
    public Page<SearcherBean> search(String keyword, String module) {
        try {
            // Delegating gives this overload real totals instead of hard-coded page metadata
            // and guarantees the index reader is closed.
            return search(keyword, module, 1, DEFAULT_MAX_RESULTS);
        } catch (Exception e) {
            // This overload has always returned null on any failure; keep that, but log it.
            log.error("lucene search error,keyword:" + keyword, e);
            return null;
        }
    }

    /**
     * Converts the hits into search beans, loading the matching {@link Content} for each hit.
     *
     * @param searcher the searcher that produced the hits
     * @param topDocs  the hits to convert
     * @return one bean per hit, in hit order
     * @throws IOException if a stored document cannot be read
     */
    private static List<SearcherBean> getSearcherBeans(IndexSearcher searcher, TopDocs topDocs) throws IOException {
        List<SearcherBean> searcherBeans = new ArrayList<SearcherBean>(topDocs.scoreDocs.length);
        for (ScoreDoc hit : topDocs.scoreDocs) {
            Document doc = searcher.doc(hit.doc);
            SearcherBean searcherBean = new SearcherBean();
            searcherBean.setContent(doc.get("content"));
            searcherBean.setSid(doc.get("sid"));
            searcherBean.setUrl(doc.get("url"));
            searcherBean.setTitle(doc.get("title"));
            searcherBean.setDescription(doc.get("descrption"));
            Content content = ContentQuery.me().findById(new BigInteger(searcherBean.getSid()));
            searcherBean.setPost(content);
            searcherBeans.add(searcherBean);
        }
        return searcherBeans;
    }

    /**
     * Builds the query: the hit must belong to {@code module} and match the keyword in
     * {@code content} or {@code title}.
     *
     * @param keyword the user query
     * @param module  the required module
     * @return the query, or {@code null} if the keyword is blank, the module is null or the
     *         keyword cannot be parsed
     */
    private static Query getQuery(String keyword, String module) {
        if (keyword == null || keyword.trim().isEmpty() || module == null) {
            log.warn("lucene query skipped, blank keyword or missing module");
            return null;
        }
        Query contentQuery = parseField("content", keyword);
        Query titleQuery = parseField("title", keyword);
        if (contentQuery == null || titleQuery == null) {
            return null;
        }
        BooleanQuery combined = new BooleanQuery();
        combined.add(new BooleanClause(contentQuery, BooleanClause.Occur.SHOULD));
        combined.add(new BooleanClause(titleQuery, BooleanClause.Occur.SHOULD));
        combined.add(new BooleanClause(new TermQuery(new Term("module", module)), BooleanClause.Occur.MUST));
        combined.setMinimumNumberShouldMatch(1);
        return combined;
    }

    /**
     * Parses the keyword against one field. If the raw keyword is not valid query syntax
     * (e.g. it contains an unbalanced quote or bracket) it is parsed again as literal text.
     *
     * @param field   the default field of the parser
     * @param keyword the user query
     * @return the parsed query, or {@code null} if even the escaped keyword cannot be parsed
     */
    private static Query parseField(String field, String keyword) {
        QueryParser parser = new QueryParser(Version.LUCENE_47, field, new IKAnalyzer());
        try {
            return parser.parse(keyword);
        } catch (ParseException rawFailure) {
            try {
                return parser.parse(QueryParser.escape(keyword));
            } catch (ParseException escapedFailure) {
                log.error("parse lucene query error,field:" + field + ",keyword:" + keyword, escapedFailure);
                return null;
            }
        }
    }

    /**
     * Searches title and content within a module and returns one page of hits.
     *
     * @param queryString the user query
     * @param module      the module the hits must belong to
     * @param pageNum     1-based page number; values below 1 are treated as 1
     * @param pageSize    hits per page; non-positive values fall back to {@value #DEFAULT_PAGE_SIZE}
     * @return the requested page (empty when it lies past the last hit), or {@code null} if
     *         the query could not be built or the index could not be read
     */
    @Override
    public Page<SearcherBean> search(String queryString, String module, int pageNum, int pageSize) {
        int page = Math.max(pageNum, 1);
        int size = pageSize > 0 ? pageSize : DEFAULT_PAGE_SIZE;

        Query query = getQuery(queryString, module);
        if (query == null) {
            return null;
        }

        try (IndexReader reader = openReader()) {
            IndexSearcher searcher = new IndexSearcher(reader);
            int totalRow = searchTotalRecord(searcher, query);
            int totalPages = (int) (((long) totalRow + size - 1) / size);

            List<SearcherBean> searcherBeans;
            long skipped = (long) size * (page - 1);
            if (skipped >= totalRow) {
                // Page lies past the last hit (or there are no hits): nothing to fetch, and
                // looking up the "previous page's last hit" would index out of bounds.
                searcherBeans = new ArrayList<SearcherBean>();
            } else {
                ScoreDoc lastScoreDoc = getLastScoreDoc(page, size, query, searcher);
                TopDocs topDocs = searcher.searchAfter(lastScoreDoc, query, size);
                searcherBeans = getSearcherBeans(searcher, topDocs);
            }
            return new Page<SearcherBean>(searcherBeans, page, size, totalPages, totalRow);
        } catch (IOException e) {
            log.error("lucene search error,keyword:" + queryString, e);
        }
        return null;
    }

    /**
     * Returns the last hit of the page preceding {@code pageIndex}, for use with
     * {@code searchAfter}.
     *
     * @param pageIndex     1-based page number
     * @param pageSize      hits per page
     * @param query         the query being paged
     * @param indexSearcher the searcher to run the query on
     * @return the last hit of the previous page, or {@code null} for the first page
     * @throws IOException if the index cannot be read
     */
    private static ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher indexSearcher) throws IOException {
        if (pageIndex <= 1) {
            return null; // the first page has no predecessor
        }
        int num = pageSize * (pageIndex - 1); // number of hits on all previous pages
        TopDocs tds = indexSearcher.search(query, num);
        return tds.scoreDocs[num - 1];
    }

    /**
     * Counts all documents matching the query.
     *
     * @param searcher the searcher to run the query on
     * @param query    the query to count hits for
     * @return the number of matching documents
     * @throws IOException if the index cannot be read
     */
    public static int searchTotalRecord(IndexSearcher searcher, Query query) throws IOException {
        // Count-only collector: no need to collect and rank every hit just to get the total.
        TotalHitCountCollector counter = new TotalHitCountCollector();
        searcher.search(query, counter);
        return counter.getTotalHits();
    }

    /**
     * Re-indexes every content of the article module, replacing existing documents by
     * {@code sid}. Requires {@link #init()} to have been called.
     */
    public static void reloadIndex() {
        List<Content> contents = ContentQuery.me().findByModule(Consts.MODULE_ARTICLE);
        if (contents == null || contents.isEmpty()) {
            return;
        }
        // One writer for the whole run instead of two writer open/close cycles per article.
        try (IndexWriter writer = openWriter()) {
            for (Content content : contents) {
                SearcherBean searcherBean = new SearcherBean();
                searcherBean.setPost(content);
                searcherBean.setDescription(content.getShowText());
                searcherBean.setUrl(content.getUrl());
                searcherBean.setCreated(content.getCreated());
                searcherBean.setData(content);
                searcherBean.setContent(content.getShowText());
                searcherBean.setTitle(content.getTitle());
                searcherBean.setSid(String.valueOf(content.getId()));
                writer.updateDocument(new Term("sid", searcherBean.getSid()), createDoc(searcherBean));
            }
        } catch (IOException e) {
            log.error("reload lucene index error", e);
        }
    }
}

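Maven 依赖:

<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
</dependency>

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>4.7.2</version>
</dependency>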

不多折腾了,代码比较简单,大家自己看。我花了半天时间搞定这个功能,更多代码请参考:

git.oschina.net/duzhime/DUZHI_BLOG

 

除特别注明外,本站所有文章均为duzhi原创,转载请注明出处来自https://www.duzhi.me/article/59.html

联系我们

******

在线咨询:点击这里给我发消息

邮件:ashang.peng#aliyun.com

QR code