Java-AI-Book-Code/src/textsearch/LuceneManager.java at master · navoj/Java-AI-Book-Code

executable file
193 lines (175 loc) · 6.49 KB
package textsearch;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.LockObtainFailedException;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Wrapper class for an embedded Lucene index that supports adding documents and searching them.
 * Copyright 2002-2012 by Mark Watson. All rights reserved.
 * This software can be used under either of the following licenses:
 * 1. LGPL v3<br/>
 * 2. Apache 2
 */
public class LuceneManager {
  private String data_store_file_root;
   * @param data_store_file_root
  public LuceneManager(String data_store_file_root) {
      this.data_store_file_root = data_store_file_root;
   * If you want to start with a fresh index, call this method immediately
   * after creating a new instance of a LuceneManager.
   * @throws IOException 
   * @throws LockObtainFailedException 
   * @throws CorruptIndexException 
  public void createAndClearLuceneIndex() throws CorruptIndexException, LockObtainFailedException, IOException {
    deleteFilePath(new File(data_store_file_root + "/lucene_index"));
    File index_dir = new File(data_store_file_root + "/lucene_index");
    new IndexWriter(index_dir, new StandardAnalyzer(), true).close();
   * @throws java.io.IOException
  public void close() throws IOException {
      reader.close();
      writer.close();
   * @param document_original_uri
   * @param document_plain_text
   * @throws org.apache.lucene.index.CorruptIndexException
   * @throws java.io.IOException
  public void addDocumentToIndex(String document_original_uri, String document_plain_text) throws CorruptIndexException, IOException {
      File index_dir = new File(data_store_file_root + "/lucene_index");
      writer = new IndexWriter(index_dir, new StandardAnalyzer(), false);
      Document doc = new Document();
      doc.add(new Field("uri", document_original_uri, Field.Store.YES, Field.Index.NO));
      doc.add(new Field("text", document_plain_text, Field.Store.YES, Field.Index.TOKENIZED));
      writer.addDocument(doc);
      writer.optimize();
      writer.close();
   * @param search_query
   * @return
   * @throws org.apache.lucene.queryParser.ParseException
   * @throws java.io.IOException
  public List<String> searchIndexForURIs(String search_query) throws ParseException, IOException {
      reader = IndexReader.open(data_store_file_root + "/lucene_index");
      List<String> ret = new ArrayList<String>();
      Searcher searcher = new IndexSearcher(reader);
      Analyzer analyzer = new StandardAnalyzer();
      QueryParser parser = new QueryParser("text", analyzer);
      Query query = parser.parse(search_query);
      Hits hits = searcher.search(query);
      for (int i = 0; i < hits.length(); i++) {
          System.out.println(" * * searchIndexForURIs: hit: " + hits.doc(i));
          Document doc = hits.doc(i);
          String uri = doc.get("uri");
          ret.add(uri);
      reader.close();
      return ret;
   * @param search_query
   * @return
   * @throws java.lang.Exception
  public List<String[]> searchIndexForURIsAndDocText(String search_query) throws Exception {
      reader = IndexReader.open(data_store_file_root + "/lucene_index");
      List<String[]> ret = new ArrayList<String[]>();
      Searcher searcher = new IndexSearcher(reader);
      Analyzer analyzer = new StandardAnalyzer();
      QueryParser parser = new QueryParser("text", analyzer);
      Query query = parser.parse(search_query);
      System.out.println(" * * test query: " + search_query);
      Hits hits = searcher.search(query);
      for (int i = 0; i < hits.length(); i += 1) {
        Document doc = hits.doc(i);
        System.out.println("     * *  hit: " + hits.doc(i));
        String [] pair = new String[]{doc.get("uri"), doc.get("text")};
        ret.add(pair);
      reader.close();
      return ret;
  private static final String fileSeparator = System.getProperty("file.separator");
  private boolean deleteFilePath(File filePath) {
    System.out.println("deleteFile(" + filePath + ")");
    if (filePath == null) {
        return false;
    if (filePath.isDirectory()) {
        String[] dirListing = filePath.list();
        // For each file/directory in listing, make recursive call.
        int len = dirListing.length;
        for (int i = 0; i < len; i++) {
            if (deleteFilePath(new File(filePath.toString() + fileSeparator + dirListing[i])) == false) {
                // Break and return an error.
                return false;
    // Delete file or directory.
    if (filePath.delete() == false) {
        // Display message and return an error.
        System.out.println("Could not delete: " + filePath.getAbsolutePath());
        return false;
    return true;
  private IndexWriter writer;
  private IndexReader reader;
   * @param args
   * @throws Throwable 
  public static void main(String[] args) throws Throwable {
    LuceneManager lm = new LuceneManager("/tmp");
    // start fresh: create a new index:
    lm.createAndClearLuceneIndex();
    lm.addDocumentToIndex("file://tmp/test1.txt", "This is a test for index and a test for search.");
    lm.addDocumentToIndex("file://tmp/test2.txt", "Please test the index code.");
    lm.addDocumentToIndex("file://tmp/test3.txt", "Please test the index code before tomorrow.");
    // get URIs of matching documents:
    List<String> doc_uris = lm.searchIndexForURIs("test, index");
    System.out.println("Matched document URIs: " + doc_uris);
    // get URIs and document text for matching documents:
    List<String[]> doc_uris_with_text = lm.searchIndexForURIsAndDocText("test, index");
    for (String[] uri_and_text : doc_uris_with_text) {
      System.out.println("Matched document URI:  " + uri_and_text[0]);
      System.out.println("        document text: " + uri_and_text[1]);
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

LuceneManager.java

Latest commit

History

LuceneManager.java

File metadata and controls