接触lucene

北风norther

浏览: 13417 次
性别:
来自: 邯郸

最近访客更多访客>>

minxiaomin

skull

ldzyz007

chaofanbaobao

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene学习

lucene学习 lucene索引库创建索引搜索索引

使用lucene时，数据从数据库中获得，

所以我下面展示的代码描述的就是这样一个流程：

1，从数据库查数据，然后把这些数据通过lucene创建索引库保存在硬盘上。

2，从索引库查出数据。

3，完！

package com.bjtc;

import java.io.File;     
import java.io.Reader;
import java.io.StringReader;
import java.sql.Connection;     
import java.sql.ResultSet;     
import java.sql.Statement;     
import java.util.regex.Pattern;


import org.apache.lucene.analysis.Analyzer;     
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.document.Document;     
import org.apache.lucene.document.Field;     
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FieldType.NumericType;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.PatternAnalyzer;

/**
 * Builds the on-disk Lucene index of goods from the relational database.
 *
 * <p>{@link #create()} runs one query over the {@code goods} table (with brand name,
 * category name and maximum seller price resolved by sub-selects), loads the attribute
 * values of every row from {@code Goods_Attr}, and writes one Lucene document per row
 * into the directory named by {@link #indexpath}. The index is opened in
 * {@code OpenMode.CREATE}, so every run replaces the previous index.
 */
public class CreateIndex {
    static String indexpath="e:\\indextest\\index";
    static File indexFile = null;
    // Not used by create(); kept so existing code that touches the field still compiles.
    Analyzer analyzer = null;
    String brandsql="(select b.name from brand b where b.id=g.brand_id) as brandName";
    String categorySql="(select c.name from category c where c.id=g.category_id) as categroyName";
    String price ="(select max(s.price) from seller_goods s where s.goods_id=g.id) as Sprice";
    String attrSql="select * from Goods_Attr where goods_id=";
    String sql="select g.* ,"+brandsql+","+categorySql+","+price+" from goods g";

    /**
     * Reads every goods row from the database and (re)creates the Lucene index from them.
     *
     * <p>All JDBC and Lucene resources are released on every exit path. If indexing fails
     * part-way, the writer is rolled back so that a half-built index is not committed.
     *
     * @throws Exception if no database connection is available, or if any SQL or index
     *                   operation fails
     */
    public void create() throws Exception {
        // NOTE(review): the connection is closed when indexing ends; this assumes DButil
        // hands out a connection owned by the caller rather than a shared one - confirm.
        try (Connection conn = DButil.getConnection()) {
            if (conn == null) {
                throw new Exception("数据库连接失败");
            }

            indexFile = new File(indexpath);
            if (!indexFile.exists()) {
                // mkdirs, not mkdir: the index path is nested and the parent may be missing.
                indexFile.mkdirs();
            }

            try (Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery(sql);
                 // One statement is reused for all per-row attribute look-ups.
                 Statement attrStmt = conn.createStatement();
                 Directory directory = FSDirectory.open(indexFile);
                 Analyzer smartAnalyzer = new SmartChineseAnalyzer(Version.LUCENE_4_9)) {

                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, smartAnalyzer);
                // CREATE drops whatever index already exists and builds a new one.
                config.setOpenMode(OpenMode.CREATE);
                IndexWriter indexWriter = new IndexWriter(directory, config);

                System.out.println("正在创建索引ing.....");
                int indexed;
                try {
                    indexed = addAllGoods(rs, attrStmt, indexWriter);
                } catch (Exception e) {
                    // Discard the partial index and release the write lock.
                    try {
                        indexWriter.rollback();
                    } catch (Exception rollbackFailure) {
                        e.addSuppressed(rollbackFailure);
                    }
                    throw e;
                }
                // Closing the writer commits the documents; report success only afterwards.
                indexWriter.close();
                System.out.println("数据库查询结果   ："+indexed);
                System.out.println("索引创建完成！");
            }
        }
    }

    /**
     * Adds one document per remaining row of {@code rs} to the writer.
     *
     * @return the number of documents added
     */
    private int addAllGoods(ResultSet rs, Statement attrStmt, IndexWriter indexWriter) throws Exception {
        // Indexed + stored; FieldType tokenizes by default, so these fields are analyzed.
        // Built once and frozen instead of once per row.
        FieldType analyzedStored = new FieldType();
        analyzedStored.setIndexed(true);
        analyzedStored.setStored(true);
        analyzedStored.freeze();

        int count = 0;
        while (rs.next()) {
            indexWriter.addDocument(toDocument(rs, attrStmt, analyzedStored));
            count++;
        }
        return count;
    }

    /**
     * Converts the current row of {@code rs} into a Lucene document.
     *
     * <p>Lucene fields reject {@code null} values, so every nullable text column is
     * replaced by a fallback string before it is added.
     */
    private Document toDocument(ResultSet rs, Statement attrStmt, FieldType analyzedStored) throws Exception {
        Document doc = new Document();

        // StringField values are indexed as a single term (not tokenized).
        doc.add(new StringField("brand", orDefault(rs.getString("brandName"), ""), Field.Store.YES));
        doc.add(new StringField("category", orDefault(rs.getString("categroyName"), ""), Field.Store.YES));
        doc.add(new StringField("brief", orDefault(rs.getString("brief"), " "), Field.Store.YES));
        doc.add(new StringField("type_no", orDefault(rs.getString("type_no"), ""), Field.Store.YES));
        // The product name is analyzed so that it can be found by full-text queries.
        doc.add(new Field("name", orDefault(rs.getString("name"), ""), analyzedStored));
        doc.add(new StringField("code", orDefault(rs.getString("code"), ""), Field.Store.YES));
        doc.add(new StringField("image", orDefault(rs.getString("image"), ""), Store.YES));

        int goodsId = rs.getInt("id");
        doc.add(new IntField("id", goodsId, Store.YES));
        doc.add(new FloatField("price", rs.getFloat("Sprice"), Store.YES));
        doc.add(new IntField("click_count", rs.getInt("click_count"), Store.YES));
        doc.add(new IntField("attention", rs.getInt("attention"), Store.YES));

        // Field names are case-sensitive: the search side queries and reads "goodsAttr",
        // so the field must be written under exactly that name.
        doc.add(new Field("goodsAttr", loadAttributes(attrStmt, goodsId), analyzedStored));
        return doc;
    }

    /**
     * Loads the attribute values of one goods row as a single comma-terminated string,
     * e.g. {@code "b,a,"}; each value read is placed in front of the previous ones.
     */
    private String loadAttributes(Statement attrStmt, int goodsId) throws Exception {
        String joined = "";
        // goodsId is an int, so appending it to the SQL text cannot inject anything.
        try (ResultSet attrs = attrStmt.executeQuery(attrSql + goodsId)) {
            while (attrs.next()) {
                joined = attrs.getString("attr_value") + "," + joined;
            }
        }
        return joined;
    }

    /** Returns {@code value}, or {@code fallback} when {@code value} is {@code null}. */
    private static String orDefault(String value, String fallback) {
        return value == null ? fallback : value;
    }

    public static void main(String[] args) throws Exception {
        new CreateIndex().create();
    }
}

建立好索引库后就开始搜索吧

package com.bjtc;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import TEST.MyAnalyzer;


/**
 * Runs a query against the goods index stored at {@link #indexPath} and exposes one
 * page of the hits, sorted by the {@code price} field in ascending order.
 *
 * <p>The search is executed by the constructor; results are read through the getters.
 */
public class search {
    String indexPath = "e:\\indextest\\index";
    private Integer currentPage;
    private Integer MaxPage;
    private List<Goods> list;

    /**
     * Returns the total number of hits of the search.
     *
     * <p>Despite the name this is a hit count, not a page count; callers have to divide
     * by their page size themselves.
     */
    public Integer getMaxPage() {
        return MaxPage;
    }

    /** Returns the goods on the requested page (empty when the page is past the end). */
    public List<Goods> getList() {
        return list;
    }

    /** Returns the page that was actually served; requests below 1 are served as page 1. */
    public Integer getCurrentPage() {
        return currentPage;
    }

    /**
     * Exact-match search over several optional conditions, with paging.
     *
     * <p>Every condition that is non-null and non-blank is added as a mandatory clause;
     * blank or null conditions are ignored.
     *
     * @param brand       exact value of the {@code brand} field, or null/blank
     * @param category    exact value of the {@code category} field, or null/blank
     * @param price       inclusive price range written as {@code "low-high"}, or null/blank
     * @param attr        space-separated attribute terms; tokens of length 1 are skipped
     * @param pagerSize   number of results per page
     * @param currentPage 1-based page number
     * @throws IOException              if the index cannot be opened or read
     * @throws ParseException           declared for compatibility; not thrown by this path
     * @throws IllegalArgumentException if {@code price} is not of the form {@code "low-high"}
     */
    public search(String brand,String category,String price,String attr,int pagerSize,int currentPage) throws IOException, ParseException{
        System.out.println("搜索条件：");
        System.out.println("category ------"+category);
        System.out.println("brand    ------"+brand);
        System.out.println("attr     ------"+attr);

        // Occur.MUST on every clause: all given conditions have to match (logical AND).
        BooleanQuery bq = new BooleanQuery();

        if (hasText(price)) {
            bq.add(parsePriceRange(price), Occur.MUST);
        }
        // Terms are only built after the null/blank check; a null term text is not usable.
        if (hasText(brand)) {
            bq.add(new TermQuery(new Term("brand", brand)), Occur.MUST);
        }
        if (hasText(category)) {
            bq.add(new TermQuery(new Term("category", category)), Occur.MUST);
        }
        if (hasText(attr)) {
            for (String token : attr.split(" ")) {
                if (token.length() > 1) {
                    String value = token.trim();
                    System.out.println(value);
                    // NOTE(review): field names are case-sensitive; "goodsAttr" must match
                    // the name used when the index was written.
                    bq.add(new TermQuery(new Term("goodsAttr", value)), Occur.MUST);
                }
            }
        }

        runSearch(bq, pagerSize, currentPage);
    }

    /**
     * Free-text search ("one big search box") over the name, brand and category fields.
     *
     * <p>The query string is parsed with a {@code MultiFieldQueryParser} using
     * {@code SmartChineseAnalyzer}; with the default operator set to AND every clause of
     * the query has to match in at least one of the fields.
     *
     * @param queryStr    query text in Lucene classic query syntax
     * @param pagerSize   number of results per page
     * @param currentPage 1-based page number
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code queryStr} cannot be parsed
     */
    public  search(String queryStr,int pagerSize,int currentPage) throws IOException, ParseException {
        // The fields must be the names that exist in the index ("brand"/"category", the
        // same names the result mapping reads), not the SQL column aliases.
        MultiFieldQueryParser mp = new MultiFieldQueryParser(
                Version.LUCENE_4_9, new String[] {"name","brand","category"},
                new SmartChineseAnalyzer(Version.LUCENE_4_9));
        mp.setDefaultOperator(Operator.AND);

        Query query = mp.parse(queryStr);
        runSearch(query, pagerSize, currentPage);
    }

    /**
     * Executes {@code query}, sorted by ascending price, and stores the requested page
     * together with the total hit count in this object's fields.
     */
    private void runSearch(Query query, int pagerSize, int currentPage) throws IOException {
        // SortField defaults to ascending order.
        Sort byPrice = new Sort(new SortField("price", SortField.Type.FLOAT));

        // Guard the paging arithmetic: page numbers below 1 and negative sizes would
        // otherwise produce a negative array index, and int multiplication could overflow.
        int page = Math.max(1, currentPage);
        int size = Math.max(0, pagerSize);

        List<Goods> pageItems = new ArrayList<Goods>();
        int totalHits;
        // Reader and directory are closed even when the search or the mapping fails.
        try (Directory d = FSDirectory.open(new File(indexPath));
             IndexReader reader = DirectoryReader.open(d)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Arguments: query, filter (none), maximum number of documents to fetch, sort.
            TopDocs topDocs = searcher.search(query, null, 10000, byPrice);
            totalHits = topDocs.totalHits;
            System.out.println("符合条件的" + totalHits + "---");

            int fetched = topDocs.scoreDocs.length;
            long offset = (long) size * (page - 1);
            int begin = (int) Math.min(offset, (long) fetched);
            int end = (int) Math.min(offset + size, (long) fetched);
            for (int i = begin; i < end; i++) {
                pageItems.add(toGoods(reader.document(topDocs.scoreDocs[i].doc)));
            }
        }

        this.MaxPage = totalHits;
        this.currentPage = page;
        this.list = pageItems;
    }

    /** Copies the stored fields of one hit into a new {@code Goods} object. */
    private static Goods toGoods(Document doc) {
        Goods g = new Goods();
        g.setId(Integer.parseInt(doc.get("id")));
        g.setName(doc.get("name"));
        g.setCode(doc.get("code"));
        g.setBrandName(doc.get("brand"));
        g.setCategoryName(doc.get("category"));
        g.setPrice(Float.valueOf(doc.get("price")));
        g.setS(doc.get("goodsAttr"));
        return g;
    }

    /**
     * Turns a {@code "low-high"} string into an inclusive float range query on {@code price}.
     *
     * @throws IllegalArgumentException if the text does not consist of exactly two numbers
     *                                  separated by {@code '-'}
     */
    private static Query parsePriceRange(String price) {
        String[] bounds = price.split("-");
        if (bounds.length != 2) {
            throw new IllegalArgumentException("price must have the form \"low-high\": " + price);
        }
        // Float.valueOf ignores surrounding whitespace and throws NumberFormatException
        // (an IllegalArgumentException) for non-numeric text.
        return NumericRangeQuery.newFloatRange(
                "price", Float.valueOf(bounds[0]), Float.valueOf(bounds[1]), true, true);
    }

    /** Returns true when {@code s} is neither null nor made up only of whitespace. */
    private static boolean hasText(String s) {
        return s != null && s.trim().length() != 0;
    }

    public static void main(String[] args) throws IOException, ParseException{
        search ss = new search("8.5Kg 2500W tcl", 13,1);
        for (Goods g : ss.getList()) {
            System.out.println("name           "+g.getName());
            System.out.println("attr           "+g.getS());
        }
    }

}

分享到：