关于Solr 谁可以告诉我这个问题怎么解决
最近搭建一个全文检索平台。最初考虑只采用lucene,然后自己写索引构建程序、检索框架等,类似osc @红薯 的方案,以前也做过比较熟悉。但有两个问题,1比较复杂,工作量和维护量都比较大。2 检索会有一定的延时。
看了看Solr决定采用solr,可以节省很大一部分开发时间。但有几个问题想请教下 osc 里的全文检索高手,希望大家不吝赐教:
1第一种方案,solr配置数据库,自动处理建索引。这样会不会延时很大,无法做到实时检索?
2第二种方案,通过solrj客户端在应用端 处理建索引问题,比如在发布一篇文章的时候,通过http 提交到solr 服务端上同时建索引,这样能不能达到实时检索?而且同时这个时候 应用端也会通过 http 检索 solr, 建索引检索同时进行,这样访问量大的时候会不会导致 内存泄露、索引文件磁盘I/O负载不了的问题?
有经验的同学能不能讨论下哪种方案较好一点(本项目对实时性要求较高)?或者在配置上应该怎么优化?
当然这个项目是企业内部应用,访问量不会太大,服务器资源有限,所以无法用到solr的分布式特性,比如索引复制、多核来解决这些问题。而且由于可能会部署在 windows下,排除了以前做过的sphinx、nlpbamboo 基于Postgresql数据库的方案。
Lucene是一个开放源代码的全文检索引擎工具包,即它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎(英文与德文两种西方语言)。Lucene的目的是为软件开发人员提供一个简单易用的工具包,以方便地在目标系统中实现全文检索的功能,或者以此为基础建立起完整的全文检索引擎。
Solr是一个高性能、采用Java5开发、基于Lucene的全文搜索服务器。它对Lucene进行了扩展,提供了比Lucene更为丰富的查询语言,同时实现了可配置、可扩展,并对查询性能进行了优化,还提供了一个完善的功能管理界面,是一款非常优秀的全文搜索引擎。它对外提供类似于Web-service的API接口:用户可以通过HTTP请求向搜索引擎服务器提交一定格式的XML文件来生成索引,也可以通过HTTP GET操作提出查找请求,并得到XML格式的返回结果。
Solr和Lucene的本质区别有以下三点:搜索服务器、企业级和管理。Lucene本质上是搜索库,不是独立的应用程序,而Solr是。Lucene专注于搜索底层的建设,而Solr专注于企业应用。Lucene不负责支撑搜索服务所必需的管理,而Solr负责。所以,一句话概括Solr:Solr是Lucene面向企业搜索应用的扩展。
修改主方法
/**
 * Rewrites the article fields of every Solr document whose {@code enterpriseId}
 * field matches the given id, then re-adds the documents and commits.
 *
 * <p>The matching documents are first fetched as {@code EnterpriseContentBean}s,
 * updated in memory, and sent back with {@code addBeans}.
 *
 * @param enterpriseId   value matched against the {@code enterpriseId} field and written back to each bean
 * @param enterpriseName enterprise name to store on each bean
 * @param lableType      comma-separated label types; split on {@code ","} into a list
 * @param resouce        value stored through {@code setResource}
 * @param pubDate        publication date string to store
 * @param content        article content; stored as a single-element list
 * @return {@code 0} on success, {@code 1} if a {@code SolrServerException} or
 *         {@code IOException} was raised while querying, adding or committing
 */
public int saveContent(String enterpriseId, String enterpriseName, String lableType, String resouce, String pubDate,
        String content) {
    int state = 0;
    // NOTE(review): `ap` is not declared in this snippet; presumably a field of the
    // enclosing class that exposes the Solr URL -- confirm.
    LBHttpSolrServer server = SolrUtil.getSolrServer(ap.getEnterprisenewSolrUrl());
    SolrQuery query = new SolrQuery();
    query.set("q", "enterpriseId:" + enterpriseId);
    try {
        QueryResponse qr = server.query(query);
        List<EnterpriseContentBean> contentList = qr.getBeans(EnterpriseContentBean.class);
        // Fill in the article information that has to be saved.
        for (EnterpriseContentBean bean : contentList) {
            bean.setEnterpriseId(enterpriseId);
            bean.setEnterpriseName(enterpriseName);
            // Each bean gets its own list instance, as in the original code.
            List<String> contents = new ArrayList<String>();
            contents.add(content);
            bean.setContent(contents);
            bean.setPubDate(pubDate);
            System.out.println("pubDate======>" + pubDate);
            List<String> lableTypes = Arrays.asList(lableType.split(","));
            bean.setLableType(lableTypes);
            bean.setResource(resouce);
            bean.setIsVisited_s("1");
        }
        server.addBeans(contentList);
        server.commit();
    } catch (SolrServerException | IOException e) {
        // Both failure kinds are reported the same way: flag the error and log it.
        state = 1;
        System.out.println("修改solr数据报错");
        e.printStackTrace();
    }
    return state;
}
删除主方法
/**
 * Deletes the Solr document whose id equals {@code enterpriseId} and commits.
 *
 * <p>NOTE(review): this deletes by document id, whereas {@code saveContent}
 * selects documents through the {@code enterpriseId} field; whether the two
 * address the same documents depends on the schema -- confirm.
 *
 * @param enterpriseId id passed to {@code deleteById}
 * @return {@code 0} on success, {@code 1} if a {@code SolrServerException} or
 *         {@code IOException} was raised while deleting or committing
 */
public int deletContent(String enterpriseId) {
    // NOTE(review): `ap` is not declared in this snippet; presumably a field of the
    // enclosing class that exposes the Solr URL -- confirm.
    LBHttpSolrServer server = SolrUtil.getSolrServer(ap.getEnterprisenewSolrUrl());
    int state = 0;
    try {
        server.deleteById(enterpriseId);
        server.commit();
    } catch (SolrServerException | IOException e) {
        // Both failure kinds are reported the same way: flag the error and log it.
        state = 1;
        System.out.println("删除solr数据报错");
        e.printStackTrace();
    }
    return state;
}
solr工具类
package comdinfobocutils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.LBHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import comdinfobocenterprisebeanEnterpriseContentBean;
import comdinfobocenterprisenewbeanSolrQueryResult;
/
与Solr服务器交互的工具类
@author qiuyj
/
public class SolrUtil {
/
获取与指定Solr地址的连接
@param solrUrl
@return
/
public static LBHttpSolrServer getSolrServer(String solrUrl){
final int ONE_HUNDRED_MS = 10000000;
if(solrUrl == null || ""equals(solrUrl)){
throw new RuntimeException("Solr url can not be empty!");
}
LBHttpSolrServer solrServer = null;
try {
solrServer = new LBHttpSolrServer(solrUrl);
solrServersetConnectionTimeout(ONE_HUNDRED_MS);
} catch (MalformedURLException e) {
eprintStackTrace();
} //SolrUtilgetSolrServer(solrUrl);
//solrServersetDefaultMaxConnectionsPerHost(100);
//solrServersetMaxTotalConnections(100);
return solrServer;
}
/
向指定的Solr地址添加一条数据
@param solrUrl
@param doc
@throws Exception
/
public static void add(String solrUrl, SolrInputDocument doc) throws Exception {
if(doc == null){
throw new RuntimeException("SolrInputDocument object can not be null!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
solradd(doc);
solrcommit();
}
/
向指定的Solr地址用JavaBean添加一条数据
@param solrUrl
@param obj
@throws Exception
/
public static void add(String solrUrl, Object obj) throws Exception {
if(obj == null){
throw new RuntimeException("Object to be inserted can not be null!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
solraddBean(obj);
solrcommit();
}
/
向指定Solr地址批量添加数据
@param solrUrl
@param docs
@throws Exception
/
@SuppressWarnings("unchecked")
public static void addAll(String solrUrl, Collection< extends Object> objs) throws Exception {
if(objs == null){
throw new RuntimeException("Object collection can not be null!");
}
if(objssize() == 0){
return;
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
if(objsiterator()next() instanceof SolrInputDocument){
solradd((Collection<SolrInputDocument>)objs);
} else {
solraddBeans(objs);
}
solrcommit();
}
/
根据给定的id,从solr中删除对应信息
@param solrUrl
@param ids
/
public static void deleteByIds(String solrUrl, String ids) throws Exception {
if(ids == null || idslength == 0){
throw new RuntimeException("Ids can not be empty!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
solrdeleteById(ArraysasList(ids));
solrcommit();
}
public static void deleteByIds(String solrUrl, Integer ids) throws Exception {
if(ids == null || idslength == 0){
throw new RuntimeException("Ids can not be empty!");
}
List<String> stringIdList = new ArrayList<>(idslength);
for(Integer id : ids){
stringIdListadd("" + id);
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
solrdeleteById(stringIdList);
solrcommit();
}
/
删除指定Solr路径下符合指定查询条件的数据
@param solrUrl
@param condition
@throws Exception
/
public static void deleteByCondition(String solrUrl, String condition) throws Exception {
if(condition == null || ""equals(condition)){
throw new RuntimeException("Condition can not be empty!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
solrdeleteByQuery(condition);
solrcommit();
}
/
删除指定Solr路径下的所有数据
@param solrUrl
@throws Exception
/
public static void deleteAll(String solrUrl) throws Exception {
deleteByCondition(solrUrl, ":");
}
/
根据 指定查询条件从Solr中查询数据,并以SolrDocument的List形式返回
@param solrUrl
@param query
@return
@throws Exception
/
public static SolrDocumentList queryAndGetSolrDocumentList(String solrUrl, SolrQuery query) throws Exception {
if(query == null){
throw new RuntimeException("SolrQuery object can not be null!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
QueryResponse resp = solrquery(query);
return respgetResults();
}
/
根据 指定查询条件从Solr中查询数据,并以QueryResponse形式返回
@param solrUrl
@param query
@return
@throws Exception
/
public static QueryResponse queryAndGetSolrQueryResponse(String solrUrl, SolrQuery query) throws Exception {
if(query == null){
throw new RuntimeException("SolrQuery object can not be null!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
QueryResponse resp = solrquery(query);
return resp;
}
/
根据 指定查询条件从Solr中查询数据,并以Java Bean的List形式返回
@param solrUrl
@param query
@param returnClass 返回的List集合的泛型
@return
@throws Exception
/
public static <T> List<T> queryAndGetBeanList(String solrUrl, SolrQuery query, Class<T> returnClass) throws Exception {
if(query == null){
throw new RuntimeException("SolrQuery object can not be null!");
}
if(returnClass == null){
throw new RuntimeException("Return class can not be null!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
QueryResponse resp = solrquery(query);
return respgetBeans(returnClass);
}
/
根据 指定查询条件从Solr中查询数据,并以SolrQueryResult对象的形式返回,其中包含List对象和totalCount
@param solrUrl
@param query
@param returnClass 返回的List集合的泛型
@return
@throws Exception
/
public static <T> SolrQueryResult<T> queryAndGetSolrQueryResult(String solrUrl, SolrQuery query, Class<T> returnClass) throws Exception {
SolrQueryResult<T> result = new SolrQueryResult<T>();
if(query == null){
throw new RuntimeException("SolrQuery object can not be null!");
}
if(returnClass == null){
throw new RuntimeException("Return class can not be null!");
}
LBHttpSolrServer solr = getSolrServer(solrUrl);
solrsetConnectionTimeout(10000);
QueryResponse resp = solrquery(query);
List<T> resultList = respgetBeans(returnClass);
long totalCount = respgetResults()getNumFound();
resultsetResultList(resultList);
resultsetTotalCount(totalCount);
return result;
}
/
根据 指定查询条件从Solr中查询数据,并以SolrQueryResult对象的形式返回,其中包含List对象和totalCount
@param solrUrl
@param query
@param returnClass 返回的List集合的泛型
@return
@throws Exception
/
public static <T> SolrQueryResult<T> queryAndGetSolrQueryResult(LBHttpSolrServer solr, SolrQuery query, Class<T> returnClass) throws Exception {
SolrQueryResult<T> result = new SolrQueryResult<T>();
if(query == null){
throw new RuntimeException("SolrQuery object can not be null!");
}
if(returnClass == null){
throw new RuntimeException("Return class can not be null!");
}
QueryResponse resp = solrquery(query);
List<T> resultList = respgetBeans(returnClass);
long totalCount = respgetResults()getNumFound();
resultsetResultList(resultList);
resultsetTotalCount(totalCount);
return result;
}
/
用以过滤一些影响Solr查询的特殊字符,如左右括号、星号等
@param str
@return
/
public static String filterSpecialCharacters(String str){
if(str == null){
return str;
}
str = strreplace("(", "\\(");
str = strreplace(")", "\\)");
str = strreplace("", "\\");
return str;
}
public static void updateSolrById(LBHttpSolrServer server){
SolrQuery query = new SolrQuery();
String id="5d495a00a5c8118c03ef0bec0111dd8d";
int state=0;
String name="新疆金风科技股份有限公司";
queryset("q", "enterpriseId:"+id);
try {
QueryResponse qr = serverquery(query);
List<EnterpriseContentBean> contentList = qrgetBeans(EnterpriseContentBeanclass);
//设置需要保存的文章信息
for(EnterpriseContentBean bean:contentList){
// beansetEnterpriseId(enterpriseId);
beansetEnterpriseName(name);
beansetResource("东方财富网港股频道");
}
serveraddBeans(contentList);
servercommit();
} catch (SolrServerException e) {
state = 1;
eprintStackTrace();
} catch (IOException e) {
state = 1;
eprintStackTrace();
}
}
public static void main(String[] args) {
try {
LBHttpSolrServerenterpriseServer=new LBHttpSolrServer("http://115182226165:8008/solr/enterprisenew");
enterpriseServersetConnectionTimeout(10000000);
updateSolrById(enterpriseServer);
Systemoutprintln("over");
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
eprintStackTrace();
}
}
}
0条评论