1   /**
2    * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
3    *
4    * This library is free software; you can redistribute it and/or modify it under
5    * the terms of the GNU Lesser General Public License as published by the Free
6    * Software Foundation; either version 2.1 of the License, or (at your option)
7    * any later version.
8    *
9    * This library is distributed in the hope that it will be useful, but WITHOUT
10   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11   * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
12   * details.
13   */
14  
15  package com.liferay.portal.search.lucene;
16  
import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.search.Field;
import com.liferay.portal.kernel.util.PropsKeys;
import com.liferay.portal.kernel.util.StringPool;
import com.liferay.portal.kernel.util.StringUtil;
import com.liferay.portal.kernel.util.Validator;
import com.liferay.portal.util.PropsUtil;
import com.liferay.util.lucene.KeywordsUtil;

import java.io.IOException;

import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.QueryTermExtractor;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.WeightedTerm;
55  
56  /**
57   * <a href="LuceneHelperImpl.java.html"><b><i>View Source</i></b></a>
58   *
59   * @author Brian Wing Shun Chan
60   * @author Harry Mark
61   * @author Bruno Farache
62   */
63  public class LuceneHelperImpl implements LuceneHelper {
64  
65      public void addDocument(long companyId, Document document)
66          throws IOException {
67  
68          IndexAccessor indexAccessor = _getIndexAccessor(companyId);
69  
70          indexAccessor.addDocument(document);
71      }
72  
73      public void addExactTerm(
74          BooleanQuery booleanQuery, String field, String value) {
75  
76          //text = KeywordsUtil.escape(value);
77  
78          Query query = new TermQuery(new Term(field, value));
79  
80          booleanQuery.add(query, BooleanClause.Occur.SHOULD);
81      }
82  
83      public void addRequiredTerm(
84          BooleanQuery booleanQuery, String field, String value, boolean like) {
85  
86          if (like) {
87              value = StringUtil.replace(
88                  value, StringPool.PERCENT, StringPool.STAR);
89  
90              value = value.toLowerCase();
91  
92              WildcardQuery wildcardQuery = new WildcardQuery(
93                  new Term(field, value));
94  
95              booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
96          }
97          else {
98              //text = KeywordsUtil.escape(value);
99  
100             Term term = new Term(field, value);
101             TermQuery termQuery = new TermQuery(term);
102 
103             booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
104         }
105     }
106 
107     public void addTerm(
108             BooleanQuery booleanQuery, String field, String value, boolean like)
109         throws ParseException {
110 
111         if (Validator.isNull(value)) {
112             return;
113         }
114 
115         if (like) {
116             value = StringUtil.replace(
117                 value, StringPool.PERCENT, StringPool.BLANK);
118 
119             value = value.toLowerCase();
120 
121             Term term = new Term(
122                 field, StringPool.STAR.concat(value).concat(StringPool.STAR));
123 
124             WildcardQuery wildcardQuery = new WildcardQuery(term);
125 
126             booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
127         }
128         else {
129             QueryParser queryParser = new QueryParser(field, getAnalyzer());
130 
131             try {
132                 Query query = queryParser.parse(value);
133 
134                 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
135             }
136             catch (ParseException pe) {
137                 if (_log.isDebugEnabled()) {
138                     _log.debug(
139                         "ParseException thrown, reverting to literal search",
140                         pe);
141                 }
142 
143                 value = KeywordsUtil.escape(value);
144 
145                 Query query = queryParser.parse(value);
146 
147                 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
148             }
149         }
150     }
151 
152     public void delete(long companyId) {
153         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
154 
155         indexAccessor.delete();
156     }
157 
158     public void deleteDocuments(long companyId, Term term) throws IOException {
159         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
160 
161         indexAccessor.deleteDocuments(term);
162     }
163 
164     public Analyzer getAnalyzer() {
165         try {
166             return (Analyzer)_analyzerClass.newInstance();
167         }
168         catch (Exception e) {
169             throw new RuntimeException(e);
170         }
171     }
172 
173     public String[] getQueryTerms(Query query) {
174         String[] fieldNames = new String[] {
175             Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
176             Field.USER_NAME
177         };
178 
179         WeightedTerm[] weightedTerms = null;
180 
181         for (String fieldName : fieldNames) {
182             weightedTerms = QueryTermExtractor.getTerms(
183                 query, false, fieldName);
184 
185             if (weightedTerms.length > 0) {
186                 break;
187             }
188         }
189 
190         Set<String> queryTerms = new HashSet<String>();
191 
192         for (WeightedTerm weightedTerm : weightedTerms) {
193             queryTerms.add(weightedTerm.getTerm());
194         }
195 
196         return queryTerms.toArray(new String[queryTerms.size()]);
197     }
198 
199     public IndexSearcher getSearcher(long companyId, boolean readOnly)
200         throws IOException {
201 
202         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
203 
204         return new IndexSearcher(indexAccessor.getLuceneDir(), readOnly);
205     }
206 
207     public String getSnippet(
208             Query query, String field, String s, int maxNumFragments,
209             int fragmentLength, String fragmentSuffix, String preTag,
210             String postTag)
211         throws IOException {
212 
213         SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
214             preTag, postTag);
215 
216         QueryScorer queryScorer = new QueryScorer(query, field);
217 
218         Highlighter highlighter = new Highlighter(
219             simpleHTMLFormatter, queryScorer);
220 
221         highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
222 
223         TokenStream tokenStream = getAnalyzer().tokenStream(
224             field, new UnsyncStringReader(s));
225 
226         try {
227             String snippet = highlighter.getBestFragments(
228                 tokenStream, s, maxNumFragments, fragmentSuffix);
229 
230             if (Validator.isNotNull(snippet) &&
231                 !StringUtil.endsWith(snippet, fragmentSuffix)) {
232 
233                 snippet = snippet + fragmentSuffix;
234             }
235 
236             return snippet;
237         }
238         catch (InvalidTokenOffsetsException itoe) {
239             throw new IOException(itoe.getMessage());
240         }
241     }
242 
243     public void updateDocument(long companyId, Term term, Document document)
244         throws IOException {
245 
246         IndexAccessor indexAccessor = _getIndexAccessor(companyId);
247 
248         indexAccessor.updateDocument(term, document);
249     }
250 
251     public void shutdown() {
252         for (IndexAccessor indexAccessor : _indexAccessorMap.values()) {
253             indexAccessor.close();
254         }
255     }
256 
257     private LuceneHelperImpl() {
258         String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
259 
260         if (Validator.isNotNull(analyzerName)) {
261             try {
262                 _analyzerClass = Class.forName(analyzerName);
263             }
264             catch (Exception e) {
265                 _log.error(e);
266             }
267         }
268     }
269 
270     private IndexAccessor _getIndexAccessor(long companyId) {
271         IndexAccessor indexAccessor = _indexAccessorMap.get(companyId);
272 
273         if (indexAccessor == null) {
274             synchronized (this) {
275                 indexAccessor = _indexAccessorMap.get(companyId);
276 
277                 if (indexAccessor == null) {
278                     indexAccessor = new IndexAccessorImpl(companyId);
279 
280                     _indexAccessorMap.put(companyId, indexAccessor);
281                 }
282             }
283         }
284 
285         return indexAccessor;
286     }
287 
288     private static Log _log = LogFactoryUtil.getLog(LuceneHelperImpl.class);
289 
290     private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
291     private Map<Long, IndexAccessor> _indexAccessorMap =
292         new ConcurrentHashMap<Long, IndexAccessor>();
293 
294 }