root / src / gr / ebs / gss / server / ejb / indexer / IndexerMDBean.java @ 623:66f69a7348ed
History | View | Annotate | Download (11.6 kB)
1 |
/*
|
---|---|
2 |
* Copyright 2007, 2008, 2009 Electronic Business Systems Ltd.
|
3 |
*
|
4 |
* This file is part of GSS.
|
5 |
*
|
6 |
* GSS is free software: you can redistribute it and/or modify
|
7 |
* it under the terms of the GNU General Public License as published by
|
8 |
* the Free Software Foundation, either version 3 of the License, or
|
9 |
* (at your option) any later version.
|
10 |
*
|
11 |
* GSS is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with GSS. If not, see <http://www.gnu.org/licenses/>.
|
18 |
*/
|
19 |
package gr.ebs.gss.server.ejb.indexer; |
20 |
|
21 |
import static gr.ebs.gss.server.configuration.GSSConfigurationFactory.getConfiguration; |
22 |
import gr.ebs.gss.client.exceptions.ObjectNotFoundException; |
23 |
import gr.ebs.gss.server.domain.FileBody; |
24 |
import gr.ebs.gss.server.domain.FileHeader; |
25 |
import gr.ebs.gss.server.domain.FileTag; |
26 |
import gr.ebs.gss.server.ejb.GSSDAO; |
27 |
|
28 |
import java.io.File; |
29 |
import java.io.IOException; |
30 |
import java.io.StringWriter; |
31 |
import java.io.UnsupportedEncodingException; |
32 |
import java.util.ArrayList; |
33 |
import java.util.List; |
34 |
import java.util.StringTokenizer; |
35 |
|
36 |
import javax.ejb.ActivationConfigProperty; |
37 |
import javax.ejb.EJB; |
38 |
import javax.ejb.EJBException; |
39 |
import javax.ejb.MessageDriven; |
40 |
import javax.ejb.TransactionAttribute; |
41 |
import javax.ejb.TransactionAttributeType; |
42 |
import javax.jms.JMSException; |
43 |
import javax.jms.MapMessage; |
44 |
import javax.jms.Message; |
45 |
import javax.jms.MessageListener; |
46 |
import javax.xml.parsers.DocumentBuilder; |
47 |
import javax.xml.parsers.DocumentBuilderFactory; |
48 |
import javax.xml.parsers.ParserConfigurationException; |
49 |
import javax.xml.transform.OutputKeys; |
50 |
import javax.xml.transform.Transformer; |
51 |
import javax.xml.transform.TransformerConfigurationException; |
52 |
import javax.xml.transform.TransformerException; |
53 |
import javax.xml.transform.TransformerFactory; |
54 |
import javax.xml.transform.dom.DOMSource; |
55 |
import javax.xml.transform.stream.StreamResult; |
56 |
|
57 |
import org.apache.commons.httpclient.HttpClient; |
58 |
import org.apache.commons.httpclient.HttpException; |
59 |
import org.apache.commons.httpclient.methods.PostMethod; |
60 |
import org.apache.commons.httpclient.methods.StringRequestEntity; |
61 |
import org.apache.commons.httpclient.methods.multipart.FilePart; |
62 |
import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity; |
63 |
import org.apache.commons.httpclient.methods.multipart.Part; |
64 |
import org.apache.commons.httpclient.methods.multipart.StringPart; |
65 |
import org.apache.commons.logging.Log; |
66 |
import org.apache.commons.logging.LogFactory; |
67 |
import org.jboss.ejb3.annotation.ResourceAdapter; |
68 |
import org.w3c.dom.Document; |
69 |
import org.w3c.dom.Element; |
70 |
import org.w3c.dom.Node; |
71 |
|
72 |
/**
|
73 |
* Message driven bean that accepts messages whenever a document is created,
|
74 |
* modified or deleted and adds/removes the item from the search index.
|
75 |
*/
|
76 |
@MessageDriven(activationConfig={@ActivationConfigProperty(propertyName="destinationType", propertyValue="javax.jms.Queue"), |
77 |
@ActivationConfigProperty(propertyName="destination", propertyValue="queue/gss-indexingQueue")}) |
78 |
@ResourceAdapter("hornetq-ra.rar") |
79 |
public class IndexerMDBean implements MessageListener { |
80 |
/**
|
81 |
* The logger
|
82 |
*/
|
83 |
private static final Log logger = LogFactory.getLog(IndexerMDBean.class); |
84 |
|
85 |
/**
|
86 |
* EJB offering access to the JPA entity manager
|
87 |
*/
|
88 |
@EJB GSSDAO dao;
|
89 |
|
90 |
/**
|
91 |
* Decides to add or drop an item from the index depending on the message
|
92 |
* received
|
93 |
*
|
94 |
* It currently uses the patched solr API for rich documents. This API does not
|
95 |
* allow indexing time field boosting. For this reason we have to use the dismax search API (instead of the
|
96 |
* standard) that allows for search time field boosting
|
97 |
*
|
98 |
* @param msg
|
99 |
* @see javax.jms.MessageListener#onMessage(javax.jms.Message)
|
100 |
*/
|
101 |
@Override
|
102 |
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
|
103 |
public void onMessage(Message msg) { |
104 |
PostMethod method = null;
|
105 |
String idStr = ""; |
106 |
try {
|
107 |
MapMessage map = (MapMessage) msg; |
108 |
Long id = (Long) map.getObject("id"); |
109 |
idStr = id.toString(); |
110 |
boolean delete = map.getBoolean("delete"); |
111 |
HttpClient httpClient = new HttpClient();
|
112 |
if (delete) {
|
113 |
method = new PostMethod(getConfiguration().getString("solrUpdateUrl")); |
114 |
String deleteXMLMsg = "<delete><id>" + idStr + "</id></delete>"; |
115 |
if (logger.isDebugEnabled())
|
116 |
logger.debug(deleteXMLMsg); |
117 |
method.setRequestEntity(new StringRequestEntity(deleteXMLMsg, "text/xml", "iso8859-1")); |
118 |
int statusCode = httpClient.executeMethod(method);
|
119 |
if (logger.isDebugEnabled())
|
120 |
logger.debug("HTTP status: " + statusCode);
|
121 |
String response = method.getResponseBodyAsString();
|
122 |
if (logger.isDebugEnabled())
|
123 |
logger.debug(response); |
124 |
|
125 |
method.releaseConnection(); |
126 |
if (statusCode != 200) |
127 |
throw new EJBException("Response from Solr for deleting file id " + |
128 |
idStr + " had status: " + statusCode);
|
129 |
sendCommit(httpClient, 0);
|
130 |
} else {
|
131 |
FileHeader file = dao.getFileForIndexing(id); |
132 |
FileBody body = file.getCurrentBody(); |
133 |
String type = null; |
134 |
String mime = body.getMimeType();
|
135 |
boolean nofile = false; |
136 |
if (body.getFileSize() > getConfiguration().getLong("solrDocumentUploadLimitInKB") * 1024) |
137 |
nofile = true;
|
138 |
else if (mime.equals("application/pdf")) |
139 |
type = "pdf";
|
140 |
else if (mime.equals("text/plain")) |
141 |
type = "text";
|
142 |
else if (mime.equals("text/html")) |
143 |
type = "html";
|
144 |
else if (mime.endsWith("msword")) |
145 |
type = "doc";
|
146 |
else if (mime.endsWith("ms-excel")) |
147 |
type = "xls";
|
148 |
else if (mime.endsWith("powerpoint")) |
149 |
type = "ppt";
|
150 |
else
|
151 |
nofile = true;
|
152 |
if (!nofile) {
|
153 |
method = new PostMethod(getConfiguration().getString("solrUpdateRichUrl")); |
154 |
List<Part> parts = new ArrayList<Part>(); |
155 |
parts.add(new StringPart("stream.type", type)); |
156 |
StringBuffer fieldnames = new StringBuffer("id,name"); |
157 |
if (!file.getFileTags().isEmpty())
|
158 |
fieldnames.append(",tag");
|
159 |
parts.add(new StringPart("fieldnames", fieldnames.toString())); |
160 |
parts.add(new StringPart("id", idStr)); |
161 |
parts.add(new StringPart("name", tokenizeFilename(file.getName()), "UTF-8")); |
162 |
for (FileTag tag : file.getFileTags())
|
163 |
parts.add(new StringPart("tag", tag.getTag(), "UTF-8")); |
164 |
parts.add(new StringPart("stream.fieldname", "body")); |
165 |
parts.add(new StringPart("commit", "true")); |
166 |
parts.add(new FilePart(file.getName(), new File(body.getStoredFilePath()))); |
167 |
method.setRequestEntity(new MultipartRequestEntity(parts.toArray(new Part[1]), method.getParams())); |
168 |
httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(5000);
|
169 |
if (logger.isDebugEnabled())
|
170 |
logger.debug("Sending rich document " + idStr);
|
171 |
int statusCode = httpClient.executeMethod(method);
|
172 |
if (logger.isDebugEnabled())
|
173 |
logger.debug("HTTP status: " + statusCode);
|
174 |
String response = method.getResponseBodyAsString();
|
175 |
if (logger.isDebugEnabled())
|
176 |
logger.debug(response); |
177 |
if (statusCode != 200) |
178 |
throw new EJBException("Response from Solr for updating file id " + |
179 |
idStr + " had status: " + statusCode);
|
180 |
} else {
|
181 |
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); |
182 |
DocumentBuilder db = dbf.newDocumentBuilder();
|
183 |
Document doc = db.newDocument();
|
184 |
Node root = doc.createElement("add");
|
185 |
doc.appendChild(root); |
186 |
Node docNode = doc.createElement("doc");
|
187 |
root.appendChild(docNode); |
188 |
Element field = doc.createElement("field"); |
189 |
field.setAttribute("name", "id"); |
190 |
docNode.appendChild(field); |
191 |
field.appendChild(doc.createTextNode(idStr)); |
192 |
|
193 |
field = doc.createElement("field");
|
194 |
field.setAttribute("name", "name"); |
195 |
docNode.appendChild(field); |
196 |
field.appendChild(doc.createTextNode(tokenizeFilename(file.getName()))); |
197 |
|
198 |
for (FileTag tag : file.getFileTags()) {
|
199 |
field = doc.createElement("field");
|
200 |
field.setAttribute("name", "tag"); |
201 |
docNode.appendChild(field); |
202 |
field.appendChild(doc.createTextNode(tag.getTag())); |
203 |
} |
204 |
|
205 |
TransformerFactory fact = TransformerFactory.newInstance(); |
206 |
Transformer trans = fact.newTransformer();
|
207 |
trans.setOutputProperty(OutputKeys.INDENT, "yes"); |
208 |
StringWriter sw = new StringWriter(); |
209 |
StreamResult sr = new StreamResult(sw); |
210 |
DOMSource source = new DOMSource(doc); |
211 |
trans.transform(source, sr); |
212 |
if (logger.isDebugEnabled())
|
213 |
logger.debug(sw.toString()); |
214 |
|
215 |
method = new PostMethod(getConfiguration().getString("solrUpdateUrl")); |
216 |
method.setRequestEntity(new StringRequestEntity(sw.toString(),"text/xml", "UTF-8")); |
217 |
int statusCode = httpClient.executeMethod(method);
|
218 |
if (logger.isDebugEnabled())
|
219 |
logger.debug("HTTP status: " + statusCode);
|
220 |
String response = method.getResponseBodyAsString();
|
221 |
if (logger.isDebugEnabled())
|
222 |
logger.debug(response); |
223 |
|
224 |
method.releaseConnection(); |
225 |
if (statusCode != 200) |
226 |
throw new EJBException("Response from Solr for updating file id " + |
227 |
idStr + " had status: " + statusCode);
|
228 |
|
229 |
sendCommit(httpClient, 0);
|
230 |
} |
231 |
} |
232 |
} |
233 |
catch (JMSException e) {
|
234 |
throw new EJBException("Error processing file ID " + idStr, e); |
235 |
} catch (UnsupportedEncodingException e) { |
236 |
throw new EJBException("Error processing file ID " + idStr, e); |
237 |
} catch (HttpException e) {
|
238 |
throw new EJBException("Error processing file ID " + idStr, e); |
239 |
} catch (IOException e) { |
240 |
throw new EJBException("Error processing file ID " + idStr, e); |
241 |
} catch (ObjectNotFoundException e) {
|
242 |
logger.warn("Error processing file ID " + idStr + ": Indexing " + |
243 |
"aborted because the file could not be found");
|
244 |
} catch (ParserConfigurationException e) { |
245 |
throw new EJBException("Error processing file ID " + idStr, e); |
246 |
} catch (TransformerConfigurationException e) { |
247 |
throw new EJBException("Error processing file ID " + idStr, e); |
248 |
} catch (TransformerException e) { |
249 |
throw new EJBException("Error processing file ID " + idStr, e); |
250 |
} |
251 |
finally {
|
252 |
if (method != null) |
253 |
method.releaseConnection(); |
254 |
} |
255 |
} |
256 |
|
257 |
/**
|
258 |
* Sends a commit message to the solr server
|
259 |
*
|
260 |
* @param httpClient
|
261 |
* @param retryCount If the commit fails, it is retried three times. This parameter is passed in the recursive
|
262 |
* calls to stop the recursion
|
263 |
* @throws UnsupportedEncodingException
|
264 |
* @throws IOException
|
265 |
* @throws HttpException
|
266 |
*/
|
267 |
private void sendCommit(HttpClient httpClient, int retryCount) throws UnsupportedEncodingException, IOException, HttpException { |
268 |
PostMethod method = null;
|
269 |
try {
|
270 |
if (logger.isDebugEnabled())
|
271 |
logger.debug("Commit retry: " + retryCount);
|
272 |
method = new PostMethod(getConfiguration().getString("solrUpdateUrl")); |
273 |
method.setRequestEntity(new StringRequestEntity("<commit/>", "text/xml", "iso8859-1")); |
274 |
int statusCode = httpClient.executeMethod(method);
|
275 |
if (logger.isDebugEnabled())
|
276 |
logger.debug("HTTP status: " + statusCode);
|
277 |
String response = method.getResponseBodyAsString();
|
278 |
if (logger.isDebugEnabled())
|
279 |
logger.debug(response); |
280 |
if (statusCode != 200 && retryCount < 2) { |
281 |
try {
|
282 |
Thread.sleep(10000); // Give Solr a little time to be available. |
283 |
} catch (InterruptedException e) { |
284 |
} |
285 |
sendCommit(httpClient, retryCount + 1);
|
286 |
} |
287 |
} |
288 |
finally {
|
289 |
if (method != null) |
290 |
method.releaseConnection(); |
291 |
} |
292 |
} |
293 |
|
294 |
private String tokenizeFilename(String filename){ |
295 |
StringBuffer result = new StringBuffer(); |
296 |
StringTokenizer tokenizer = new StringTokenizer(filename,"._"); |
297 |
while(tokenizer.hasMoreTokens()){
|
298 |
result.append(tokenizer.nextToken()); |
299 |
result.append(" ");
|
300 |
} |
301 |
result.append(filename); |
302 |
return result.toString();
|
303 |
} |
304 |
} |