Revision 0fcbf8bd src/gr/ebs/gss/server/ejb/indexer/IndexerMDBean.java
b/src/gr/ebs/gss/server/ejb/indexer/IndexerMDBean.java | ||
---|---|---|
20 | 20 |
|
21 | 21 |
import static gr.ebs.gss.server.configuration.GSSConfigurationFactory.getConfiguration; |
22 | 22 |
import gr.ebs.gss.client.exceptions.ObjectNotFoundException; |
23 |
import gr.ebs.gss.server.configuration.GSSConfigurationFactory; |
|
23 | 24 |
import gr.ebs.gss.server.domain.FileBody; |
24 | 25 |
import gr.ebs.gss.server.domain.FileHeader; |
25 | 26 |
import gr.ebs.gss.server.domain.FileTag; |
27 |
import gr.ebs.gss.server.ejb.ExternalAPI; |
|
26 | 28 |
import gr.ebs.gss.server.ejb.GSSDAO; |
27 | 29 |
|
28 | 30 |
import java.io.File; |
29 | 31 |
import java.io.IOException; |
30 |
import java.io.StringWriter; |
|
31 | 32 |
import java.io.UnsupportedEncodingException; |
32 |
import java.util.ArrayList; |
|
33 |
import java.util.List; |
|
33 |
import java.net.MalformedURLException; |
|
34 | 34 |
import java.util.StringTokenizer; |
35 | 35 |
|
36 | 36 |
import javax.ejb.ActivationConfigProperty; |
... | ... | |
43 | 43 |
import javax.jms.MapMessage; |
44 | 44 |
import javax.jms.Message; |
45 | 45 |
import javax.jms.MessageListener; |
46 |
import javax.xml.parsers.DocumentBuilder; |
|
47 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
48 |
import javax.xml.parsers.ParserConfigurationException; |
|
49 |
import javax.xml.transform.OutputKeys; |
|
50 |
import javax.xml.transform.Transformer; |
|
51 |
import javax.xml.transform.TransformerConfigurationException; |
|
52 |
import javax.xml.transform.TransformerException; |
|
53 |
import javax.xml.transform.TransformerFactory; |
|
54 |
import javax.xml.transform.dom.DOMSource; |
|
55 |
import javax.xml.transform.stream.StreamResult; |
|
56 | 46 |
|
47 |
import org.apache.commons.configuration.Configuration; |
|
48 |
import org.apache.commons.configuration.ConfigurationException; |
|
57 | 49 |
import org.apache.commons.httpclient.HttpClient; |
58 | 50 |
import org.apache.commons.httpclient.HttpException; |
59 | 51 |
import org.apache.commons.httpclient.methods.PostMethod; |
60 | 52 |
import org.apache.commons.httpclient.methods.StringRequestEntity; |
61 |
import org.apache.commons.httpclient.methods.multipart.FilePart; |
|
62 |
import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity; |
|
63 |
import org.apache.commons.httpclient.methods.multipart.Part; |
|
64 |
import org.apache.commons.httpclient.methods.multipart.StringPart; |
|
65 | 53 |
import org.apache.commons.logging.Log; |
66 | 54 |
import org.apache.commons.logging.LogFactory; |
55 |
import org.apache.solr.client.solrj.SolrServerException; |
|
56 |
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; |
|
57 |
import org.apache.solr.client.solrj.request.AbstractUpdateRequest; |
|
58 |
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; |
|
59 |
import org.apache.solr.common.SolrException; |
|
60 |
import org.apache.solr.common.SolrInputDocument; |
|
67 | 61 |
import org.jboss.ejb3.annotation.ResourceAdapter; |
68 |
import org.w3c.dom.Document; |
|
69 |
import org.w3c.dom.Element; |
|
70 |
import org.w3c.dom.Node; |
|
71 | 62 |
|
72 | 63 |
/** |
73 | 64 |
* Message driven bean that accepts messages whenever a document is created, |
... | ... | |
85 | 76 |
/** |
86 | 77 |
* EJB offering access to the JPA entity manager |
87 | 78 |
*/ |
88 |
@EJB GSSDAO dao;
|
|
79 |
@EJB ExternalAPI service;
|
|
89 | 80 |
|
90 | 81 |
/** |
91 | 82 |
* Decides to add or drop an item from the index depending on the message |
... | ... | |
99 | 90 |
* @see javax.jms.MessageListener#onMessage(javax.jms.Message) |
100 | 91 |
*/ |
101 | 92 |
@Override |
102 |
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) |
|
103 | 93 |
public void onMessage(Message msg) { |
104 |
PostMethod method = null; |
|
105 |
String idStr = ""; |
|
94 |
Long id = null; |
|
106 | 95 |
try { |
107 | 96 |
MapMessage map = (MapMessage) msg; |
108 |
Long id = (Long) map.getObject("id"); |
|
109 |
idStr = id.toString(); |
|
97 |
id = (Long) map.getObject("id"); |
|
110 | 98 |
boolean delete = map.getBoolean("delete"); |
111 |
HttpClient httpClient = new HttpClient();
|
|
99 |
Configuration config = GSSConfigurationFactory.getConfiguration();
|
|
112 | 100 |
if (delete) { |
113 |
method = new PostMethod(getConfiguration().getString("solrUpdateUrl")); |
|
114 |
String deleteXMLMsg = "<delete><id>" + idStr + "</id></delete>"; |
|
115 |
if (logger.isDebugEnabled()) |
|
116 |
logger.debug(deleteXMLMsg); |
|
117 |
method.setRequestEntity(new StringRequestEntity(deleteXMLMsg, "text/xml", "iso8859-1")); |
|
118 |
int statusCode = httpClient.executeMethod(method); |
|
119 |
if (logger.isDebugEnabled()) |
|
120 |
logger.debug("HTTP status: " + statusCode); |
|
121 |
String response = method.getResponseBodyAsString(); |
|
122 |
if (logger.isDebugEnabled()) |
|
123 |
logger.debug(response); |
|
124 |
|
|
125 |
method.releaseConnection(); |
|
126 |
if (statusCode != 200) |
|
127 |
throw new EJBException("Response from Solr for deleting file id " + |
|
128 |
idStr + " had status: " + statusCode); |
|
129 |
sendCommit(httpClient, 0); |
|
101 |
sendDelete(config.getString("solr.url"), id); |
|
130 | 102 |
} else { |
131 |
FileHeader file = dao.getFileForIndexing(id); |
|
132 |
FileBody body = file.getCurrentBody(); |
|
133 |
String type = null; |
|
134 |
String mime = body.getMimeType(); |
|
135 |
boolean nofile = false; |
|
136 |
if (body.getFileSize() > getConfiguration().getLong("solrDocumentUploadLimitInKB") * 1024) |
|
137 |
nofile = true; |
|
138 |
else if (mime.equals("application/pdf")) |
|
139 |
type = "pdf"; |
|
140 |
else if (mime.equals("text/plain")) |
|
141 |
type = "text"; |
|
142 |
else if (mime.equals("text/html")) |
|
143 |
type = "html"; |
|
144 |
else if (mime.endsWith("msword")) |
|
145 |
type = "doc"; |
|
146 |
else if (mime.endsWith("ms-excel")) |
|
147 |
type = "xls"; |
|
148 |
else if (mime.endsWith("powerpoint")) |
|
149 |
type = "ppt"; |
|
150 |
else |
|
151 |
nofile = true; |
|
152 |
if (!nofile) { |
|
153 |
method = new PostMethod(getConfiguration().getString("solrUpdateRichUrl")); |
|
154 |
List<Part> parts = new ArrayList<Part>(); |
|
155 |
parts.add(new StringPart("stream.type", type)); |
|
156 |
StringBuffer fieldnames = new StringBuffer("id,name"); |
|
157 |
if (!file.getFileTags().isEmpty()) |
|
158 |
fieldnames.append(",tag"); |
|
159 |
parts.add(new StringPart("fieldnames", fieldnames.toString())); |
|
160 |
parts.add(new StringPart("id", idStr)); |
|
161 |
parts.add(new StringPart("name", tokenizeFilename(file.getName()), "UTF-8")); |
|
162 |
for (FileTag tag : file.getFileTags()) |
|
163 |
parts.add(new StringPart("tag", tag.getTag(), "UTF-8")); |
|
164 |
parts.add(new StringPart("stream.fieldname", "body")); |
|
165 |
parts.add(new StringPart("commit", "true")); |
|
166 |
parts.add(new FilePart(file.getName(), new File(body.getStoredFilePath()))); |
|
167 |
method.setRequestEntity(new MultipartRequestEntity(parts.toArray(new Part[1]), method.getParams())); |
|
168 |
httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(5000); |
|
169 |
if (logger.isDebugEnabled()) |
|
170 |
logger.debug("Sending rich document " + idStr); |
|
171 |
int statusCode = httpClient.executeMethod(method); |
|
172 |
if (logger.isDebugEnabled()) |
|
173 |
logger.debug("HTTP status: " + statusCode); |
|
174 |
String response = method.getResponseBodyAsString(); |
|
175 |
if (logger.isDebugEnabled()) |
|
176 |
logger.debug(response); |
|
177 |
if (statusCode != 200) |
|
178 |
throw new EJBException("Response from Solr for updating file id " + |
|
179 |
idStr + " had status: " + statusCode); |
|
180 |
} else { |
|
181 |
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); |
|
182 |
DocumentBuilder db = dbf.newDocumentBuilder(); |
|
183 |
Document doc = db.newDocument(); |
|
184 |
Node root = doc.createElement("add"); |
|
185 |
doc.appendChild(root); |
|
186 |
Node docNode = doc.createElement("doc"); |
|
187 |
root.appendChild(docNode); |
|
188 |
Element field = doc.createElement("field"); |
|
189 |
field.setAttribute("name", "id"); |
|
190 |
docNode.appendChild(field); |
|
191 |
field.appendChild(doc.createTextNode(idStr)); |
|
192 |
|
|
193 |
field = doc.createElement("field"); |
|
194 |
field.setAttribute("name", "name"); |
|
195 |
docNode.appendChild(field); |
|
196 |
field.appendChild(doc.createTextNode(tokenizeFilename(file.getName()))); |
|
197 |
|
|
198 |
for (FileTag tag : file.getFileTags()) { |
|
199 |
field = doc.createElement("field"); |
|
200 |
field.setAttribute("name", "tag"); |
|
201 |
docNode.appendChild(field); |
|
202 |
field.appendChild(doc.createTextNode(tag.getTag())); |
|
203 |
} |
|
204 |
|
|
205 |
TransformerFactory fact = TransformerFactory.newInstance(); |
|
206 |
Transformer trans = fact.newTransformer(); |
|
207 |
trans.setOutputProperty(OutputKeys.INDENT, "yes"); |
|
208 |
StringWriter sw = new StringWriter(); |
|
209 |
StreamResult sr = new StreamResult(sw); |
|
210 |
DOMSource source = new DOMSource(doc); |
|
211 |
trans.transform(source, sr); |
|
212 |
if (logger.isDebugEnabled()) |
|
213 |
logger.debug(sw.toString()); |
|
214 |
|
|
215 |
method = new PostMethod(getConfiguration().getString("solrUpdateUrl")); |
|
216 |
method.setRequestEntity(new StringRequestEntity(sw.toString(),"text/xml", "UTF-8")); |
|
217 |
int statusCode = httpClient.executeMethod(method); |
|
218 |
if (logger.isDebugEnabled()) |
|
219 |
logger.debug("HTTP status: " + statusCode); |
|
220 |
String response = method.getResponseBodyAsString(); |
|
221 |
if (logger.isDebugEnabled()) |
|
222 |
logger.debug(response); |
|
223 |
|
|
224 |
method.releaseConnection(); |
|
225 |
if (statusCode != 200) |
|
226 |
throw new EJBException("Response from Solr for updating file id " + |
|
227 |
idStr + " had status: " + statusCode); |
|
228 |
|
|
229 |
sendCommit(httpClient, 0); |
|
230 |
} |
|
231 |
} |
|
103 |
service.postFileToSolr(id); |
|
104 |
} |
|
232 | 105 |
} |
233 | 106 |
catch (JMSException e) { |
234 |
throw new EJBException("Error processing file ID " + idStr, e); |
|
235 |
} catch (UnsupportedEncodingException e) { |
|
236 |
throw new EJBException("Error processing file ID " + idStr, e); |
|
237 |
} catch (HttpException e) { |
|
238 |
throw new EJBException("Error processing file ID " + idStr, e); |
|
239 |
} catch (IOException e) { |
|
240 |
throw new EJBException("Error processing file ID " + idStr, e); |
|
241 |
} catch (ObjectNotFoundException e) { |
|
242 |
logger.warn("Error processing file ID " + idStr + ": Indexing " + |
|
243 |
"aborted because the file could not be found"); |
|
244 |
} catch (ParserConfigurationException e) { |
|
245 |
throw new EJBException("Error processing file ID " + idStr, e); |
|
246 |
} catch (TransformerConfigurationException e) { |
|
247 |
throw new EJBException("Error processing file ID " + idStr, e); |
|
248 |
} catch (TransformerException e) { |
|
249 |
throw new EJBException("Error processing file ID " + idStr, e); |
|
107 |
throw new EJBException("Error processing file ID " + id, e); |
|
108 |
} |
|
109 |
catch (IOException e) { |
|
110 |
throw new EJBException("Error processing file ID " + id, e); |
|
250 | 111 |
} |
251 |
finally { |
|
252 |
if (method != null) |
|
253 |
method.releaseConnection(); |
|
112 |
catch (SolrServerException e) { |
|
113 |
throw new EJBException(e); |
|
254 | 114 |
} |
255 | 115 |
} |
256 | 116 |
|
117 |
|
|
257 | 118 |
/** |
258 |
* Sends a commit message to the solr server |
|
259 |
* |
|
260 |
* @param httpClient |
|
261 |
* @param retryCount If the commit does not return HTTP status 200, it is retried up to two more times (three attempts in total). This parameter is passed in the recursive |
|
262 |
* calls to stop the recursion |
|
263 |
* @throws UnsupportedEncodingException |
|
119 |
* Sends a delete command to Solr and then commits. The id is the Long id of the indexed document |
|
120 |
* |
|
121 |
* @param solrUrl the URL of the Solr server that receives the delete command |
|
122 |
* @param id the id of the document to delete; it is sent to Solr in its string form |
|
123 |
* @throws SolrServerException |
|
264 | 124 |
* @throws IOException |
265 |
* @throws HttpException |
|
266 | 125 |
*/ |
267 |
private void sendCommit(HttpClient httpClient, int retryCount) throws UnsupportedEncodingException, IOException, HttpException { |
|
268 |
PostMethod method = null; |
|
269 |
try { |
|
270 |
if (logger.isDebugEnabled()) |
|
271 |
logger.debug("Commit retry: " + retryCount); |
|
272 |
method = new PostMethod(getConfiguration().getString("solrUpdateUrl")); |
|
273 |
method.setRequestEntity(new StringRequestEntity("<commit/>", "text/xml", "iso8859-1")); |
|
274 |
int statusCode = httpClient.executeMethod(method); |
|
275 |
if (logger.isDebugEnabled()) |
|
276 |
logger.debug("HTTP status: " + statusCode); |
|
277 |
String response = method.getResponseBodyAsString(); |
|
278 |
if (logger.isDebugEnabled()) |
|
279 |
logger.debug(response); |
|
280 |
if (statusCode != 200 && retryCount < 2) { |
|
281 |
try { |
|
282 |
Thread.sleep(10000); // Give Solr a little time to be available. |
|
283 |
} catch (InterruptedException e) { |
|
284 |
} |
|
285 |
sendCommit(httpClient, retryCount + 1); |
|
286 |
} |
|
287 |
} |
|
288 |
finally { |
|
289 |
if (method != null) |
|
290 |
method.releaseConnection(); |
|
291 |
} |
|
126 |
private void sendDelete(String solrUrl, Long id) throws SolrServerException, IOException { |
|
127 |
// A new SolrJ client is created for every delete request.
CommonsHttpSolrServer solr = new CommonsHttpSolrServer(solrUrl); |
|
128 |
// The Long id is converted to its string form before being passed to Solr.
solr.deleteById(id.toString()); |
|
129 |
// A commit is issued right after the delete request.
solr.commit(); |
|
292 | 130 |
} |
293 | 131 |
|
294 |
/**
 * Expands a file name into a space-separated list of its parts followed by
 * the unmodified name. The name is split on '.' and '_' characters; every
 * resulting token is appended with a trailing space, and the original file
 * name is appended last. For example, "my_file.txt" becomes
 * "my file txt my_file.txt".
 *
 * The result is used by onMessage as the value of the "name" field sent to Solr.
 *
 * @param filename the file name to expand
 * @return the tokens separated by spaces, followed by the original file name
 */
private String tokenizeFilename(String filename){ |
|
295 |
StringBuffer result = new StringBuffer(); |
|
296 |
// Each character of "._" acts as a separate delimiter.
StringTokenizer tokenizer = new StringTokenizer(filename,"._"); |
|
297 |
while(tokenizer.hasMoreTokens()){ |
|
298 |
result.append(tokenizer.nextToken()); |
|
299 |
result.append(" "); |
|
300 |
} |
|
301 |
// Keep the complete, untokenized name as the final entry.
result.append(filename); |
|
302 |
return result.toString(); |
|
303 |
} |
|
132 |
|
|
304 | 133 |
} |
Also available in: Unified diff