Revision 0fcbf8bd src/gr/ebs/gss/server/ejb/indexer/IndexerMDBean.java

b/src/gr/ebs/gss/server/ejb/indexer/IndexerMDBean.java
20 20

  
21 21
import static gr.ebs.gss.server.configuration.GSSConfigurationFactory.getConfiguration;
22 22
import gr.ebs.gss.client.exceptions.ObjectNotFoundException;
23
import gr.ebs.gss.server.configuration.GSSConfigurationFactory;
23 24
import gr.ebs.gss.server.domain.FileBody;
24 25
import gr.ebs.gss.server.domain.FileHeader;
25 26
import gr.ebs.gss.server.domain.FileTag;
27
import gr.ebs.gss.server.ejb.ExternalAPI;
26 28
import gr.ebs.gss.server.ejb.GSSDAO;
27 29

  
28 30
import java.io.File;
29 31
import java.io.IOException;
30
import java.io.StringWriter;
31 32
import java.io.UnsupportedEncodingException;
32
import java.util.ArrayList;
33
import java.util.List;
33
import java.net.MalformedURLException;
34 34
import java.util.StringTokenizer;
35 35

  
36 36
import javax.ejb.ActivationConfigProperty;
......
43 43
import javax.jms.MapMessage;
44 44
import javax.jms.Message;
45 45
import javax.jms.MessageListener;
46
import javax.xml.parsers.DocumentBuilder;
47
import javax.xml.parsers.DocumentBuilderFactory;
48
import javax.xml.parsers.ParserConfigurationException;
49
import javax.xml.transform.OutputKeys;
50
import javax.xml.transform.Transformer;
51
import javax.xml.transform.TransformerConfigurationException;
52
import javax.xml.transform.TransformerException;
53
import javax.xml.transform.TransformerFactory;
54
import javax.xml.transform.dom.DOMSource;
55
import javax.xml.transform.stream.StreamResult;
56 46

  
47
import org.apache.commons.configuration.Configuration;
48
import org.apache.commons.configuration.ConfigurationException;
57 49
import org.apache.commons.httpclient.HttpClient;
58 50
import org.apache.commons.httpclient.HttpException;
59 51
import org.apache.commons.httpclient.methods.PostMethod;
60 52
import org.apache.commons.httpclient.methods.StringRequestEntity;
61
import org.apache.commons.httpclient.methods.multipart.FilePart;
62
import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity;
63
import org.apache.commons.httpclient.methods.multipart.Part;
64
import org.apache.commons.httpclient.methods.multipart.StringPart;
65 53
import org.apache.commons.logging.Log;
66 54
import org.apache.commons.logging.LogFactory;
55
import org.apache.solr.client.solrj.SolrServerException;
56
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
57
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
58
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
59
import org.apache.solr.common.SolrException;
60
import org.apache.solr.common.SolrInputDocument;
67 61
import org.jboss.ejb3.annotation.ResourceAdapter;
68
import org.w3c.dom.Document;
69
import org.w3c.dom.Element;
70
import org.w3c.dom.Node;
71 62

  
72 63
/**
73 64
 * Message driven bean that accepts messages whenever a document is created,
......
85 76
	/**
86 77
	 * EJB offering access to the JPA entity manager
87 78
	 */
88
	@EJB GSSDAO dao;
79
	@EJB ExternalAPI service;
89 80

  
90 81
	/**
91 82
	 * Decides to add or drop an item from the index depending on the message
......
99 90
	 * @see javax.jms.MessageListener#onMessage(javax.jms.Message)
100 91
	 */
101 92
	@Override
102
	@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
103 93
	public void onMessage(Message msg) {
104
		PostMethod method = null;
105
		String idStr = "";
94
		Long id = null;
106 95
		try {
107 96
			MapMessage map = (MapMessage) msg;
108
			Long id = (Long) map.getObject("id");
109
			idStr = id.toString();
97
			id = (Long) map.getObject("id");
110 98
			boolean delete = map.getBoolean("delete");
111
			HttpClient httpClient = new HttpClient();
99
			Configuration config = GSSConfigurationFactory.getConfiguration();
112 100
			if (delete) {
113
				method = new PostMethod(getConfiguration().getString("solrUpdateUrl"));
114
				String deleteXMLMsg = "<delete><id>" + idStr + "</id></delete>";
115
				if (logger.isDebugEnabled())
116
					logger.debug(deleteXMLMsg);
117
				method.setRequestEntity(new StringRequestEntity(deleteXMLMsg, "text/xml", "iso8859-1"));
118
				int statusCode = httpClient.executeMethod(method);
119
				if (logger.isDebugEnabled())
120
					logger.debug("HTTP status: " + statusCode);
121
				String response = method.getResponseBodyAsString();
122
				if (logger.isDebugEnabled())
123
					logger.debug(response);
124

  
125
				method.releaseConnection();
126
				if (statusCode != 200)
127
					throw new EJBException("Response from Solr for deleting file id " +
128
								idStr + " had status: " + statusCode);
129
				sendCommit(httpClient, 0);
101
				sendDelete(config.getString("solr.url"), id);
130 102
			} else {
131
				FileHeader file = dao.getFileForIndexing(id);
132
				FileBody body = file.getCurrentBody();
133
				String type = null;
134
				String mime = body.getMimeType();
135
				boolean nofile = false;
136
				if (body.getFileSize() > getConfiguration().getLong("solrDocumentUploadLimitInKB") * 1024)
137
					nofile = true;
138
				else if (mime.equals("application/pdf"))
139
					type = "pdf";
140
				else if (mime.equals("text/plain"))
141
					type = "text";
142
				else if (mime.equals("text/html"))
143
					type = "html";
144
				else if (mime.endsWith("msword"))
145
					type = "doc";
146
				else if (mime.endsWith("ms-excel"))
147
					type = "xls";
148
				else if (mime.endsWith("powerpoint"))
149
					type = "ppt";
150
				else
151
					nofile = true;
152
				if (!nofile) {
153
					method = new PostMethod(getConfiguration().getString("solrUpdateRichUrl"));
154
					List<Part> parts = new ArrayList<Part>();
155
					parts.add(new StringPart("stream.type", type));
156
					StringBuffer fieldnames = new StringBuffer("id,name");
157
					if (!file.getFileTags().isEmpty())
158
						fieldnames.append(",tag");
159
					parts.add(new StringPart("fieldnames", fieldnames.toString()));
160
					parts.add(new StringPart("id", idStr));
161
					parts.add(new StringPart("name", tokenizeFilename(file.getName()), "UTF-8"));
162
					for (FileTag tag : file.getFileTags())
163
						parts.add(new StringPart("tag", tag.getTag(), "UTF-8"));
164
					parts.add(new StringPart("stream.fieldname", "body"));
165
					parts.add(new StringPart("commit", "true"));
166
					parts.add(new FilePart(file.getName(), new File(body.getStoredFilePath())));
167
					method.setRequestEntity(new MultipartRequestEntity(parts.toArray(new Part[1]), method.getParams()));
168
					httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(5000);
169
					if (logger.isDebugEnabled())
170
						logger.debug("Sending rich document " + idStr);
171
					int statusCode = httpClient.executeMethod(method);
172
					if (logger.isDebugEnabled())
173
						logger.debug("HTTP status: " + statusCode);
174
					String response = method.getResponseBodyAsString();
175
					if (logger.isDebugEnabled())
176
						logger.debug(response);
177
					if (statusCode != 200)
178
						throw new EJBException("Response from Solr for updating file id " +
179
									idStr + " had status: " + statusCode);
180
				} else {
181
					DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
182
					DocumentBuilder db = dbf.newDocumentBuilder();
183
					Document doc = db.newDocument();
184
					Node root = doc.createElement("add");
185
					doc.appendChild(root);
186
					Node docNode = doc.createElement("doc");
187
					root.appendChild(docNode);
188
					Element field = doc.createElement("field");
189
					field.setAttribute("name", "id");
190
					docNode.appendChild(field);
191
					field.appendChild(doc.createTextNode(idStr));
192

  
193
					field = doc.createElement("field");
194
					field.setAttribute("name", "name");
195
					docNode.appendChild(field);
196
					field.appendChild(doc.createTextNode(tokenizeFilename(file.getName())));
197

  
198
					for (FileTag tag : file.getFileTags()) {
199
						field = doc.createElement("field");
200
						field.setAttribute("name", "tag");
201
						docNode.appendChild(field);
202
						field.appendChild(doc.createTextNode(tag.getTag()));
203
					}
204

  
205
					TransformerFactory fact = TransformerFactory.newInstance();
206
					Transformer trans = fact.newTransformer();
207
					trans.setOutputProperty(OutputKeys.INDENT, "yes");
208
					StringWriter sw = new StringWriter();
209
					StreamResult sr = new StreamResult(sw);
210
					DOMSource source = new DOMSource(doc);
211
					trans.transform(source, sr);
212
					if (logger.isDebugEnabled())
213
						logger.debug(sw.toString());
214

  
215
					method = new PostMethod(getConfiguration().getString("solrUpdateUrl"));
216
					method.setRequestEntity(new StringRequestEntity(sw.toString(),"text/xml", "UTF-8"));
217
					int statusCode = httpClient.executeMethod(method);
218
					if (logger.isDebugEnabled())
219
						logger.debug("HTTP status: " + statusCode);
220
					String response = method.getResponseBodyAsString();
221
					if (logger.isDebugEnabled())
222
						logger.debug(response);
223

  
224
					method.releaseConnection();
225
					if (statusCode != 200)
226
						throw new EJBException("Response from Solr for updating file id " +
227
									idStr + " had status: " + statusCode);
228

  
229
					sendCommit(httpClient, 0);
230
				}
231
			}
103
				service.postFileToSolr(id);
104
			}	
232 105
		}
233 106
		catch (JMSException e) {
234
			throw new EJBException("Error processing file ID " + idStr, e);
235
		} catch (UnsupportedEncodingException e) {
236
			throw new EJBException("Error processing file ID " + idStr, e);
237
		} catch (HttpException e) {
238
			throw new EJBException("Error processing file ID " + idStr, e);
239
		} catch (IOException e) {
240
			throw new EJBException("Error processing file ID " + idStr, e);
241
		} catch (ObjectNotFoundException e) {
242
			logger.warn("Error processing file ID " + idStr + ": Indexing " +
243
					"aborted because the file could not be found");
244
		} catch (ParserConfigurationException e) {
245
			throw new EJBException("Error processing file ID " + idStr, e);
246
		} catch (TransformerConfigurationException e) {
247
			throw new EJBException("Error processing file ID " + idStr, e);
248
		} catch (TransformerException e) {
249
			throw new EJBException("Error processing file ID " + idStr, e);
107
			throw new EJBException("Error processing file ID " + id, e);
108
		}
109
		catch (IOException e) {
110
			throw new EJBException("Error processing file ID " + id, e);
250 111
		}
251
		finally {
252
			if (method != null)
253
				method.releaseConnection();
112
		catch (SolrServerException e) {
113
			throw new EJBException(e);
254 114
		}
255 115
	}
256 116

  
117

  
257 118
	/**
258
	 * Sends a commit message to the solr server
259
	 *
260
	 * @param httpClient the HTTP client used to execute the commit request
261
	 * @param retryCount The number of retries already made. A failed commit is attempted at most three times in total (the initial attempt plus two retries). This parameter is passed in the recursive
262
	 * 					calls to stop the recursion
263
	 * @throws UnsupportedEncodingException
119
	 * Sends a delete command to Solr and then commits the change. The id is the Long id of the indexed document.
120
	 * 
121
	 * @param solrUrl the URL of the Solr server that receives the delete command
122
	 * @param id the id of the document to remove from the index
123
	 * @throws SolrServerException
264 124
	 * @throws IOException
265
	 * @throws HttpException
266 125
	 */
267
	private void sendCommit(HttpClient httpClient, int retryCount) throws UnsupportedEncodingException, IOException, HttpException {
268
		PostMethod method = null;
269
		try {
270
			if (logger.isDebugEnabled())
271
				logger.debug("Commit retry: " + retryCount);
272
			method = new PostMethod(getConfiguration().getString("solrUpdateUrl"));
273
			method.setRequestEntity(new StringRequestEntity("<commit/>", "text/xml", "iso8859-1"));
274
			int statusCode = httpClient.executeMethod(method);
275
			if (logger.isDebugEnabled())
276
				logger.debug("HTTP status: " + statusCode);
277
			String response = method.getResponseBodyAsString();
278
			if (logger.isDebugEnabled())
279
				logger.debug(response);
280
			if (statusCode != 200 && retryCount < 2) {
281
				try {
282
					Thread.sleep(10000); // Give Solr a little time to be available.
283
				} catch (InterruptedException e) {
284
				}
285
				sendCommit(httpClient, retryCount + 1);
286
			}
287
		}
288
		finally {
289
			if (method != null)
290
				method.releaseConnection();
291
		}
126
	private void sendDelete(String solrUrl, Long id)	throws SolrServerException, IOException {
127
		CommonsHttpSolrServer solr = new CommonsHttpSolrServer(solrUrl);
128
		solr.deleteById(id.toString());
129
		solr.commit();
292 130
	}
293 131

  
294
	private String tokenizeFilename(String filename){
295
		StringBuffer result = new StringBuffer();
296
		StringTokenizer tokenizer = new StringTokenizer(filename,"._");
297
		while(tokenizer.hasMoreTokens()){
298
			result.append(tokenizer.nextToken());
299
			result.append(" ");
300
		}
301
		result.append(filename);
302
		return result.toString();
303
	}
132

  
304 133
}

Also available in: Unified diff