Statistics
| Branch: | Tag: | Revision:

root / src / gr / ebs / gss / server / ejb / indexer / IndexerMDBean.java @ 623:66f69a7348ed

History | View | Annotate | Download (11.6 kB)

1
/*
2
 * Copyright 2007, 2008, 2009 Electronic Business Systems Ltd.
3
 *
4
 * This file is part of GSS.
5
 *
6
 * GSS is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * GSS is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with GSS.  If not, see <http://www.gnu.org/licenses/>.
18
 */
19
package gr.ebs.gss.server.ejb.indexer;
20

    
21
import static gr.ebs.gss.server.configuration.GSSConfigurationFactory.getConfiguration;
22
import gr.ebs.gss.client.exceptions.ObjectNotFoundException;
23
import gr.ebs.gss.server.domain.FileBody;
24
import gr.ebs.gss.server.domain.FileHeader;
25
import gr.ebs.gss.server.domain.FileTag;
26
import gr.ebs.gss.server.ejb.GSSDAO;
27

    
28
import java.io.File;
29
import java.io.IOException;
30
import java.io.StringWriter;
31
import java.io.UnsupportedEncodingException;
32
import java.util.ArrayList;
33
import java.util.List;
34
import java.util.StringTokenizer;
35

    
36
import javax.ejb.ActivationConfigProperty;
37
import javax.ejb.EJB;
38
import javax.ejb.EJBException;
39
import javax.ejb.MessageDriven;
40
import javax.ejb.TransactionAttribute;
41
import javax.ejb.TransactionAttributeType;
42
import javax.jms.JMSException;
43
import javax.jms.MapMessage;
44
import javax.jms.Message;
45
import javax.jms.MessageListener;
46
import javax.xml.parsers.DocumentBuilder;
47
import javax.xml.parsers.DocumentBuilderFactory;
48
import javax.xml.parsers.ParserConfigurationException;
49
import javax.xml.transform.OutputKeys;
50
import javax.xml.transform.Transformer;
51
import javax.xml.transform.TransformerConfigurationException;
52
import javax.xml.transform.TransformerException;
53
import javax.xml.transform.TransformerFactory;
54
import javax.xml.transform.dom.DOMSource;
55
import javax.xml.transform.stream.StreamResult;
56

    
57
import org.apache.commons.httpclient.HttpClient;
58
import org.apache.commons.httpclient.HttpException;
59
import org.apache.commons.httpclient.methods.PostMethod;
60
import org.apache.commons.httpclient.methods.StringRequestEntity;
61
import org.apache.commons.httpclient.methods.multipart.FilePart;
62
import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity;
63
import org.apache.commons.httpclient.methods.multipart.Part;
64
import org.apache.commons.httpclient.methods.multipart.StringPart;
65
import org.apache.commons.logging.Log;
66
import org.apache.commons.logging.LogFactory;
67
import org.jboss.ejb3.annotation.ResourceAdapter;
68
import org.w3c.dom.Document;
69
import org.w3c.dom.Element;
70
import org.w3c.dom.Node;
71

    
72
/**
73
 * Message driven bean that accepts messages whenever a document is created,
74
 * modified or deleted and adds/removes the item from the search index.
75
 */
76
@MessageDriven(activationConfig={@ActivationConfigProperty(propertyName="destinationType", propertyValue="javax.jms.Queue"),
77
                                                                        @ActivationConfigProperty(propertyName="destination", propertyValue="queue/gss-indexingQueue")})
78
@ResourceAdapter("hornetq-ra.rar")
79
public class IndexerMDBean implements MessageListener {
80
        /**
81
         * The logger
82
         */
83
        private static final Log logger = LogFactory.getLog(IndexerMDBean.class);
84

    
85
        /**
86
         * EJB offering access to the JPA entity manager
87
         */
88
        @EJB GSSDAO dao;
89

    
90
        /**
91
         * Decides to add or drop an item from the index depending on the message
92
         * received
93
         *
94
         * It currently uses the patched solr API for rich documents. This API does not
95
         * allow indexing time field boosting. For this reason we have to use the dismax search API (instead of the
96
         * standard) that allows for search time field boosting
97
         *
98
         * @param msg
99
         * @see javax.jms.MessageListener#onMessage(javax.jms.Message)
100
         */
101
        @Override
102
        @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
103
        public void onMessage(Message msg) {
104
                PostMethod method = null;
105
                String idStr = "";
106
                try {
107
                        MapMessage map = (MapMessage) msg;
108
                        Long id = (Long) map.getObject("id");
109
                        idStr = id.toString();
110
                        boolean delete = map.getBoolean("delete");
111
                        HttpClient httpClient = new HttpClient();
112
                        if (delete) {
113
                                method = new PostMethod(getConfiguration().getString("solrUpdateUrl"));
114
                                String deleteXMLMsg = "<delete><id>" + idStr + "</id></delete>";
115
                                if (logger.isDebugEnabled())
116
                                        logger.debug(deleteXMLMsg);
117
                                method.setRequestEntity(new StringRequestEntity(deleteXMLMsg, "text/xml", "iso8859-1"));
118
                                int statusCode = httpClient.executeMethod(method);
119
                                if (logger.isDebugEnabled())
120
                                        logger.debug("HTTP status: " + statusCode);
121
                                String response = method.getResponseBodyAsString();
122
                                if (logger.isDebugEnabled())
123
                                        logger.debug(response);
124

    
125
                                method.releaseConnection();
126
                                if (statusCode != 200)
127
                                        throw new EJBException("Response from Solr for deleting file id " +
128
                                                                idStr + " had status: " + statusCode);
129
                                sendCommit(httpClient, 0);
130
                        } else {
131
                                FileHeader file = dao.getFileForIndexing(id);
132
                                FileBody body = file.getCurrentBody();
133
                                String type = null;
134
                                String mime = body.getMimeType();
135
                                boolean nofile = false;
136
                                if (body.getFileSize() > getConfiguration().getLong("solrDocumentUploadLimitInKB") * 1024)
137
                                        nofile = true;
138
                                else if (mime.equals("application/pdf"))
139
                                        type = "pdf";
140
                                else if (mime.equals("text/plain"))
141
                                        type = "text";
142
                                else if (mime.equals("text/html"))
143
                                        type = "html";
144
                                else if (mime.endsWith("msword"))
145
                                        type = "doc";
146
                                else if (mime.endsWith("ms-excel"))
147
                                        type = "xls";
148
                                else if (mime.endsWith("powerpoint"))
149
                                        type = "ppt";
150
                                else
151
                                        nofile = true;
152
                                if (!nofile) {
153
                                        method = new PostMethod(getConfiguration().getString("solrUpdateRichUrl"));
154
                                        List<Part> parts = new ArrayList<Part>();
155
                                        parts.add(new StringPart("stream.type", type));
156
                                        StringBuffer fieldnames = new StringBuffer("id,name");
157
                                        if (!file.getFileTags().isEmpty())
158
                                                fieldnames.append(",tag");
159
                                        parts.add(new StringPart("fieldnames", fieldnames.toString()));
160
                                        parts.add(new StringPart("id", idStr));
161
                                        parts.add(new StringPart("name", tokenizeFilename(file.getName()), "UTF-8"));
162
                                        for (FileTag tag : file.getFileTags())
163
                                                parts.add(new StringPart("tag", tag.getTag(), "UTF-8"));
164
                                        parts.add(new StringPart("stream.fieldname", "body"));
165
                                        parts.add(new StringPart("commit", "true"));
166
                                        parts.add(new FilePart(file.getName(), new File(body.getStoredFilePath())));
167
                                        method.setRequestEntity(new MultipartRequestEntity(parts.toArray(new Part[1]), method.getParams()));
168
                                        httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(5000);
169
                                        if (logger.isDebugEnabled())
170
                                                logger.debug("Sending rich document " + idStr);
171
                                        int statusCode = httpClient.executeMethod(method);
172
                                        if (logger.isDebugEnabled())
173
                                                logger.debug("HTTP status: " + statusCode);
174
                                        String response = method.getResponseBodyAsString();
175
                                        if (logger.isDebugEnabled())
176
                                                logger.debug(response);
177
                                        if (statusCode != 200)
178
                                                throw new EJBException("Response from Solr for updating file id " +
179
                                                                        idStr + " had status: " + statusCode);
180
                                } else {
181
                                        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
182
                                        DocumentBuilder db = dbf.newDocumentBuilder();
183
                                        Document doc = db.newDocument();
184
                                        Node root = doc.createElement("add");
185
                                        doc.appendChild(root);
186
                                        Node docNode = doc.createElement("doc");
187
                                        root.appendChild(docNode);
188
                                        Element field = doc.createElement("field");
189
                                        field.setAttribute("name", "id");
190
                                        docNode.appendChild(field);
191
                                        field.appendChild(doc.createTextNode(idStr));
192

    
193
                                        field = doc.createElement("field");
194
                                        field.setAttribute("name", "name");
195
                                        docNode.appendChild(field);
196
                                        field.appendChild(doc.createTextNode(tokenizeFilename(file.getName())));
197

    
198
                                        for (FileTag tag : file.getFileTags()) {
199
                                                field = doc.createElement("field");
200
                                                field.setAttribute("name", "tag");
201
                                                docNode.appendChild(field);
202
                                                field.appendChild(doc.createTextNode(tag.getTag()));
203
                                        }
204

    
205
                                        TransformerFactory fact = TransformerFactory.newInstance();
206
                                        Transformer trans = fact.newTransformer();
207
                                        trans.setOutputProperty(OutputKeys.INDENT, "yes");
208
                                        StringWriter sw = new StringWriter();
209
                                        StreamResult sr = new StreamResult(sw);
210
                                        DOMSource source = new DOMSource(doc);
211
                                        trans.transform(source, sr);
212
                                        if (logger.isDebugEnabled())
213
                                                logger.debug(sw.toString());
214

    
215
                                        method = new PostMethod(getConfiguration().getString("solrUpdateUrl"));
216
                                        method.setRequestEntity(new StringRequestEntity(sw.toString(),"text/xml", "UTF-8"));
217
                                        int statusCode = httpClient.executeMethod(method);
218
                                        if (logger.isDebugEnabled())
219
                                                logger.debug("HTTP status: " + statusCode);
220
                                        String response = method.getResponseBodyAsString();
221
                                        if (logger.isDebugEnabled())
222
                                                logger.debug(response);
223

    
224
                                        method.releaseConnection();
225
                                        if (statusCode != 200)
226
                                                throw new EJBException("Response from Solr for updating file id " +
227
                                                                        idStr + " had status: " + statusCode);
228

    
229
                                        sendCommit(httpClient, 0);
230
                                }
231
                        }
232
                }
233
                catch (JMSException e) {
234
                        throw new EJBException("Error processing file ID " + idStr, e);
235
                } catch (UnsupportedEncodingException e) {
236
                        throw new EJBException("Error processing file ID " + idStr, e);
237
                } catch (HttpException e) {
238
                        throw new EJBException("Error processing file ID " + idStr, e);
239
                } catch (IOException e) {
240
                        throw new EJBException("Error processing file ID " + idStr, e);
241
                } catch (ObjectNotFoundException e) {
242
                        logger.warn("Error processing file ID " + idStr + ": Indexing " +
243
                                        "aborted because the file could not be found");
244
                } catch (ParserConfigurationException e) {
245
                        throw new EJBException("Error processing file ID " + idStr, e);
246
                } catch (TransformerConfigurationException e) {
247
                        throw new EJBException("Error processing file ID " + idStr, e);
248
                } catch (TransformerException e) {
249
                        throw new EJBException("Error processing file ID " + idStr, e);
250
                }
251
                finally {
252
                        if (method != null)
253
                                method.releaseConnection();
254
                }
255
        }
256

    
257
        /**
258
         * Sends a commit message to the solr server
259
         *
260
         * @param httpClient
261
         * @param retryCount If the commit fails, it is retried three times. This parameter is passed in the recursive
262
         *                                         calls to stop the recursion
263
         * @throws UnsupportedEncodingException
264
         * @throws IOException
265
         * @throws HttpException
266
         */
267
        private void sendCommit(HttpClient httpClient, int retryCount) throws UnsupportedEncodingException, IOException, HttpException {
268
                PostMethod method = null;
269
                try {
270
                        if (logger.isDebugEnabled())
271
                                logger.debug("Commit retry: " + retryCount);
272
                        method = new PostMethod(getConfiguration().getString("solrUpdateUrl"));
273
                        method.setRequestEntity(new StringRequestEntity("<commit/>", "text/xml", "iso8859-1"));
274
                        int statusCode = httpClient.executeMethod(method);
275
                        if (logger.isDebugEnabled())
276
                                logger.debug("HTTP status: " + statusCode);
277
                        String response = method.getResponseBodyAsString();
278
                        if (logger.isDebugEnabled())
279
                                logger.debug(response);
280
                        if (statusCode != 200 && retryCount < 2) {
281
                                try {
282
                                        Thread.sleep(10000); // Give Solr a little time to be available.
283
                                } catch (InterruptedException e) {
284
                                }
285
                                sendCommit(httpClient, retryCount + 1);
286
                        }
287
                }
288
                finally {
289
                        if (method != null)
290
                                method.releaseConnection();
291
                }
292
        }
293

    
294
        private String tokenizeFilename(String filename){
295
                StringBuffer result = new StringBuffer();
296
                StringTokenizer tokenizer = new StringTokenizer(filename,"._");
297
                while(tokenizer.hasMoreTokens()){
298
                        result.append(tokenizer.nextToken());
299
                        result.append(" ");
300
                }
301
                result.append(filename);
302
                return result.toString();
303
        }
304
}