Revision 15:5e82fb01dfb3 TroubleTicketParser.py
b/TroubleTicketParser.py | ||
---|---|---|
16 | 16 |
|
17 | 17 |
from glob import iglob |
18 | 18 |
|
19 |
from xml.etree.ElementTree import ElementTree, dump |
|
19 |
from xml.etree import ElementTree as ET |
|
20 |
|
|
20 | 21 |
|
21 | 22 |
NREN_NAME = "Name" |
22 | 23 |
MAIL_PATTERN = "DefinitionPattern/MailPattern" |
... | ... | |
25 | 26 |
SEARCH_FIELD = "SearchField" |
26 | 27 |
REGEXP = "*/RegExp" |
27 | 28 |
REGEXP_DIRECT = "RegExp" |
29 |
REPLACEMENT = "PossibleValue/Replace" |
|
30 |
DEFAULT_RS = "other" |
|
28 | 31 |
|
29 | 32 |
class TroubleTicketParser: |
33 |
"""Parses, normalises, and stores trouble tickets. |
|
30 | 34 |
|
35 |
When it initialises, the class reads all templates that it finds in |
|
36 |
the templates directory (Templates/) and stores them in an internal |
|
37 |
dictionary as XML trees, keyed by the origin e-mail address. It then |
|
38 |
compiles a second two-level dictionary. In the first level, the key |
|
39 |
is the origin e-mail address and the value is the second-level |
|
40 |
dictionary. In the second level, the key is the regular expression |
|
41 |
name and the value is the regular expression string as found in the |
|
42 |
template file. |
|
43 |
""" |
|
31 | 44 |
def __init__(self): |
45 |
# Dictionary containing all templates as XML trees. |
|
32 | 46 |
self.templates = {} |
47 |
# Dictionary containing regular expressions with their matches found |
|
48 |
# upon parsing a trouble ticket. |
|
33 | 49 |
self.resultset = {} |
50 |
# Dictionary mapping origin addresses to dictionaries of |
|
51 |
# regular expressions. |
|
34 | 52 |
self.re_dict = {} |
53 |
# Dictionary mapping origin addresses to dictionaries of replacement |
|
54 |
# values. |
|
55 |
self.replacements_dict = {} |
|
56 |
# Dictionary mapping origin e-mail addresses to NRENs. |
|
35 | 57 |
self.nren_dict = {} |
36 | 58 |
self.read_templates() |
37 | 59 |
self.build_re_dict() |
38 | 60 |
|
39 | 61 |
def read_templates(self): |
62 |
"""Reads all templates from the templates directory. |
|
63 |
|
|
64 |
The templates are read from the templates directory (Templates/) |
|
65 |
and stored in the internal templates dictionary as XML trees. They |
|
66 |
are keyed by the origin e-mail address. |
|
67 |
""" |
|
40 | 68 |
for template in iglob('Templates/*.xml'): |
41 |
tree = ElementTree() |
|
69 |
tree = ET.ElementTree()
|
|
42 | 70 |
tree.parse(template) |
43 |
mailpattern_els = tree.findall(MAIL_PATTERN)
|
|
44 |
for mailpattern_el in mailpattern_els:
|
|
45 |
mailfield_el = mailpattern_el.find(MAILFIELD)
|
|
46 |
if mailfield_el.text == "From":
|
|
47 |
regexp_el = mailpattern_el.find(REGEXP)
|
|
48 |
if regexp_el is not None:
|
|
49 |
source = regexp_el.text.replace("\\", "")
|
|
71 |
mailpatterns = tree.findall(MAIL_PATTERN) |
|
72 |
for mailpattern in mailpatterns:
|
|
73 |
mailfield = mailpattern.find(MAILFIELD)
|
|
74 |
if mailfield.text == "From": |
|
75 |
regexp = mailpattern.find(REGEXP)
|
|
76 |
if regexp is not None: |
|
77 |
source = regexp.text.replace("\\", "") |
|
50 | 78 |
self.templates[source] = tree |
51 | 79 |
self.re_dict[source] = {} |
52 |
name_el = tree.find(NREN_NAME) |
|
53 |
self.nren_dict[source] = name_el.text |
|
80 |
self.replacements_dict[source] = {} |
|
81 |
name = tree.find(NREN_NAME) |
|
82 |
self.nren_dict[source] = name.text |
|
54 | 83 |
|
55 | 84 |
def build_re_dict(self): |
56 |
for k, t in self.templates.iteritems(): |
|
57 |
search_pattern_els = t.findall(SEARCH_PATTERN) |
|
58 |
for search_pattern_el in search_pattern_els: |
|
59 |
search_field_el = search_pattern_el.find(SEARCH_FIELD) |
|
60 |
regexp_el = search_pattern_el.find(REGEXP_DIRECT) |
|
61 |
search_field = search_field_el.text |
|
62 |
regexp = regexp_el.text |
|
63 |
self.re_dict[k][search_field] = re.compile(regexp, |
|
64 |
re.UNICODE |
|
65 |
| re.DOTALL |
|
66 |
| re.MULTILINE) |
|
85 |
"""Builds regexp and replacement dictionaries for the templates read. |
|
86 |
|
|
87 |
For each template that has been read and stored internally as |
|
88 |
an XML tree, this method constructs a dictionary containing |
|
89 |
the regular expressions contained in that template. The |
|
90 |
dictionary is entered in a dictionary keyed by the origin |
|
91 |
e-mail address. In this way we have a two-level dictionary. In |
|
92 |
the first level, keys are origin e-mail addresses and values |
|
93 |
are dictionaries whose keys are regular expression names and whose |
|
94 |
values are the regular expressions themselves. |
|
95 |
|
|
96 |
Some regular expressions in the templates files specify |
|
97 |
replacement values for their matches. These are entered in a |
|
98 |
different dictionary. The key of that dictionary is the |
|
99 |
origin e-mail address, again. The values are the search fields |
|
100 |
with replacement pairs. For each search field we then |
|
101 |
associate a second-level dictionary that contains the |
|
102 |
replacement pairs themselves. |
|
103 |
""" |
|
104 |
for source, t in self.templates.iteritems(): |
|
105 |
search_patterns = t.findall(SEARCH_PATTERN) |
|
106 |
for search_pattern in search_patterns: |
|
107 |
search_field = search_pattern.find(SEARCH_FIELD) |
|
108 |
regexp = search_pattern.find(REGEXP_DIRECT) |
|
109 |
search_field_str = search_field.text |
|
110 |
regexp_str = regexp.text |
|
111 |
self.re_dict[source][search_field_str] = re.compile(regexp_str, |
|
112 |
re.UNICODE |
|
113 |
| re.DOTALL |
|
114 |
| re.MULTILINE) |
|
115 |
replacements = search_pattern.findall(REPLACEMENT) |
|
116 |
if replacements: |
|
117 |
pairs = {} |
|
118 |
for replacement in replacements: |
|
119 |
left = replacement.find('in') |
|
120 |
right = replacement.find('as') |
|
121 |
pairs[left.text] = right.text |
|
122 |
self.replacements_dict[source][search_field_str] = pairs |
|
67 | 123 |
|
68 | 124 |
def parse_body(self, body, source): |
125 |
"""Parses the body of a trouble ticket mail coming from a source. |
|
126 |
|
|
127 |
The method parses the body of a trouble ticket mail given the source, |
|
128 |
i.e., the origin e-mail address of the trouble ticket, in order |
|
129 |
to be able to determine the template to apply. |
|
130 |
""" |
|
69 | 131 |
self.resultset = {} |
70 | 132 |
self.resultset['FROM'] = source |
71 | 133 |
self.resultset['NREN'] = self.nren_dict[source] |
72 | 134 |
re_dict = self.re_dict[source] |
73 |
for search_field, regexp in re_dict.iteritems(): |
|
135 |
for search_field_str, regexp in re_dict.iteritems():
|
|
74 | 136 |
match = regexp.search(body) |
75 | 137 |
if match is not None: |
76 |
self.resultset[search_field] = match.group(1) |
|
138 |
if search_field_str in self.replacements_dict[source]: |
|
139 |
pairs = self.replacements_dict[source][search_field_str] |
|
140 |
if match.group(1) in pairs: |
|
141 |
self.resultset[search_field_str] = pairs[match.group(1)] |
|
142 |
else: |
|
143 |
self.resultset[search_field_str] = DEFAULT_RS |
|
144 |
else: |
|
145 |
self.resultset[search_field_str] = match.group(1) |
|
77 | 146 |
|
78 | 147 |
def parse(self, message): |
148 |
"""Parses a trouble ticket. |
|
149 |
|
|
150 |
The method gets a full trouble ticket. It will establish its |
|
151 |
originating e-mail address (based on its header field), and |
|
152 |
will then proceed to parse its body by using the appropriate |
|
153 |
template. |
|
154 |
""" |
|
79 | 155 |
for source in self.re_dict.keys(): |
80 | 156 |
if message['header'].rfind(source) != -1: |
81 | 157 |
self.parse_body(message['body'], source) |
Also available in: Unified diff