/TroubleTicketParser.py - Diff - ENOC TT Handler - Greek Research and Technology Network's projects

Revision 15:5e82fb01dfb3 TroubleTicketParser.py

     from glob import iglob
     from xml.etree.ElementTree import ElementTree, dump
     from xml.etree import ElementTree as ET
     # Path of the template element whose text is stored as the NREN name.
     NREN_NAME = "Name"
     # Path of the mail-pattern elements looked up in every template.
     MAIL_PATTERN = "DefinitionPattern/MailPattern"
-...
     # NOTE(review): MAILFIELD and SEARCH_PATTERN are used by the class below
     # but are not defined in the visible lines; presumably they live in the
     # part of the file elided just above -- TODO confirm.
     # Child of a search pattern whose text names the field being extracted.
     SEARCH_FIELD = "SearchField"
     # RegExp element located one level below a child of a mail pattern.
     REGEXP = "*/RegExp"
     # RegExp element that is a direct child of a search pattern.
     REGEXP_DIRECT = "RegExp"
     # Replacement entries of a search pattern; each is read through its
     # 'in' and 'as' children.
     REPLACEMENT = "PossibleValue/Replace"
     # Value stored when a field has replacement pairs but the matched text
     # is not one of them.
     DEFAULT_RS = "other"
     class TroubleTicketParser:
         """Parses, normalises, and stores trouble tickets.

         When it initialises, the class reads all templates that it finds in
         the templates directory (Templates/) and stores them in an internal
         dictionary as XML trees, keyed by the origin e-mail address. It then
         compiles a two-level dictionary of regular expressions. In the first
         level, the key is the origin e-mail address and the value is the
         second level dictionary. In the second level, the key is the search
         field name and the value is the compiled regular expression built
         from the string found in the template file. A parallel two-level
         dictionary holds the replacement pairs of the search fields that
         define them.
         """

         def __init__(self):
             """Creates the internal dictionaries and loads all templates."""
             # Dictionary containing all templates as XML trees.
             self.templates = {}
             # Dictionary containing regular expressions with their matches found
             # upon parsing a trouble ticket.
             self.resultset = {}
             # Dictionary mapping origin addresses to dictionaries of
             # regular expressions.
             self.re_dict = {}
             # Dictionary mapping origin addresses to dictionaries of replacement
             # values.
             self.replacements_dict = {}
             # Dictionary mapping origin e-mail addresses to NRENs.
             self.nren_dict = {}
             self.read_templates()
             self.build_re_dict()

         def read_templates(self):
             """Reads all templates from the templates directory.

             The templates are read from the templates directory (Templates/)
             and stored in the internal templates dictionary as XML trees. They
             are keyed by the origin e-mail address, which is taken from the
             regular expression of every mail pattern whose mail field is
             "From". For each such address an empty regular expression
             dictionary and an empty replacements dictionary are created and
             the NREN name of the template is recorded.

             Mail patterns without a mail field element, with a mail field
             other than "From", or without a regular expression are skipped.
             """
             for template in iglob('Templates/*.xml'):
                 tree = ET.ElementTree()
                 tree.parse(template)
                 for mailpattern in tree.findall(MAIL_PATTERN):
                     mailfield = mailpattern.find(MAILFIELD)
                     if mailfield is None or mailfield.text != "From":
                         continue
                     regexp = mailpattern.find(REGEXP)
                     if regexp is None:
                         continue
                     # The address is stored as a regular expression in the
                     # template; dropping the backslashes yields the key used
                     # for plain substring matching in parse().
                     source = regexp.text.replace("\\", "")
                     self.templates[source] = tree
                     self.re_dict[source] = {}
                     self.replacements_dict[source] = {}
                     name = tree.find(NREN_NAME)
                     self.nren_dict[source] = name.text

         def build_re_dict(self):
             """Builds regexp and replacement dictionaries for the templates read.

             For each template that has been read and stored internally as
             an XML tree, this method constructs a dictionary containing
             the regular expressions contained in that template. The
             dictionary is entered in a dictionary keyed by the origin
             e-mail address. In this way we have a two-level dictionary. In
             the first level keys are origin e-mail addresses and values
             are dictionaries whose keys are search field names and whose
             values are the compiled regular expressions.

             Some regular expressions in the template files specify
             replacement values for their matches. These are entered in a
             different dictionary. The key of that dictionary is the
             origin e-mail address, again. The values are dictionaries keyed
             by the search fields that have replacement pairs; each of them
             maps the 'in' text of a replacement to its 'as' text.
             """
             # The flags are the same for every expression; build them once.
             flags = re.UNICODE | re.DOTALL | re.MULTILINE
             # items() instead of iteritems() so that the method runs on
             # both Python 2 and Python 3.
             for source, tree in self.templates.items():
                 for search_pattern in tree.findall(SEARCH_PATTERN):
                     search_field_str = search_pattern.find(SEARCH_FIELD).text
                     regexp_str = search_pattern.find(REGEXP_DIRECT).text
                     self.re_dict[source][search_field_str] = re.compile(
                         regexp_str, flags)
                     replacements = search_pattern.findall(REPLACEMENT)
                     if replacements:
                         pairs = {}
                         for replacement in replacements:
                             left = replacement.find('in')
                             right = replacement.find('as')
                             pairs[left.text] = right.text
                         self.replacements_dict[source][search_field_str] = pairs

         def parse_body(self, body, source):
             """Parses the body of a trouble ticket mail coming from a source.

             The method parses the body of a trouble ticket mail given the source,
             i.e., the origin e-mail address of the trouble ticket, in order
             to be able to determine the template to apply.

             The result set is rebuilt from scratch. It always gets the
             'FROM' (the source) and 'NREN' entries, plus one entry for every
             search field whose regular expression matches the body. The
             value is the first group of the match, unless the search field
             has replacement pairs: then it is the replacement of the matched
             text, or DEFAULT_RS when the matched text has no replacement.

             Raises KeyError when source is not a known origin address.
             """
             self.resultset = {
                 'FROM': source,
                 'NREN': self.nren_dict[source],
             }
             replacements = self.replacements_dict[source]
             for search_field_str, regexp in self.re_dict[source].items():
                 match = regexp.search(body)
                 if match is None:
                     continue
                 value = match.group(1)
                 if search_field_str in replacements:
                     pairs = replacements[search_field_str]
                     # Normalise the matched text through the replacement
                     # pairs; unknown values collapse to the default.
                     value = pairs[value] if value in pairs else DEFAULT_RS
                 self.resultset[search_field_str] = value

         def parse(self, message):
             """Parses a trouble ticket.

             The method gets a full trouble ticket. It will establish its
             originating e-mail address (based on its header field), and
             will then proceed to parse its body by using the appropriate
             template.

             The message is indexed with 'header' and 'body'. Every known
             origin address that occurs in the header triggers a parse of
             the body, so when several addresses occur the result set holds
             the outcome of the last one tried. When no address occurs in
             the header the result set is left untouched.
             """
             header = message['header']
             for source in self.re_dict:
                 if source in header:
                     self.parse_body(message['body'], source)

Also available in: Unified diff