Revision 15:5e82fb01dfb3 TroubleTicketParser.py

b/TroubleTicketParser.py
16 16

  
17 17
from glob import iglob
18 18

  
19
from xml.etree.ElementTree import ElementTree, dump
19
from xml.etree import ElementTree as ET
20

  
20 21

  
21 22
NREN_NAME = "Name"
22 23
MAIL_PATTERN = "DefinitionPattern/MailPattern"
......
25 26
SEARCH_FIELD = "SearchField"
26 27
REGEXP = "*/RegExp"
27 28
REGEXP_DIRECT = "RegExp"
29
REPLACEMENT = "PossibleValue/Replace"
30
DEFAULT_RS = "other"
28 31

  
29 32
class TroubleTicketParser:
33
    """Parses, normalises, and stores trouble tickets.
30 34

  
35
    When it initialises, the class reads all templates that it finds in
36
    the templates directory (Templates/) and stores them in an internal
37
    dictionary as XML trees, keyed by the origin e-mail address. It then
38
    compiles a second two-level dictionary. In the first level, the key
39
    is the origin e-mail address and the value is the second-level
40
    dictionary. In the second level, the key is the regular expression
41
    name and the value is the regular expression string as found in the
42
    template file.
43
    """
31 44
    def __init__(self):
45
        # Dictionary containing all templates as XML trees.
32 46
        self.templates = {}
47
        # Dictionary containing regular expressions with their matches found
48
        # upon parsing a trouble ticket.
33 49
        self.resultset = {}
50
        # Dictionary mapping origin addresses to dictionaries of
51
        # regular expressions.
34 52
        self.re_dict = {}
53
        # Dictionary mapping origin addresses to dictionaries of replacement
54
        # values.
55
        self.replacements_dict = {}
56
        # Dictionary mapping origin e-mail addresses to NRENs.
35 57
        self.nren_dict = {}
36 58
        self.read_templates()
37 59
        self.build_re_dict()
38 60

  
39 61
    def read_templates(self):
62
        """Reads all templates from the templates directory.
63
        
64
        The templates are read from the templates directory (Templates/)
65
        and stored in the internal templates dictionary as XML trees. They
66
        are keyed by the origin e-mail address.
67
        """
40 68
        for template in iglob('Templates/*.xml'):
41
            tree = ElementTree()
69
            tree = ET.ElementTree()
42 70
            tree.parse(template)
43
            mailpattern_els = tree.findall(MAIL_PATTERN)
44
            for mailpattern_el in mailpattern_els:
45
                mailfield_el = mailpattern_el.find(MAILFIELD)
46
                if mailfield_el.text == "From":
47
                    regexp_el = mailpattern_el.find(REGEXP)
48
                    if regexp_el is not None:
49
                        source = regexp_el.text.replace("\\", "")
71
            mailpatterns = tree.findall(MAIL_PATTERN)
72
            for mailpattern in mailpatterns:
73
                mailfield = mailpattern.find(MAILFIELD)
74
                if mailfield.text == "From":
75
                    regexp = mailpattern.find(REGEXP)
76
                    if regexp is not None:
77
                        source = regexp.text.replace("\\", "")
50 78
                        self.templates[source] = tree
51 79
                        self.re_dict[source] = {}
52
                        name_el = tree.find(NREN_NAME)
53
                        self.nren_dict[source] = name_el.text
80
                        self.replacements_dict[source] = {}
81
                        name = tree.find(NREN_NAME)
82
                        self.nren_dict[source] = name.text
54 83
        
55 84
    def build_re_dict(self):
56
        for k, t in self.templates.iteritems():
57
            search_pattern_els = t.findall(SEARCH_PATTERN)
58
            for search_pattern_el in search_pattern_els:
59
                search_field_el = search_pattern_el.find(SEARCH_FIELD)
60
                regexp_el = search_pattern_el.find(REGEXP_DIRECT)
61
                search_field = search_field_el.text
62
                regexp = regexp_el.text
63
                self.re_dict[k][search_field] = re.compile(regexp,
64
                                                           re.UNICODE
65
                                                           | re.DOTALL
66
                                                           | re.MULTILINE)
85
        """Builds regexp and replacement dictionaries for the templates read.
86

  
87
        For each template that has been read and stored internally as
88
        an XML tree, this method constructs a dictionary containing
89
        the regular expressions contained in that template. The
90
        dictionary is entered in a dictionary keyed by the origin
91
        e-mail address. In this way we have a two-level dictionary. In
92
        the first level, keys are origin e-mail addresses and values
93
        are dictionaries whose keys are regular expression names and whose
94
        values are the regular expressions themselves.
95
        
96
        Some regular expressions in the templates files specify
97
        replacement values for their matches. These are entered in a
98
        different dictionary.  The key of that dictionary is the
99
        origin e-mail address, again. The values are the search fields
100
        with replacement pairs. For each search field we then
101
        associate a second-level dictionary that contains the
102
        replacement pairs themselves.
103
        """
104
        for source, t in self.templates.iteritems():
105
            search_patterns = t.findall(SEARCH_PATTERN)
106
            for search_pattern in search_patterns:
107
                search_field = search_pattern.find(SEARCH_FIELD)
108
                regexp = search_pattern.find(REGEXP_DIRECT)
109
                search_field_str = search_field.text
110
                regexp_str = regexp.text
111
                self.re_dict[source][search_field_str] = re.compile(regexp_str,
112
                                                                    re.UNICODE
113
                                                                    | re.DOTALL
114
                                                                    | re.MULTILINE)
115
                replacements = search_pattern.findall(REPLACEMENT)
116
                if replacements:
117
                    pairs = {}
118
                    for replacement in replacements:
119
                        left = replacement.find('in')
120
                        right = replacement.find('as')
121
                        pairs[left.text] = right.text
122
                    self.replacements_dict[source][search_field_str] = pairs
67 123

  
68 124
    def parse_body(self, body, source):
125
        """Parses the body of a trouble ticket mail coming from a source.
126

  
127
        The method parses the body of a trouble ticket mail given the source,
128
        i.e., the origin e-mail address of the trouble ticket, in order
129
        to be able to determine the template to apply.
130
        """
69 131
        self.resultset = {}
70 132
        self.resultset['FROM'] = source
71 133
        self.resultset['NREN'] = self.nren_dict[source]
72 134
        re_dict = self.re_dict[source]
73
        for search_field, regexp in re_dict.iteritems():
135
        for search_field_str, regexp in re_dict.iteritems():
74 136
            match = regexp.search(body)
75 137
            if match is not None:
76
                self.resultset[search_field] = match.group(1)
138
                if search_field_str in self.replacements_dict[source]:
139
                    pairs = self.replacements_dict[source][search_field_str]
140
                    if match.group(1) in pairs:
141
                        self.resultset[search_field_str] = pairs[match.group(1)]
142
                    else:
143
                        self.resultset[search_field_str] = DEFAULT_RS
144
                else:
145
                    self.resultset[search_field_str] = match.group(1)
77 146
                
78 147
    def parse(self, message):
148
        """Parses a trouble ticket.
149

  
150
        The method gets a full trouble ticket. It will establish its
151
        originating e-mail address (based on its header field), and
152
        will then proceed to parse its body by using the appropriate
153
        template.
154
        """
79 155
        for source in self.re_dict.keys():
80 156
            if message['header'].rfind(source) != -1:
81 157
                self.parse_body(message['body'], source)

Also available in: Unified diff