Statistics
| Branch: | Revision:

root / TroubleTicket_db2db.py @ 21:06babbe78f55

History | View | Annotate | Download (10.5 kB)

1
#!/usr/bin/env python
2
#-------------------------------------------------------------------------------
3
# Name:        TroubleTicketParser.py
4
# Purpose:     To parse Trouble Tickets mail messages stored in database
5
#
6
# Author:      Panos Louridas, from an initial version by Spyros Kopsidas
7
# Copyright:   (c) EGEE Project 2009
8
# Licence:     Apache Software License
9
#
10
#-------------------------------------------------------------------------------
11

    
12
import sys
13

    
14
from optparse import OptionParser
15

    
16
from datetime import datetime
17

    
18
import imaplib
19

    
20
from email import message_from_string
21
from email.header import decode_header
22

    
23
import TTConfig
24
import MySQLdb
25

    
26
from TroubleTicketParser import *
27

    
28
def get_unparsed_emails_from_imap(server, username, passwd):
29
    """A generator for returning unparsed emails from the IMAP server."""
30
    c = imaplib.IMAP4(server)
31
    c.login(username, passwd)
32
    c.select()
33
    typ, data = c.search(None, 'ALL')
34
    for num in data[0].split():
35
        typ, data = c.fetch(num, '(RFC822)')
36
        for response_part in data:
37
            if isinstance(response_part, tuple):
38
                msg = message_from_string(response_part[1])
39
                header = ""
40
                body = ""
41
                if msg.is_multipart():
42
                    for msg_part in msg.get_payload(decode=True):
43
                        body = body + msg_part.get_payload(decode=True)
44
                else:
45
                    body = msg.get_payload(decode=True)
46
                for header_key in msg.keys():
47
                    header = header + "\n" + header_key + ": " + msg[header_key]
48
                email = {
49
                    'ID': msg['Message-ID'],
50
                    'subject' : decode_header(msg['subject'])[0][0],
51
                    'header'  : header,
52
                    'body'    : body
53
                    }
54
                print email
55
                yield email
56
    c.close()
57
    c.logout()
58
    exit()
59
    
60
def get_unparsed_emails_from_db(cursor, table):
61
    """ A generator for returning unparsed emails from the unnormalised DB."""
62
    # Not using the more practical """ string because we want to
63
    # be able to add easily the following line for testing:
64
    # "AND header like '%grnet%' AND body like '%9966%' "
65
    query = "SELECT Message_ID, Subject, Header, Body " + \
66
        "FROM " + table + " " + \
67
        "WHERE IS_STANDARDISED IS NULL " + \
68
        "ORDER BY Date_Stored ASC"
69
    cursor.execute(query)
70
    print "Found %d raw mails." % cursor.rowcount
71

    
72
    while (1):
73
        row = cursor.fetchone()
74
        if row is None:
75
            return
76
        email = {
77
            'ID': row[0],
78
            'subject' : row[1],
79
            'header'  : row[2],
80
            'body'    : row[3]
81
            }
82
        yield email
83

    
84
def get_partner_original_id(partner_id, original_id):
85
    """Returns the partner_original_id.
86

87
    Composes the partner_original_id field in the normalised database
88
    by using the rule: partner_id + '_' + original_id.
89
    """
90
    return partner_id + '_' + str(original_id)
91
        
92
def get_datetime_str(raw_datetime):
93
    """A utility function for returning datetime objects out of strings.
94
    
95
    Tries to parse them according to the following rules:
96
    %d-%m-%Y
97
    %Y-%m-%d
98
    """
99
    try:
100
        dt = datetime.strptime(raw_datetime, "%d-%m-%Y")
101
    except ValueError:
102
        try:
103
            dt = datetime.strptime(raw_datetime, "%Y-%m-%d")
104
        except ValueError:
105
            return None
106
    return dt.isoformat()
107

    
108
def insert_values(values, db, destination_table, connection, ttid):
109
    """Insert a normalised message to the database.
110
    
111
    Inserts the normalised message represented by the values dictionary
112
    to the database at the given destination_table. ttid is the
113
    message id in the unnormalised email table.
114
    """
115

    
116
    # Find messages previously normalised with the same NREN / ID combination
117
    # as this will be an update. The previous ones will have their
118
    # MESSAGE_RNUMBER attribute increased by 1.
119
    cursor = connection.cursor()
120
    original_id = int(values.get("ORIGINAL_ID", ""))
121
    partner_id = values.get("NREN", "")
122
    partner_original_id = get_partner_original_id(partner_id, original_id)
123
    query = """SELECT ENOC_ID, MESSAGE_RNUMBER
124
        FROM %s 
125
        WHERE PARTNER_ORIGINAL_ID = '%s'"""
126
    query = query % (destination_table, partner_original_id)
127
    cursor.execute(query)
128
    # We need to fetchall to ensure that we have all the data in a
129
    # snapshot in our program, so that the update that follows
130
    # work correctly.
131
    previous_messages = cursor.fetchall()
132
    for message in previous_messages:
133
        enoc_id = message[0]
134
        message_rnumber = message[1]
135
        update_stmt = 'UPDATE ' + destination_table + ' ' + \
136
            'SET MESSAGE_RNUMBER = MESSAGE_RNUMBER + 1 ' + \
137
            'WHERE ENOC_ID  = ' + str(enoc_id)
138
        cursor.execute(update_stmt)
139
        
140
    statement_pattern = "INSERT INTO " + destination_table + """
141
        (PARTNER_ORIGINAL_ID,
142
        UNNORMALISED_ID,
143
        TT_OPEN_DATETIME, 
144
        OPEN_ENGINEER,
145
        CLOSE_ENGINEER, 
146
        TT_LASTUPDATE_DATETIME,
147
        TT_TYPE,
148
        NETWORK_NODE,
149
        NETWORK_LINK_CIRCUIT,
150
        TT_TITLE, 
151
        TT_SHORT_DESCRIPTION,
152
        TT_PRIORITY,
153
        TT_STATUS,
154
        TT_CLOSE_DATETIME,
155
        TT_LONG_DESCRIPTION,
156
        ADDITIONAL_DATA, 
157
        HISTORY,
158
        MESSAGE_RNUMBER) 
159
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, 
160
        %s, %s, %s, %s, %s, %s, %s, %s)"""
161
          
162
    raw_datetime = values.get("TT_OPEN_DATETIME", "")
163
    tt_open_datetime = get_datetime_str(raw_datetime)
164
    if tt_open_datetime is None:
165
        return
166
    raw_datetime = values.get("TT_CLOSE_DATETIME", "")
167
    tt_close_datetime = get_datetime_str(raw_datetime)
168
    raw_datetime = values.get("TT_LASTUPDATE_DATETIME", "")
169
    tt_lastupdate_datetime = get_datetime_str(raw_datetime)
170

    
171
    statement_params = (
172
        get_partner_original_id(partner_id, original_id),
173
        ttid,
174
        tt_open_datetime,
175
        values.get("OPEN_ENGINEER", ""),
176
        values.get("CLOSE_ENGINEER", ""),
177
        tt_lastupdate_datetime,
178
        values.get("TT_TYPE", ""),
179
        values.get("NETWORK_NODE", ""),
180
        values.get("NETWORK_LINK_CIRCUIT", ""),
181
        values.get("TT_TITLE", ""),
182
        values.get("TT_SHORT_DESCRIPTION", ""),
183
        values.get("TT_PRIORITY", ""),
184
        values.get("TT_STATUS", ""),
185
        tt_close_datetime,
186
        values.get("TT_LONG_DESCRIPTION", ""),
187
        values.get("ADDITIONAL_DATA", ""),
188
        values.get("TT_HISTORY", ""),
189
        1)
190

    
191
    # Insert the new normalised ticket.
192
    cursor.execute(statement_pattern, statement_params)
193
    cursor.close()
194
    connection.commit()
195

    
196
def mark_as_parsed(connection, db, source_table, ttid):
197
    """Marks the row with key ttid as processed in the email table."""
198
    update_stmt = "UPDATE %s.%s SET IS_STANDARDISED = 1 " + \
199
        "WHERE %s.Message_ID = '%s'"
200
    update_stmt = update_stmt % (db, source_table, source_table, ttid)
201
    cursor = connection.cursor()
202
    cursor.execute(update_stmt)
203
    cursor.close()
204
    connection.commit()
205

    
206
def overwrite_print(output_str, overwrite):
207
    """Outputs output_str on top of the current output by overwrite
208
    characters. Returns the number of characters output."""
209
    sys.stdout.write("%s%s\r" % (output_str, " " * overwrite))
210
    return len(output_str)
211

    
212
def main(argv):
213
    parser = OptionParser()
214
    parser.add_option("-m", "--mail", dest="mail",
215
                      action="store_true",
216
                      default=False,
217
                      help="read tickets from IMAP server")
218
    parser.add_option("-f", "--frompass", dest="frompass",
219
                      help="password of trouble tickets DB or IMAP server",
220
                      metavar="PASSWORD")
221
    parser.add_option("-t", "--topass", dest="topass",
222
                      help="password of normalised DB",
223
                      metavar="PASSWORD")    
224
    (options, args) = parser.parse_args()
225
    if options.frompass is None:
226
        print "Password of trouble tickets database is missing\n"
227
        parser.print_help()
228
        exit(-1)
229
    if options.topass is None:
230
        print "Password of normalised database is missing\n"
231
        parser.print_help()
232
        exit(-1)
233
        
234
    TTConfig.TTdb['passwd'] = options.frompass
235
    TTConfig.TTdb_v2['passwd'] = options.topass
236

    
237
    normalised_db = TTConfig.TTdb_v2['db']
238
    try:
239
        db2_conn = MySQLdb.connect(**TTConfig.TTdb_v2)
240
        print "Successfully connected to database " + normalised_db
241
    except:
242
        print "Error connecting to database " + normalised_db
243
        exit()
244

    
245
    emails_table = TTConfig.configuration['table_raw_emails']
246

    
247
    ttp = TroubleTicketParser()
248

    
249
    mails_read = 0
250
    rows_inserted = 0
251
    overwrite = 0
252
    if options.mail is True:
253
        print "Trying to read unparsed tickets from IMAP server..."
254
        host = TTConfig.IMAP_server['host']
255
        user = TTConfig.IMAP_server['user']
256
        unparsed_emails = get_unparsed_emails_from_imap(host,
257
                                                        user,
258
                                                        options.frompass)
259
    else:
260
        print "Trying to read unparsed tickets from " \
261
            + str(TTConfig.TTdb['db']) + "..."
262
        raw_db = TTConfig.TTdb['db']
263
        try:
264
            db1_conn = MySQLdb.connect(**TTConfig.TTdb)
265
            db1_cursor = db1_conn.cursor()
266
            print "Successfully connected to database " + raw_db
267
        except:
268
            print "Error connecting to database " + raw_db
269
            exit()
270
        unparsed_emails = get_unparsed_emails_from_db(db1_cursor, emails_table)
271
    for raw_mail in unparsed_emails:
272
        mails_read = mails_read + 1
273
        num_found_keys = 0
274
        ttp.parse(raw_mail)
275
        parse_results = ttp.resultset
276
        num_found_keys = len(parse_results.keys())
277
        if ('ORIGINAL_ID' in parse_results
278
            and parse_results['ORIGINAL_ID'].isdigit()
279
            and num_found_keys >= TTConfig.configuration['min_keys_parsed']):
280
            ttid = raw_mail['ID']
281
            insert_values(parse_results,
282
                          TTConfig.TTdb_v2['db'],
283
                          TTConfig.configuration['stdTickets_table'],
284
                          db2_conn,
285
                          ttid)
286
            mark_as_parsed(db1_conn, raw_db, emails_table, ttid)
287
            rows_inserted = rows_inserted + 1
288
        overwrite = overwrite_print("Mails read: %d; normalised: %d"
289
                                    % (mails_read, rows_inserted),
290
                                    overwrite)
291

    
292
    db1_conn.close()            
293
    db2_conn.close()
294

    
295
if __name__ == "__main__":
296
    main(sys.argv[1:])
297