root / TroubleTicket_db2db.py @ 21:06babbe78f55
History | View | Annotate | Download (10.5 kB)
1 |
#!/usr/bin/env python
|
---|---|
2 |
#-------------------------------------------------------------------------------
|
3 |
# Name: TroubleTicketParser.py
|
4 |
# Purpose: To parse Trouble Tickets mail messages stored in database
|
5 |
#
|
6 |
# Author: Panos Louridas, from an initial version by Spyros Kopsidas
|
7 |
# Copyright: (c) EGEE Project 2009
|
8 |
# Licence: Apache Software License
|
9 |
#
|
10 |
#-------------------------------------------------------------------------------
|
11 |
|
12 |
import sys |
13 |
|
14 |
from optparse import OptionParser |
15 |
|
16 |
from datetime import datetime |
17 |
|
18 |
import imaplib |
19 |
|
20 |
from email import message_from_string |
21 |
from email.header import decode_header |
22 |
|
23 |
import TTConfig |
24 |
import MySQLdb |
25 |
|
26 |
from TroubleTicketParser import * |
27 |
|
28 |
def get_unparsed_emails_from_imap(server, username, passwd): |
29 |
"""A generator for returning unparsed emails from the IMAP server."""
|
30 |
c = imaplib.IMAP4(server) |
31 |
c.login(username, passwd) |
32 |
c.select() |
33 |
typ, data = c.search(None, 'ALL') |
34 |
for num in data[0].split(): |
35 |
typ, data = c.fetch(num, '(RFC822)')
|
36 |
for response_part in data: |
37 |
if isinstance(response_part, tuple): |
38 |
msg = message_from_string(response_part[1])
|
39 |
header = ""
|
40 |
body = ""
|
41 |
if msg.is_multipart():
|
42 |
for msg_part in msg.get_payload(decode=True): |
43 |
body = body + msg_part.get_payload(decode=True)
|
44 |
else:
|
45 |
body = msg.get_payload(decode=True)
|
46 |
for header_key in msg.keys(): |
47 |
header = header + "\n" + header_key + ": " + msg[header_key] |
48 |
email = { |
49 |
'ID': msg['Message-ID'], |
50 |
'subject' : decode_header(msg['subject'])[0][0], |
51 |
'header' : header,
|
52 |
'body' : body
|
53 |
} |
54 |
print email
|
55 |
yield email
|
56 |
c.close() |
57 |
c.logout() |
58 |
exit()
|
59 |
|
60 |
def get_unparsed_emails_from_db(cursor, table): |
61 |
""" A generator for returning unparsed emails from the unnormalised DB."""
|
62 |
# Not using the more practical """ string because we want to
|
63 |
# be able to add easily the following line for testing:
|
64 |
# "AND header like '%grnet%' AND body like '%9966%' "
|
65 |
query = "SELECT Message_ID, Subject, Header, Body " + \
|
66 |
"FROM " + table + " " + \ |
67 |
"WHERE IS_STANDARDISED IS NULL " + \
|
68 |
"ORDER BY Date_Stored ASC"
|
69 |
cursor.execute(query) |
70 |
print "Found %d raw mails." % cursor.rowcount |
71 |
|
72 |
while (1): |
73 |
row = cursor.fetchone() |
74 |
if row is None: |
75 |
return
|
76 |
email = { |
77 |
'ID': row[0], |
78 |
'subject' : row[1], |
79 |
'header' : row[2], |
80 |
'body' : row[3] |
81 |
} |
82 |
yield email
|
83 |
|
84 |
def get_partner_original_id(partner_id, original_id): |
85 |
"""Returns the partner_original_id.
|
86 |
|
87 |
Composes the partner_original_id field in the normalised database
|
88 |
by using the rule: partner_id + '_' + original_id.
|
89 |
"""
|
90 |
return partner_id + '_' + str(original_id) |
91 |
|
92 |
def get_datetime_str(raw_datetime): |
93 |
"""A utility function for returning datetime objects out of strings.
|
94 |
|
95 |
Tries to parse them according to the following rules:
|
96 |
%d-%m-%Y
|
97 |
%Y-%m-%d
|
98 |
"""
|
99 |
try:
|
100 |
dt = datetime.strptime(raw_datetime, "%d-%m-%Y")
|
101 |
except ValueError: |
102 |
try:
|
103 |
dt = datetime.strptime(raw_datetime, "%Y-%m-%d")
|
104 |
except ValueError: |
105 |
return None |
106 |
return dt.isoformat()
|
107 |
|
108 |
def insert_values(values, db, destination_table, connection, ttid): |
109 |
"""Insert a normalised message to the database.
|
110 |
|
111 |
Inserts the normalised message represented by the values dictionary
|
112 |
to the database at the given destination_table. ttid is the
|
113 |
message id in the unnormalised email table.
|
114 |
"""
|
115 |
|
116 |
# Find messages previously normalised with the same NREN / ID combination
|
117 |
# as this will be an update. The previous ones will have their
|
118 |
# MESSAGE_RNUMBER attribute increased by 1.
|
119 |
cursor = connection.cursor() |
120 |
original_id = int(values.get("ORIGINAL_ID", "")) |
121 |
partner_id = values.get("NREN", "") |
122 |
partner_original_id = get_partner_original_id(partner_id, original_id) |
123 |
query = """SELECT ENOC_ID, MESSAGE_RNUMBER
|
124 |
FROM %s
|
125 |
WHERE PARTNER_ORIGINAL_ID = '%s'"""
|
126 |
query = query % (destination_table, partner_original_id) |
127 |
cursor.execute(query) |
128 |
# We need to fetchall to ensure that we have all the data in a
|
129 |
# snapshot in our program, so that the update that follows
|
130 |
# work correctly.
|
131 |
previous_messages = cursor.fetchall() |
132 |
for message in previous_messages: |
133 |
enoc_id = message[0]
|
134 |
message_rnumber = message[1]
|
135 |
update_stmt = 'UPDATE ' + destination_table + ' ' + \ |
136 |
'SET MESSAGE_RNUMBER = MESSAGE_RNUMBER + 1 ' + \
|
137 |
'WHERE ENOC_ID = ' + str(enoc_id) |
138 |
cursor.execute(update_stmt) |
139 |
|
140 |
statement_pattern = "INSERT INTO " + destination_table + """ |
141 |
(PARTNER_ORIGINAL_ID,
|
142 |
UNNORMALISED_ID,
|
143 |
TT_OPEN_DATETIME,
|
144 |
OPEN_ENGINEER,
|
145 |
CLOSE_ENGINEER,
|
146 |
TT_LASTUPDATE_DATETIME,
|
147 |
TT_TYPE,
|
148 |
NETWORK_NODE,
|
149 |
NETWORK_LINK_CIRCUIT,
|
150 |
TT_TITLE,
|
151 |
TT_SHORT_DESCRIPTION,
|
152 |
TT_PRIORITY,
|
153 |
TT_STATUS,
|
154 |
TT_CLOSE_DATETIME,
|
155 |
TT_LONG_DESCRIPTION,
|
156 |
ADDITIONAL_DATA,
|
157 |
HISTORY,
|
158 |
MESSAGE_RNUMBER)
|
159 |
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
|
160 |
%s, %s, %s, %s, %s, %s, %s, %s)"""
|
161 |
|
162 |
raw_datetime = values.get("TT_OPEN_DATETIME", "") |
163 |
tt_open_datetime = get_datetime_str(raw_datetime) |
164 |
if tt_open_datetime is None: |
165 |
return
|
166 |
raw_datetime = values.get("TT_CLOSE_DATETIME", "") |
167 |
tt_close_datetime = get_datetime_str(raw_datetime) |
168 |
raw_datetime = values.get("TT_LASTUPDATE_DATETIME", "") |
169 |
tt_lastupdate_datetime = get_datetime_str(raw_datetime) |
170 |
|
171 |
statement_params = ( |
172 |
get_partner_original_id(partner_id, original_id), |
173 |
ttid, |
174 |
tt_open_datetime, |
175 |
values.get("OPEN_ENGINEER", ""), |
176 |
values.get("CLOSE_ENGINEER", ""), |
177 |
tt_lastupdate_datetime, |
178 |
values.get("TT_TYPE", ""), |
179 |
values.get("NETWORK_NODE", ""), |
180 |
values.get("NETWORK_LINK_CIRCUIT", ""), |
181 |
values.get("TT_TITLE", ""), |
182 |
values.get("TT_SHORT_DESCRIPTION", ""), |
183 |
values.get("TT_PRIORITY", ""), |
184 |
values.get("TT_STATUS", ""), |
185 |
tt_close_datetime, |
186 |
values.get("TT_LONG_DESCRIPTION", ""), |
187 |
values.get("ADDITIONAL_DATA", ""), |
188 |
values.get("TT_HISTORY", ""), |
189 |
1)
|
190 |
|
191 |
# Insert the new normalised ticket.
|
192 |
cursor.execute(statement_pattern, statement_params) |
193 |
cursor.close() |
194 |
connection.commit() |
195 |
|
196 |
def mark_as_parsed(connection, db, source_table, ttid): |
197 |
"""Marks the row with key ttid as processed in the email table."""
|
198 |
update_stmt = "UPDATE %s.%s SET IS_STANDARDISED = 1 " + \
|
199 |
"WHERE %s.Message_ID = '%s'"
|
200 |
update_stmt = update_stmt % (db, source_table, source_table, ttid) |
201 |
cursor = connection.cursor() |
202 |
cursor.execute(update_stmt) |
203 |
cursor.close() |
204 |
connection.commit() |
205 |
|
206 |
def overwrite_print(output_str, overwrite): |
207 |
"""Outputs output_str on top of the current output by overwrite
|
208 |
characters. Returns the number of characters output."""
|
209 |
sys.stdout.write("%s%s\r" % (output_str, " " * overwrite)) |
210 |
return len(output_str) |
211 |
|
212 |
def main(argv): |
213 |
parser = OptionParser() |
214 |
parser.add_option("-m", "--mail", dest="mail", |
215 |
action="store_true",
|
216 |
default=False,
|
217 |
help="read tickets from IMAP server")
|
218 |
parser.add_option("-f", "--frompass", dest="frompass", |
219 |
help="password of trouble tickets DB or IMAP server",
|
220 |
metavar="PASSWORD")
|
221 |
parser.add_option("-t", "--topass", dest="topass", |
222 |
help="password of normalised DB",
|
223 |
metavar="PASSWORD")
|
224 |
(options, args) = parser.parse_args() |
225 |
if options.frompass is None: |
226 |
print "Password of trouble tickets database is missing\n" |
227 |
parser.print_help() |
228 |
exit(-1) |
229 |
if options.topass is None: |
230 |
print "Password of normalised database is missing\n" |
231 |
parser.print_help() |
232 |
exit(-1) |
233 |
|
234 |
TTConfig.TTdb['passwd'] = options.frompass
|
235 |
TTConfig.TTdb_v2['passwd'] = options.topass
|
236 |
|
237 |
normalised_db = TTConfig.TTdb_v2['db']
|
238 |
try:
|
239 |
db2_conn = MySQLdb.connect(**TTConfig.TTdb_v2) |
240 |
print "Successfully connected to database " + normalised_db |
241 |
except:
|
242 |
print "Error connecting to database " + normalised_db |
243 |
exit()
|
244 |
|
245 |
emails_table = TTConfig.configuration['table_raw_emails']
|
246 |
|
247 |
ttp = TroubleTicketParser() |
248 |
|
249 |
mails_read = 0
|
250 |
rows_inserted = 0
|
251 |
overwrite = 0
|
252 |
if options.mail is True: |
253 |
print "Trying to read unparsed tickets from IMAP server..." |
254 |
host = TTConfig.IMAP_server['host']
|
255 |
user = TTConfig.IMAP_server['user']
|
256 |
unparsed_emails = get_unparsed_emails_from_imap(host, |
257 |
user, |
258 |
options.frompass) |
259 |
else:
|
260 |
print "Trying to read unparsed tickets from " \ |
261 |
+ str(TTConfig.TTdb['db']) + "..." |
262 |
raw_db = TTConfig.TTdb['db']
|
263 |
try:
|
264 |
db1_conn = MySQLdb.connect(**TTConfig.TTdb) |
265 |
db1_cursor = db1_conn.cursor() |
266 |
print "Successfully connected to database " + raw_db |
267 |
except:
|
268 |
print "Error connecting to database " + raw_db |
269 |
exit()
|
270 |
unparsed_emails = get_unparsed_emails_from_db(db1_cursor, emails_table) |
271 |
for raw_mail in unparsed_emails: |
272 |
mails_read = mails_read + 1
|
273 |
num_found_keys = 0
|
274 |
ttp.parse(raw_mail) |
275 |
parse_results = ttp.resultset |
276 |
num_found_keys = len(parse_results.keys())
|
277 |
if ('ORIGINAL_ID' in parse_results |
278 |
and parse_results['ORIGINAL_ID'].isdigit() |
279 |
and num_found_keys >= TTConfig.configuration['min_keys_parsed']): |
280 |
ttid = raw_mail['ID']
|
281 |
insert_values(parse_results, |
282 |
TTConfig.TTdb_v2['db'],
|
283 |
TTConfig.configuration['stdTickets_table'],
|
284 |
db2_conn, |
285 |
ttid) |
286 |
mark_as_parsed(db1_conn, raw_db, emails_table, ttid) |
287 |
rows_inserted = rows_inserted + 1
|
288 |
overwrite = overwrite_print("Mails read: %d; normalised: %d"
|
289 |
% (mails_read, rows_inserted), |
290 |
overwrite) |
291 |
|
292 |
db1_conn.close() |
293 |
db2_conn.close() |
294 |
|
295 |
if __name__ == "__main__": |
296 |
main(sys.argv[1:])
|
297 |
|