Revision 75dc539e snf-cyclades-app/synnefo/logic/management/commands/reconcile-servers.py
b/snf-cyclades-app/synnefo/logic/management/commands/reconcile-servers.py | ||
---|---|---|
1 |
# Copyright 2011-2012 GRNET S.A. All rights reserved.
|
|
1 |
# Copyright 2011-2013 GRNET S.A. All rights reserved.
|
|
2 | 2 |
# |
3 | 3 |
# Redistribution and use in source and binary forms, with or without |
4 | 4 |
# modification, are permitted provided that the following conditions |
... | ... | |
35 | 35 |
|
36 | 36 |
""" |
37 | 37 |
import sys |
38 |
import datetime
|
|
39 |
|
|
38 |
import logging
|
|
39 |
import subprocess |
|
40 | 40 |
from optparse import make_option |
41 |
|
|
42 |
from django.core.management.base import BaseCommand, CommandError |
|
43 |
|
|
44 |
from synnefo.db.models import (Backend, VirtualMachine, Network, |
|
45 |
pooled_rapi_client) |
|
46 |
from synnefo.logic import reconciliation, utils |
|
47 |
from synnefo.logic import backend as backend_mod |
|
48 |
from synnefo.util.mac2eui64 import mac2eui64 |
|
41 |
from django.core.management.base import BaseCommand |
|
49 | 42 |
from synnefo.management.common import get_backend |
43 |
from synnefo.logic import reconciliation |
|
44 |
from synnefo.webproject.management.utils import parse_bool |
|
50 | 45 |
|
51 | 46 |
|
52 | 47 |
class Command(BaseCommand): |
53 | 48 |
can_import_settings = True |
54 | 49 |
|
55 | 50 |
help = 'Reconcile contents of Synnefo DB with state of Ganeti backend' |
56 |
output_transaction = True # The management command runs inside |
|
57 |
# an SQL transaction |
|
58 | 51 |
option_list = BaseCommand.option_list + ( |
59 |
make_option('--detect-stale', action='store_true', dest='detect_stale', |
|
60 |
default=False, help='Detect stale VM entries in DB'), |
|
61 |
make_option('--detect-orphans', action='store_true', |
|
62 |
dest='detect_orphans', |
|
63 |
default=False, help='Detect orphan instances in Ganeti'), |
|
64 |
make_option('--detect-unsynced', action='store_true', |
|
65 |
dest='detect_unsynced', |
|
66 |
default=False, help='Detect unsynced operstate between ' + |
|
67 |
'DB and Ganeti'), |
|
68 |
make_option('--detect-build-errors', action='store_true', |
|
69 |
dest='detect_build_errors', default=False, |
|
70 |
help='Detect instances with build error'), |
|
71 |
make_option('--detect-unsynced-nics', action='store_true', |
|
72 |
dest='detect_unsynced_nics', default=False, |
|
73 |
help='Detect unsynced nics between DB and Ganeti'), |
|
74 |
make_option('--detect-unsynced-flavors', action='store_true', |
|
75 |
dest='detect_unsynced_flavors', default=False, |
|
76 |
help='Detect unsynced flavors between DB and Ganeti'), |
|
77 |
make_option('--detect-all', action='store_true', |
|
78 |
dest='detect_all', |
|
79 |
default=False, help='Enable all --detect-* arguments'), |
|
52 |
make_option('--backend-id', default=None, dest='backend-id', |
|
53 |
help='Reconcilie VMs only for this backend'), |
|
54 |
make_option("--parallel", |
|
55 |
dest="parallel", |
|
56 |
default="True", |
|
57 |
choices=["True", "False"], |
|
58 |
metavar="True|False", |
|
59 |
help="Perform server reconciliation for each backend" |
|
60 |
" parallel."), |
|
80 | 61 |
make_option('--fix-stale', action='store_true', dest='fix_stale', |
81 | 62 |
default=False, help='Fix (remove) stale DB entries in DB'), |
82 | 63 |
make_option('--fix-orphans', action='store_true', dest='fix_orphans', |
... | ... | |
84 | 65 |
make_option('--fix-unsynced', action='store_true', dest='fix_unsynced', |
85 | 66 |
default=False, help='Fix server operstate in DB, set ' + |
86 | 67 |
'from Ganeti'), |
87 |
make_option('--fix-build-errors', action='store_true', |
|
88 |
dest='fix_build_errors', default=False, |
|
89 |
help='Fix (remove) instances with build errors'), |
|
90 | 68 |
make_option('--fix-unsynced-nics', action='store_true', |
91 | 69 |
dest='fix_unsynced_nics', default=False, |
92 | 70 |
help='Fix unsynced nics between DB and Ganeti'), |
... | ... | |
95 | 73 |
help='Fix unsynced flavors between DB and Ganeti'), |
96 | 74 |
make_option('--fix-all', action='store_true', dest='fix_all', |
97 | 75 |
default=False, help='Enable all --fix-* arguments'), |
98 |
make_option('--backend-id', default=None, dest='backend-id', |
|
99 |
help='Reconcilie VMs only for this backend'), |
|
100 | 76 |
) |
101 | 77 |
|
102 | 78 |
def _process_args(self, options): |
103 |
keys_detect = [k for k in options.keys() if k.startswith('detect_')] |
|
104 | 79 |
keys_fix = [k for k in options.keys() if k.startswith('fix_')] |
105 |
|
|
106 |
if not reduce(lambda x, y: x or y, |
|
107 |
map(lambda x: options[x], keys_detect)): |
|
108 |
options['detect_all'] = True |
|
109 |
|
|
110 |
if options['detect_all']: |
|
111 |
for kd in keys_detect: |
|
112 |
options[kd] = True |
|
113 | 80 |
if options['fix_all']: |
114 | 81 |
for kf in keys_fix: |
115 | 82 |
options[kf] = True |
116 | 83 |
|
117 |
for kf in keys_fix: |
|
118 |
kd = kf.replace('fix_', 'detect_', 1) |
|
119 |
if (options[kf] and not options[kd]): |
|
120 |
raise CommandError("Cannot use --%s without corresponding " |
|
121 |
"--%s argument" % (kf, kd)) |
|
122 |
|
|
123 | 84 |
def handle(self, **options): |
124 |
verbosity = int(options['verbosity']) |
|
125 |
self._process_args(options) |
|
126 | 85 |
backend_id = options['backend-id'] |
127 | 86 |
if backend_id: |
128 | 87 |
backends = [get_backend(backend_id)] |
129 | 88 |
else: |
130 |
backends = Backend.objects.filter(offline=False) |
|
131 |
|
|
132 |
with_nics = options["detect_unsynced_nics"] |
|
133 |
|
|
134 |
DBVMs = reconciliation.get_servers_from_db(backends, with_nics) |
|
135 |
GanetiVMs = reconciliation.get_instances_from_ganeti(backends) |
|
136 |
|
|
137 |
# |
|
138 |
# Detect problems |
|
139 |
# |
|
140 |
if options['detect_stale']: |
|
141 |
stale = reconciliation.stale_servers_in_db(DBVMs, GanetiVMs) |
|
142 |
if len(stale) > 0: |
|
143 |
print >> sys.stderr, "Found the following stale server IDs: " |
|
144 |
print " " + "\n ".join( |
|
145 |
[str(x) for x in stale]) |
|
146 |
elif verbosity == 2: |
|
147 |
print >> sys.stderr, "Found no stale server IDs in DB." |
|
148 |
|
|
149 |
if options['detect_orphans']: |
|
150 |
orphans = reconciliation.orphan_instances_in_ganeti(DBVMs, |
|
151 |
GanetiVMs) |
|
152 |
if len(orphans) > 0: |
|
153 |
print >> sys.stderr, "Found orphan Ganeti instances with IDs: " |
|
154 |
print " " + "\n ".join( |
|
155 |
[str(x) for x in orphans]) |
|
156 |
elif verbosity == 2: |
|
157 |
print >> sys.stderr, "Found no orphan Ganeti instances." |
|
158 |
|
|
159 |
if options['detect_unsynced']: |
|
160 |
unsynced = reconciliation.unsynced_operstate(DBVMs, GanetiVMs) |
|
161 |
if len(unsynced) > 0: |
|
162 |
print >> sys.stderr, "The operstate of the following server" \ |
|
163 |
" IDs is out-of-sync:" |
|
164 |
print " " + "\n ".join( |
|
165 |
["%d is %s in DB, %s in Ganeti" % |
|
166 |
(x[0], x[1], ('UP' if x[2] else 'DOWN')) |
|
167 |
for x in unsynced]) |
|
168 |
elif verbosity == 2: |
|
169 |
print >> sys.stderr, "The operstate of all servers is in sync." |
|
170 |
|
|
171 |
if options['detect_build_errors']: |
|
172 |
build_errors = reconciliation.\ |
|
173 |
instances_with_build_errors(DBVMs, GanetiVMs) |
|
174 |
if len(build_errors) > 0: |
|
175 |
msg = "The os for the following server IDs was not build"\ |
|
176 |
" successfully:" |
|
177 |
print >> sys.stderr, msg |
|
178 |
print " " + "\n ".join( |
|
179 |
["%d" % x for x in build_errors]) |
|
180 |
elif verbosity == 2: |
|
181 |
print >> sys.stderr, "Found no instances with build errors." |
|
182 |
|
|
183 |
if options['detect_unsynced_nics']: |
|
184 |
def pretty_print_nics(nics): |
|
185 |
if not nics: |
|
186 |
print ''.ljust(18) + 'None' |
|
187 |
for index, info in nics.items(): |
|
188 |
print ''.ljust(18) + 'nic/' + str(index) +\ |
|
189 |
': MAC: %s, IP: %s, Network: %s' % \ |
|
190 |
(info['mac'], info['ipv4'], info['network']) |
|
191 |
|
|
192 |
unsynced_nics = reconciliation.unsynced_nics(DBVMs, GanetiVMs) |
|
193 |
if len(unsynced_nics) > 0: |
|
194 |
msg = "The NICs of the servers with the following IDs are"\ |
|
195 |
" unsynced:" |
|
196 |
print >> sys.stderr, msg |
|
197 |
for id, nics in unsynced_nics.items(): |
|
198 |
print ''.ljust(2) + '%6d:' % id |
|
199 |
print ''.ljust(8) + '%8s:' % 'DB' |
|
200 |
pretty_print_nics(nics[0]) |
|
201 |
print ''.ljust(8) + '%8s:' % 'Ganeti' |
|
202 |
pretty_print_nics(nics[1]) |
|
203 |
elif verbosity == 2: |
|
204 |
print >> sys.stderr, "All instance nics are synced." |
|
205 |
|
|
206 |
if options["detect_unsynced_flavors"]: |
|
207 |
unsynced_flavors = reconciliation.unsynced_flavors(DBVMs, |
|
208 |
GanetiVMs) |
|
209 |
if len(unsynced_flavors) > 0: |
|
210 |
print >> sys.stderr, "The flavor of the following server" \ |
|
211 |
" IDs is out-of-sync:" |
|
212 |
print " " + "\n ".join( |
|
213 |
["%d is %s in DB, %s in Ganeti" % |
|
214 |
(x[0], x[1], x[2]) |
|
215 |
for x in unsynced_flavors]) |
|
216 |
elif verbosity == 2: |
|
217 |
print >> sys.stderr, "All instance flavors are synced." |
|
218 |
|
|
219 |
# |
|
220 |
# Then fix them |
|
221 |
# |
|
222 |
if options['fix_stale'] and len(stale) > 0: |
|
223 |
print >> sys.stderr, \ |
|
224 |
"Simulating successful Ganeti removal for %d " \ |
|
225 |
"servers in the DB:" % len(stale) |
|
226 |
for vm in VirtualMachine.objects.filter(pk__in=stale): |
|
227 |
event_time = datetime.datetime.now() |
|
228 |
backend_mod.process_op_status( |
|
229 |
vm=vm, |
|
230 |
etime=event_time, |
|
231 |
jobid=-0, |
|
232 |
opcode='OP_INSTANCE_REMOVE', status='success', |
|
233 |
logmsg='Reconciliation: simulated Ganeti event') |
|
234 |
print >> sys.stderr, " ...done" |
|
235 |
|
|
236 |
if options['fix_orphans'] and len(orphans) > 0: |
|
237 |
print >> sys.stderr, \ |
|
238 |
"Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \ |
|
239 |
len(orphans) |
|
240 |
for id in orphans: |
|
241 |
try: |
|
242 |
vm = VirtualMachine.objects.get(pk=id) |
|
243 |
with pooled_rapi_client(vm) as client: |
|
244 |
client.DeleteInstance(utils.id_to_instance_name(id)) |
|
245 |
except VirtualMachine.DoesNotExist: |
|
246 |
print >> sys.stderr, "No entry for VM %d in DB !!" % id |
|
247 |
print >> sys.stderr, " ...done" |
|
89 |
backends = reconciliation.get_online_backends() |
|
90 |
|
|
91 |
parallel = parse_bool(options["parallel"]) |
|
92 |
if parallel and len(backends) > 1: |
|
93 |
cmd = sys.argv |
|
94 |
processes = [] |
|
95 |
for backend in backends: |
|
96 |
p = subprocess.Popen(cmd + ["--backend-id=%s" % backend.id]) |
|
97 |
processes.append(p) |
|
98 |
for p in processes: |
|
99 |
p.wait() |
|
100 |
return |
|
101 |
|
|
102 |
verbosity = int(options["verbosity"]) |
|
103 |
|
|
104 |
logger = logging.getLogger("reconcile-severs") |
|
105 |
logger.propagate = 0 |
|
106 |
|
|
107 |
formatter = logging.Formatter("%(message)s") |
|
108 |
log_handler = logging.StreamHandler() |
|
109 |
log_handler.setFormatter(formatter) |
|
110 |
if verbosity == 2: |
|
111 |
formatter = logging.Formatter("%(asctime)s: %(message)s") |
|
112 |
log_handler.setFormatter(formatter) |
|
113 |
logger.setLevel(logging.DEBUG) |
|
114 |
elif verbosity == 1: |
|
115 |
logger.setLevel(logging.INFO) |
|
116 |
else: |
|
117 |
logger.setLevel(logging.WARNING) |
|
248 | 118 |
|
249 |
if options['fix_unsynced'] and len(unsynced) > 0: |
|
250 |
print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \ |
|
251 |
len(unsynced) |
|
252 |
for id, db_state, ganeti_up in unsynced: |
|
253 |
vm = VirtualMachine.objects.get(pk=id) |
|
254 |
opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ |
|
255 |
else "OP_INSTANCE_SHUTDOWN" |
|
256 |
event_time = datetime.datetime.now() |
|
257 |
backend_mod.process_op_status( |
|
258 |
vm=vm, etime=event_time, jobid=-0, |
|
259 |
opcode=opcode, status='success', |
|
260 |
logmsg='Reconciliation: simulated Ganeti event') |
|
261 |
print >> sys.stderr, " ...done" |
|
119 |
logger.addHandler(log_handler) |
|
262 | 120 |
|
263 |
if options['fix_build_errors'] and len(build_errors) > 0: |
|
264 |
print >> sys.stderr, "Setting the state of %d build-errors VMs:" %\ |
|
265 |
len(build_errors) |
|
266 |
for id in build_errors: |
|
267 |
vm = VirtualMachine.objects.get(pk=id) |
|
268 |
event_time = datetime.datetime.now() |
|
269 |
backend_mod.process_op_status( |
|
270 |
vm=vm, etime=event_time, jobid=-0, |
|
271 |
opcode="OP_INSTANCE_CREATE", status='error', |
|
272 |
logmsg='Reconciliation: simulated Ganeti event') |
|
273 |
print >> sys.stderr, " ...done" |
|
121 |
self._process_args(options) |
|
274 | 122 |
|
275 |
if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: |
|
276 |
print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ |
|
277 |
len(unsynced_nics) |
|
278 |
for id, nics in unsynced_nics.items(): |
|
279 |
vm = VirtualMachine.objects.get(pk=id) |
|
280 |
nics = nics[1] # Ganeti nics |
|
281 |
if nics == {}: # No nics |
|
282 |
vm.nics.all.delete() |
|
283 |
continue |
|
284 |
for index, nic in nics.items(): |
|
285 |
net_id = utils.id_from_network_name(nic['network']) |
|
286 |
subnet6 = Network.objects.get(id=net_id).subnet6 |
|
287 |
# Produce ipv6 |
|
288 |
ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None |
|
289 |
nic['ipv6'] = ipv6 |
|
290 |
# Rename ipv4 to ip |
|
291 |
nic['ip'] = nic['ipv4'] |
|
292 |
# Dict to sorted list |
|
293 |
final_nics = [] |
|
294 |
nics_keys = nics.keys() |
|
295 |
nics_keys.sort() |
|
296 |
for i in nics_keys: |
|
297 |
if nics[i]['network']: |
|
298 |
final_nics.append(nics[i]) |
|
299 |
else: |
|
300 |
print 'Network of nic %d of vm %s is None. ' \ |
|
301 |
'Can not reconcile' % (i, vm.backend_vm_id) |
|
302 |
event_time = datetime.datetime.now() |
|
303 |
backend_mod.process_net_status(vm=vm, etime=event_time, |
|
304 |
nics=final_nics) |
|
305 |
print >> sys.stderr, " ...done" |
|
306 |
if options["fix_unsynced_flavors"] and len(unsynced_flavors) > 0: |
|
307 |
print >> sys.stderr, "Setting the flavor of %d unsynced VMs:" % \ |
|
308 |
len(unsynced_flavors) |
|
309 |
for id, db_flavor, gnt_flavor in unsynced_flavors: |
|
310 |
vm = VirtualMachine.objects.get(pk=id) |
|
311 |
old_state = vm.operstate |
|
312 |
opcode = "OP_INSTANCE_SET_PARAMS" |
|
313 |
beparams = {"vcpus": gnt_flavor.cpu, |
|
314 |
"minmem": gnt_flavor.ram, |
|
315 |
"maxmem": gnt_flavor.ram} |
|
316 |
event_time = datetime.datetime.now() |
|
317 |
backend_mod.process_op_status( |
|
318 |
vm=vm, etime=event_time, jobid=-0, |
|
319 |
opcode=opcode, status='success', |
|
320 |
beparams=beparams, |
|
321 |
logmsg='Reconciliation: simulated Ganeti event') |
|
322 |
# process_op_status with beparams will set the vmstate to |
|
323 |
# shutdown. Fix this be returning it to old state |
|
324 |
vm = VirtualMachine.objects.get(pk=id) |
|
325 |
vm.operstate = old_state |
|
326 |
vm.save() |
|
327 |
print >> sys.stderr, " ...done" |
|
123 |
for backend in backends: |
|
124 |
r = reconciliation.BackendReconciler(backend=backend, |
|
125 |
logger=logger, |
|
126 |
options=options) |
|
127 |
r.reconcile() |
Also available in: Unified diff