Revision 0e9a423f
b/snf-cyclades-app/synnefo/logic/management/commands/network_reconcile.py | ||
---|---|---|
1 |
# Copyright 2011-2012 GRNET S.A. All rights reserved. |
|
2 |
# |
|
3 |
# Redistribution and use in source and binary forms, with or without |
|
4 |
# modification, are permitted provided that the following conditions |
|
5 |
# are met: |
|
6 |
# |
|
7 |
# 1. Redistributions of source code must retain the above copyright |
|
8 |
# notice, this list of conditions and the following disclaimer. |
|
9 |
# |
|
10 |
# 2. Redistributions in binary form must reproduce the above copyright |
|
11 |
# notice, this list of conditions and the following disclaimer in the |
|
12 |
# documentation and/or other materials provided with the distribution. |
|
13 |
# |
|
14 |
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
|
15 |
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
16 |
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
17 |
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
|
18 |
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
19 |
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
|
20 |
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
21 |
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
|
22 |
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
|
23 |
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
24 |
# SUCH DAMAGE. |
|
25 |
# |
|
26 |
# The views and conclusions contained in the software and documentation are |
|
27 |
# those of the authors and should not be interpreted as representing official |
|
28 |
# policies, either expressed or implied, of GRNET S.A. |
|
29 |
# |
|
30 |
"""Reconciliation management command |
|
31 |
|
|
32 |
Management command to reconcile the contents of the Synnefo DB with |
|
33 |
the state of the Ganeti backend. See docstring on top of |
|
34 |
logic/reconciliation.py for a description of reconciliation rules. |
|
35 |
|
|
36 |
""" |
|
37 |
import datetime |
|
38 |
|
|
39 |
from optparse import make_option |
|
40 |
|
|
41 |
from django.conf import settings |
|
42 |
from django.core.management.base import BaseCommand, CommandError |
|
43 |
|
|
44 |
from synnefo.db.models import Backend, Network, BackendNetwork |
|
45 |
from synnefo.logic import reconciliation, backend |
|
46 |
|
|
47 |
|
|
48 |
class Command(BaseCommand):
    """Management command entry point for network reconciliation.

    Parses the ``--fix-all`` flag and delegates the actual work to
    ``reconcile_networks``, writing the report to the command's stdout.
    """

    help = 'Reconcile contents of Synnefo DB with state of Ganeti backend'
    can_import_settings = True
    # The management command runs inside an SQL transaction
    output_transaction = True

    option_list = BaseCommand.option_list + (
        make_option('--fix-all',
                    action='store_true',
                    dest='fix',
                    default=False,
                    help='Fix all issues.'),
    )

    def handle(self, **options):
        """Run the reconciliation, fixing issues when ``--fix-all`` is set."""
        self.verbosity = int(options['verbosity'])
        reconcile_networks(self.stdout, options['fix'])
|
64 |
|
|
65 |
|
|
66 |
def reconcile_networks(out, fix):
    """Reconcile the networks of the Synnefo DB with the Ganeti backends.

    For every non-deleted network and every backend that is not offline,
    the DB state is compared with what the backend reports, and each
    detected inconsistency is written to ``out`` (lines prefixed ``D:``).
    Afterwards, networks that exist in a backend but not in the DB are
    reported as orphans.

    @param out: file-like object with a ``write`` method used for the report
    @param fix: when true, also repair every detected issue (lines
                prefixed ``F:`` describe the action taken)

    """
    # Get models from DB
    backends = Backend.objects.exclude(offline=True)
    networks = Network.objects.filter(deleted=False)

    # Get info from all ganeti backends
    ganeti_networks = {}
    ganeti_hanging_networks = {}
    for b in backends:
        g_nets = reconciliation.get_networks_from_ganeti(b)
        ganeti_networks[b] = g_nets
        ganeti_hanging_networks[b] = \
            reconciliation.hanging_networks(b, g_nets)

    # Perform reconciliation for each network in each backend
    for network in networks:
        for b in backends:
            _reconcile_network_in_backend(out, fix, network, b,
                                          ganeti_networks[b],
                                          ganeti_hanging_networks[b])

    # Detect Orphan Networks in Ganeti
    db_network_ids = set(net.id for net in networks)
    for back_end, g_nets in ganeti_networks.items():
        orphans = set(g_nets) - db_network_ids
        _reconcile_orphan_networks(out, fix, back_end, orphans)


def _reconcile_network_in_backend(out, fix, network, b, g_nets,
                                  g_hanging_nets):
    """Report (and optionally fix) the state of one network in one backend.

    ``g_nets`` maps the ids of the networks found in backend ``b`` to their
    Ganeti info; ``g_hanging_nets`` maps network ids to the nodegroups of
    ``b`` they are not connected to.
    """
    net_id = network.id
    destroying = network.action == 'DESTROY'
    info = (net_id, b.clustername)

    try:
        # Get the model describing the network to this backend
        back_network = BackendNetwork.objects.get(network=network,
                                                  backend=b)
    except BackendNetwork.DoesNotExist:
        back_network = None
        out.write('D: No DB entry for network %d in backend %s\n' % info)
        if fix:
            out.write('F: Created entry in DB\n')
            back_network = BackendNetwork.objects.create(network=network,
                                                         backend=b)

    if net_id not in g_nets:
        # The backend does not know this network at all
        if destroying:
            # Stale network: being destroyed and already gone from backend
            out.write('D: Stale network %d in backend %s\n' % info)
            if fix:
                out.write('F: Issued OP_NETWORK_REMOVE\n')
                etime = datetime.datetime.now()
                backend.process_network_status(
                    back_network, etime, 0, 'OP_NETWORK_REMOVE', 'success',
                    'Reconciliation simulated event.')
        else:
            # Pending network: exists in DB but not yet in backend
            out.write('D: Pending network %d in backend %s\n' % info)
            if fix:
                out.write('F: Creating network in backend.\n')
                backend.create_network(network, [b])
        # Skip the rest of the reconciliation: a network that is absent
        # from the backend can be neither hanging nor unsynced there.
        return

    # A missing entry means the network is connected to all nodegroups
    hanging_groups = g_hanging_nets.get(net_id, [])

    if hanging_groups and not destroying:
        # Hanging network = not connected to all nodegroups of backend
        out.write('D: Network %d in backend %s is not connected to '
                  'the following groups:\n' % info)
        out.write('- ' + '\n- '.join(hanging_groups) + '\n')
        if fix:
            for group in hanging_groups:
                out.write('F: Connecting network %d to nodegroup %s\n'
                          % (net_id, group))
                backend.connect_network_group(b, network, group)
    elif back_network and back_network.operstate != 'ACTIVE':
        # Network exists in the backend but the DB does not say ACTIVE
        out.write('D: Unsynced network %d in backend %s\n' % info)
        if fix:
            out.write("F: Issued OP_NETWORK_CONNECT\n")
            etime = datetime.datetime.now()
            backend.process_network_status(
                back_network, etime, 0, 'OP_NETWORK_CONNECT', 'success',
                'Reconciliation simulated event.')


def _reconcile_orphan_networks(out, fix, back_end, orphans):
    """Report (and optionally delete) backend networks unknown to the DB."""
    if not orphans:
        return

    out.write('D: Orphan Networks in backend %s:\n' % back_end.clustername)
    out.write('- ' + '\n- '.join([str(o) for o in orphans]) + '\n')
    if not fix:
        return

    #XXX:Move this to backend
    client = back_end.client
    # The nodegroups do not depend on the orphan, so fetch them only once
    groups = client.GetGroups()
    for orphan_id in orphans:
        out.write('Disconnecting and deleting network %d\n' % orphan_id)
        network = '%s%s' % (settings.BACKEND_PREFIX_ID, str(orphan_id))
        for group in groups:
            client.DisconnectNetwork(network, group)
        client.DeleteNetwork(network)
b/snf-cyclades-app/synnefo/logic/management/commands/reconcile.py | ||
---|---|---|
36 | 36 |
""" |
37 | 37 |
import sys |
38 | 38 |
import datetime |
39 |
import subprocess |
|
39 | 40 |
|
40 | 41 |
from optparse import make_option |
41 | 42 |
|
... | ... | |
65 | 66 |
make_option('--detect-build-errors', action='store_true', |
66 | 67 |
dest='detect_build_errors', default=False, |
67 | 68 |
help='Detect instances with build error'), |
69 |
make_option('--detect-unsynced-nics', action='store_true', |
|
70 |
dest='detect_unsynced_nics', default=False, |
|
71 |
help='Detect unsynced nics between DB and Ganeti'), |
|
68 | 72 |
make_option('--detect-all', action='store_true', |
69 | 73 |
dest='detect_all', |
70 | 74 |
default=False, help='Enable all --detect-* arguments'), |
... | ... | |
78 | 82 |
make_option('--fix-build-errors', action='store_true', |
79 | 83 |
dest='fix_build_errors', default=False, |
80 | 84 |
help='Fix (remove) instances with build errors'), |
85 |
make_option('--fix-unsynced-nics', action='store_true', |
|
86 |
dest='fix_unsynced_nics', default=False, |
|
87 |
help='Fix unsynced nics between DB and Ganeti'), |
|
81 | 88 |
make_option('--fix-all', action='store_true', dest='fix_all', |
82 | 89 |
default=False, help='Enable all --fix-* arguments')) |
83 | 90 |
|
... | ... | |
109 | 116 |
D = reconciliation.get_servers_from_db() |
110 | 117 |
G = reconciliation.get_instances_from_ganeti() |
111 | 118 |
|
119 |
DBNics = reconciliation.get_nics_from_db() |
|
120 |
GNics = reconciliation.get_nics_from_ganeti() |
|
112 | 121 |
# |
113 | 122 |
# Detect problems |
114 | 123 |
# |
... | ... | |
152 | 161 |
elif verbosity == 2: |
153 | 162 |
print >> sys.stderr, "Found no instances with build errors." |
154 | 163 |
|
164 |
if options['detect_unsynced_nics']: |
|
165 |
def pretty_print_nics(nics): |
|
166 |
if not nics: |
|
167 |
print ''.ljust(18) + 'None' |
|
168 |
for index, info in nics.items(): |
|
169 |
print ''.ljust(18) + 'nic/' + str(index) + ': MAC: %s, IP: %s, Network: %s' % \ |
|
170 |
(info['mac'], info['ipv4'], info['network']) |
|
171 |
|
|
172 |
unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics) |
|
173 |
if len(unsynced_nics) > 0: |
|
174 |
print >> sys.stderr, "The nics of servers with the folloing ID's "\ |
|
175 |
"are unsynced:" |
|
176 |
for id, nics in unsynced_nics.items(): |
|
177 |
print ''.ljust(2) + '%6d:' % id |
|
178 |
print ''.ljust(8) + '%8s:' % 'DB' |
|
179 |
pretty_print_nics(nics[0]) |
|
180 |
print ''.ljust(8) + '%8s:' % 'Ganeti' |
|
181 |
pretty_print_nics(nics[1]) |
|
182 |
elif verbosity == 2: |
|
183 |
print >> sys.stderr, "All instance nics are synced." |
|
184 |
|
|
155 | 185 |
# |
156 | 186 |
# Then fix them |
157 | 187 |
# |
... | ... | |
184 | 214 |
opcode = "OP_INSTANCE_REBOOT" if ganeti_up \ |
185 | 215 |
else "OP_INSTANCE_SHUTDOWN" |
186 | 216 |
event_time = datetime.datetime.now() |
187 |
backend.process_op_status(vm=vm, etime=event_time ,jobid=-0,
|
|
217 |
backend.process_op_status(vm=vm, etime=event_time, jobid=-0,
|
|
188 | 218 |
opcode=opcode, status='success', |
189 | 219 |
logmsg='Reconciliation: simulated Ganeti event') |
190 | 220 |
print >> sys.stderr, " ...done" |
... | ... | |
195 | 225 |
for id in build_errors: |
196 | 226 |
vm = VirtualMachine.objects.get(pk=id) |
197 | 227 |
event_time = datetime.datetime.now() |
198 |
backend.process_op_status(vm=vm, etime=event_time ,jobid=-0,
|
|
228 |
backend.process_op_status(vm=vm, etime=event_time, jobid=-0,
|
|
199 | 229 |
opcode="OP_INSTANCE_CREATE", status='error', |
200 | 230 |
logmsg='Reconciliation: simulated Ganeti event') |
201 | 231 |
print >> sys.stderr, " ...done" |
202 | 232 |
|
233 |
if options['fix_unsynced_nics'] and len(unsynced_nics) > 0: |
|
234 |
print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \ |
|
235 |
len(unsynced_nics) |
|
236 |
for id, nics in unsynced_nics.items(): |
|
237 |
vm = VirtualMachine.objects.get(pk=id) |
|
238 |
nics = nics[1] # Ganeti nics |
|
239 |
if nics == {}: # No nics |
|
240 |
vm.nics.all.delete() |
|
241 |
continue |
|
242 |
for index, nic in nics.items(): |
|
243 |
# Produce ipv6 |
|
244 |
ipv6 = mac2eui64(nic['mac'], settings.PUBLIC_IPV6_PREFIX) |
|
245 |
nic['ipv6'] = ipv6 |
|
246 |
# Rename ipv4 to ip |
|
247 |
nic['ip'] = nic['ipv4'] |
|
248 |
# Dict to sorted list |
|
249 |
final_nics = [] |
|
250 |
nics_keys = nics.keys() |
|
251 |
nics_keys.sort() |
|
252 |
for i in nics_keys: |
|
253 |
if nics[i]['network']: |
|
254 |
final_nics.append(nics[i]) |
|
255 |
else: |
|
256 |
print 'Network of nic %d of vm %s is None. ' \ |
|
257 |
'Can not reconcile' % (i, vm.backend_vm_id) |
|
258 |
event_time = datetime.datetime.now() |
|
259 |
backend.process_net_status(vm=vm, etime=event_time, nics=final_nics) |
|
260 |
print >> sys.stderr, " ...done" |
|
261 |
|
|
262 |
|
|
263 |
def mac2eui64(mac, prefixstr):
    """Return the output of the external ``mac2eui64`` program.

    Runs ``mac2eui64 <mac> <prefixstr>`` and returns whatever it printed on
    stdout, with trailing whitespace stripped. The caller uses the result
    as the IPv6 address of a NIC.

    Raises OSError if the ``mac2eui64`` executable cannot be started.

    """
    process = subprocess.Popen(["mac2eui64", mac, prefixstr],
                               stdout=subprocess.PIPE)
    # communicate() reads stdout to EOF, closes the pipe and waits for the
    # child, so neither a zombie process nor an open descriptor is left.
    output = process.communicate()[0]
    return output.rstrip()
b/snf-cyclades-app/synnefo/logic/reconciliation.py | ||
---|---|---|
58 | 58 |
|
59 | 59 |
import logging |
60 | 60 |
import sys |
61 |
import itertools |
|
61 | 62 |
|
62 | 63 |
from django.core.management import setup_environ |
63 | 64 |
try: |
... | ... | |
70 | 71 |
|
71 | 72 |
from datetime import datetime, timedelta |
72 | 73 |
|
73 |
from synnefo.db.models import VirtualMachine |
|
74 |
from synnefo.db.models import VirtualMachine, Network, BackendNetwork
|
|
74 | 75 |
from synnefo.util.dictconfig import dictConfig |
75 | 76 |
from synnefo.util.rapi import GanetiApiError |
76 | 77 |
from synnefo.logic.backend import get_ganeti_instances |
... | ... | |
186 | 187 |
|
187 | 188 |
return snf_instances |
188 | 189 |
|
190 |
# |
|
191 |
# Nics |
|
192 |
# |
|
193 |
def get_nics_from_ganeti():
    """Collect the network interfaces of every Synnefo instance in Ganeti.

    Only instances whose name starts with ``settings.BACKEND_PREFIX_ID``
    are considered; instances with a malformed name or a duplicate id are
    logged and ignored.

    @ rtype: dict; {instance_id: {nic_index: nic}}
    @ return Dictionary where each nic is a dict with the keys 'ipv4',
    'mac' and 'network'.

    """
    ganeti_instances = get_ganeti_instances(bulk=True)
    prefix = settings.BACKEND_PREFIX_ID

    nics_by_instance = {}
    for instance in ganeti_instances:
        name = instance['name']
        if not name.startswith(prefix):
            continue

        try:
            instance_id = int(name.split(prefix)[1])
        except Exception:
            log.error("Ignoring instance with malformed name %s", name)
            continue

        if instance_id in nics_by_instance:
            log.error("Ignoring instance with duplicate Synnefo id %s", name)
            continue

        # The three per-nic lists are parallel; zip pairs them up by
        # position (and stops at the shortest one).
        per_nic = zip(instance['nic.ips'],
                      instance['nic.macs'],
                      instance['nic.networks'])
        instance_nics = {}
        for index, (ip, mac, network) in enumerate(per_nic):
            instance_nics[index] = {'ipv4': ip,
                                    'mac': mac,
                                    'network': network}
        nics_by_instance[instance_id] = instance_nics

    return nics_by_instance
|
226 |
|
|
227 |
|
|
228 |
def get_nics_from_db():
    """Collect the network interfaces of every non-deleted VM in the DB.

    @ rtype: dict; {vm_id: {nic_index: nic}}
    @ return Dictionary where each nic is a dict with the keys 'mac',
    'network' (the backend id of the nic's network) and 'ipv4' (None when
    the stored address is the empty string).

    """
    nics_by_vm = {}
    for vm in VirtualMachine.objects.filter(deleted=False):
        vm_nics = {}
        for db_nic in vm.nics.all():
            address = db_nic.ipv4
            entry = dict(mac=db_nic.mac,
                         network=db_nic.network.backend_id,
                         ipv4=address if address != '' else None)
            vm_nics[db_nic.index] = entry
        nics_by_vm[vm.id] = vm_nics
    return nics_by_vm
|
245 |
|
|
246 |
|
|
247 |
def unsynced_nics(DBNics, GNics):
    """Find unsynced network interfaces between DB and Ganeti.

    Only instances present in both arguments are examined. The nics of an
    instance are unsynced when DB and Ganeti do not have exactly the same
    nic indices, or when a nic differs in its 'ipv4', 'mac' or 'network'.

    @ param DBNics: dict; {instance_id: {nic_index: nic}} as found in DB
    @ param GNics: dict; {instance_id: {nic_index: nic}} as found in Ganeti
    @ rtype: dict; {instance_id: (db_nics, ganeti_nics)}
    @ return Dictionary containing the instances ids that have unsynced network
    interfaces between DB and Ganeti, mapped to the network interfaces in DB
    and the network interfaces in Ganeti.

    """
    compared_fields = ('ipv4', 'mac', 'network')

    unsynced = {}
    for instance_id in set(DBNics) & set(GNics):
        db_nics = DBNics[instance_id]
        ganeti_nics = GNics[instance_id]

        # Comparing the index sets (not just the lengths) also catches the
        # case of equally many nics under different indices, which would
        # otherwise fail with a KeyError in the lookup below.
        if set(db_nics) != set(ganeti_nics):
            unsynced[instance_id] = (db_nics, ganeti_nics)
            continue

        for index, ganeti_nic in ganeti_nics.items():
            db_nic = db_nics[index]
            if any(db_nic[field] != ganeti_nic[field]
                   for field in compared_fields):
                unsynced[instance_id] = (db_nics, ganeti_nics)
                break

    return unsynced
|
275 |
|
|
276 |
# |
|
277 |
# Networks |
|
278 |
# |
|
279 |
def get_networks_from_ganeti(backend):
    """Get the Synnefo networks that exist in a Ganeti backend.

    Only networks whose name starts with ``settings.BACKEND_PREFIX_ID`` are
    returned. A network whose name does not carry an integer id after the
    prefix is logged and ignored, like instances with a malformed name.

    @ param backend: object whose ``client.GetNetworks(bulk=True)`` yields
    the network info dicts of the backend
    @ rtype: dict; {network_id: ganeti_network_info}

    """
    prefix = settings.BACKEND_PREFIX_ID

    networks = {}
    for net in backend.client.GetNetworks(bulk=True):
        name = net['name']
        if not name.startswith(prefix):
            continue
        try:
            net_id = int(name.split(prefix)[1])
        except ValueError:
            # One malformed name must not abort the whole reconciliation
            log.error("Ignoring network with malformed name %s", name)
            continue
        networks[net_id] = net

    return networks
|
290 |
|
|
291 |
|
|
292 |
def hanging_networks(backend, GNets):
    """Get networks that are not connected to all Nodegroups.

    @ param backend: object whose ``client.GetGroups()`` returns the names
    of the nodegroups of the backend
    @ param GNets: dict; {network_id: ganeti_network_info}, where the info
    has a 'group_list' whose entries start with the nodegroup name,
    optionally followed by a parenthesised part
    @ rtype: dict; {network_id: set_of_missing_nodegroup_names}
    @ return Dictionary containing only the networks that miss at least one
    nodegroup of the backend.

    """
    all_groups = set(backend.client.GetGroups())

    hanging = {}
    for net_id, info in GNets.items():
        # Keep only the group name, dropping anything from '(' onwards
        connected = set(entry.split('(')[0] for entry in info['group_list'])
        missing = all_groups - connected
        # A network that is merely connected to an extra group the backend
        # did not list is missing nothing, so it is not hanging.
        if missing:
            hanging[net_id] = missing
    return hanging
|
311 |
|
|
189 | 312 |
|
190 | 313 |
# Only for testing this module individually |
191 | 314 |
def main(): |
Also available in: Unified diff