Revision c346aed0
b/snf-cyclades-app/synnefo/logic/management/commands/reconcile-networks.py | ||
---|---|---|
34 | 34 |
logic/reconciliation.py for a description of reconciliation rules. |
35 | 35 |
|
36 | 36 |
""" |
37 |
import sys |
|
37 | 38 |
import datetime |
38 | 39 |
import bitarray |
39 | 40 |
|
... | ... | |
45 | 46 |
|
46 | 47 |
from synnefo.db.models import Backend, Network, BackendNetwork |
47 | 48 |
from synnefo.db.pools import IPPool |
48 |
from synnefo.logic import reconciliation, backend, utils |
|
49 |
from synnefo.logic import reconciliation, utils |
|
50 |
from synnefo.logic import backend as backend_mod |
|
51 |
|
|
52 |
fix = False |
|
53 |
write = sys.stdout.write |
|
49 | 54 |
|
50 | 55 |
|
51 | 56 |
class Command(BaseCommand): |
52 |
help = 'Reconcile contents of Synnefo DB with state of Ganeti backend' |
|
57 |
help = """Reconcile contents of Synnefo DB with state of Ganeti backend |
|
58 |
|
|
59 |
Network reconciliation can detect and fix the following cases: |
|
60 |
- Missing database entries for a network in a Ganeti backend |
|
61 |
- Stale database networks, which do no exist in the Ganeti backend |
|
62 |
- Missing Ganeti networks |
|
63 |
- Ganeti networks that are not connected to all Ganeti nodegroups |
|
64 |
- Networks that have unsynced state |
|
65 |
- Networks that have unsynced IP pools |
|
66 |
- Orphan networks in the Ganeti backend |
|
67 |
""" |
|
68 |
|
|
53 | 69 |
can_import_settings = True |
54 | 70 |
output_transaction = True # The management command runs inside |
55 | 71 |
# an SQL transaction |
... | ... | |
63 | 79 |
) |
64 | 80 |
|
65 | 81 |
def handle(self, **options): |
66 |
self.verbosity = int(options['verbosity'])
|
|
82 |
global fix, write
|
|
67 | 83 |
fix = options['fix'] |
84 |
write = self.stdout.write |
|
85 |
self.verbosity = int(options['verbosity']) |
|
68 | 86 |
conflicting_ips = options['conflicting_ips'] |
69 |
reconcile_networks(self.stdout, fix, conflicting_ips)
|
|
87 |
reconcile_networks(conflicting_ips) |
|
70 | 88 |
|
71 | 89 |
|
72 |
def reconcile_networks(out, fix, conflicting_ips):
|
|
90 |
def reconcile_networks(conflicting_ips=False):
|
|
73 | 91 |
# Get models from DB |
74 | 92 |
backends = Backend.objects.exclude(offline=True) |
75 | 93 |
networks = Network.objects.filter(deleted=False) |
... | ... | |
85 | 103 |
|
86 | 104 |
# Perform reconciliation for each network |
87 | 105 |
for network in networks: |
88 |
net_id = network.id |
|
89 |
destroying = network.action == 'DESTROY' |
|
90 |
uses_pool = not network.public or PUBLIC_USE_POOL |
|
91 | 106 |
ip_available_maps = [] |
92 | 107 |
ip_reserved_maps = [] |
93 |
|
|
94 |
# Perform reconciliation for each backend |
|
95 |
for b in backends: |
|
96 |
if network.public and not \ |
|
97 |
BackendNetwork.objects.filter(network=network, |
|
98 |
backend=b).exists(): |
|
99 |
continue |
|
100 |
|
|
101 |
info = (net_id, b.clustername) |
|
102 |
back_network = None |
|
103 |
|
|
104 |
try: |
|
105 |
# Get the model describing the network to this backend |
|
106 |
back_network = BackendNetwork.objects.get(network=network, |
|
107 |
backend=b) |
|
108 |
except BackendNetwork.DoesNotExist: |
|
109 |
out.write('D: No DB entry for network %d in backend %s\n' % info) |
|
110 |
if fix: |
|
111 |
out.write('F: Created entry in DB\n') |
|
112 |
back_network = \ |
|
113 |
BackendNetwork.objects.create(network=network, |
|
114 |
backend=b) |
|
108 |
uses_pool = not network.public or PUBLIC_USE_POOL |
|
109 |
for bend in backends: |
|
110 |
bnet = get_backend_network(network, bend) |
|
111 |
if not bnet: |
|
112 |
# CASE-1: Partitioned network |
|
113 |
if not network.public: |
|
114 |
bnet = reconcile_parted_network(network, bend) |
|
115 |
if not fix: |
|
116 |
continue |
|
115 | 117 |
else: |
116 | 118 |
continue |
117 | 119 |
|
118 | 120 |
try: |
119 |
# Get the info from backend |
|
120 |
ganeti_networks[b][net_id] |
|
121 |
gnet = ganeti_networks[b][network.id] |
|
121 | 122 |
except KeyError: |
122 |
# Stale network does not exist in backend |
|
123 |
if destroying: |
|
124 |
if back_network.operstate != "DELETED": |
|
125 |
out.write('D: Stale network %d in backend %s\n' % info) |
|
126 |
if fix: |
|
127 |
out.write("F: Issued OP_NETWORK_REMOVE\n") |
|
128 |
etime = datetime.datetime.now() |
|
129 |
backend.process_network_status(back_network, etime, |
|
130 |
0, 'OP_NETWORK_REMOVE', 'success', |
|
131 |
'Reconciliation simulated event.') |
|
123 |
# Network does not exist in backend. If the network action is |
|
124 |
# DESTROY, then we must destroy the network in the backend. |
|
125 |
# Else we have to create it! |
|
126 |
if network.action == "DESTROY" and bnet.operstate != "DELETED": |
|
127 |
# CASE-2: Stale DB network |
|
128 |
reconcile_stale_network(bnet) |
|
129 |
# Skip rest reconciliation as the backend is just being |
|
130 |
# deleted |
|
132 | 131 |
continue |
133 | 132 |
else: |
134 |
# Pending network |
|
135 |
out.write('D: Pending network %d in backend %s\n' % info) |
|
136 |
if fix: |
|
137 |
out.write('F: Creating network in backend.\n') |
|
138 |
backend.create_network(network, [b]) |
|
139 |
# Skip rest reconciliation as the network is just |
|
140 |
# being created |
|
133 |
# CASE-3: Missing Ganeti network |
|
134 |
reconcile_missing_network(network, bend) |
|
135 |
# Skip rest reconciliation as the network is just |
|
136 |
# being created |
|
141 | 137 |
continue |
142 | 138 |
|
143 | 139 |
try: |
144 |
hanging_groups = ganeti_hanging_networks[b][net_id]
|
|
140 |
hanging_groups = ganeti_hanging_networks[bend][network.id]
|
|
145 | 141 |
except KeyError: |
146 | 142 |
# Network is connected to all nodegroups |
147 | 143 |
hanging_groups = [] |
148 | 144 |
|
149 |
if hanging_groups and not destroying: |
|
150 |
# Hanging network = not connected to all nodegroups of backend |
|
151 |
out.write('D: Network %d in backend %s is not connected to ' |
|
152 |
'the following groups:\n' % info) |
|
153 |
out.write('- ' + '\n- '.join(hanging_groups) + '\n') |
|
154 |
if fix: |
|
155 |
for group in hanging_groups: |
|
156 |
out.write('F: Connecting network %d to nodegroup %s\n' |
|
157 |
% (net_id, group)) |
|
158 |
backend.connect_network(network, b, group=group) |
|
159 |
elif back_network and back_network.operstate != 'ACTIVE': |
|
160 |
# Network is active |
|
161 |
out.write('D: Unsynced network %d in backend %s\n' % info) |
|
162 |
if fix: |
|
163 |
out.write("F: Issued OP_NETWORK_CONNECT\n") |
|
164 |
etime = datetime.datetime.now() |
|
165 |
backend.process_network_status(back_network, etime, |
|
166 |
0, 'OP_NETWORK_CONNECT', 'success', |
|
167 |
'Reconciliation simulated event.') |
|
168 |
network = Network.objects.get(id=network.id) |
|
145 |
if hanging_groups: |
|
146 |
# CASE-3: Ganeti networks not connected to all nodegroups |
|
147 |
reconcile_hanging_groups(network, bend, hanging_groups) |
|
148 |
continue |
|
149 |
|
|
150 |
if bnet.operstate != 'ACTIVE': |
|
151 |
# CASE-4: Unsynced network state. At this point the network |
|
152 |
# exists and is connected to all nodes so is must be active! |
|
153 |
reconcile_unsynced_network(network, bend, bnet) |
|
169 | 154 |
|
170 | 155 |
if uses_pool: |
171 |
# Reconcile IP Pools |
|
172 |
gnet = ganeti_networks[b][net_id] |
|
173 |
converter = IPPool(Foo(gnet['network'])) |
|
174 |
a_map = bitarray_from_map(gnet['map']) |
|
175 |
a_map.invert() |
|
176 |
reserved = gnet['external_reservations'] |
|
177 |
r_map = a_map.copy() |
|
178 |
r_map.setall(True) |
|
179 |
for address in reserved.split(','): |
|
180 |
index = converter.value_to_index(address) |
|
181 |
a_map[index] = True |
|
182 |
r_map[index] = False |
|
183 |
ip_available_maps.append(a_map) |
|
184 |
ip_reserved_maps.append(r_map) |
|
156 |
# Get ganeti IP Pools |
|
157 |
available_map, reserved_map = get_network_pool(gnet) |
|
158 |
ip_available_maps.append(available_map) |
|
159 |
ip_reserved_maps.append(reserved_map) |
|
185 | 160 |
|
186 | 161 |
if uses_pool and (ip_available_maps or ip_reserved_maps): |
187 |
available_map = reduce(lambda x, y: x & y, ip_available_maps) |
|
188 |
reserved_map = reduce(lambda x, y: x & y, ip_reserved_maps) |
|
189 |
|
|
190 |
pool = network.get_pool() |
|
191 |
un_available = pool.available != available_map |
|
192 |
un_reserved = pool.reserved != reserved_map |
|
193 |
if un_available or un_reserved: |
|
194 |
out.write("Detected unsynchronized pool for network %r:\n" % |
|
195 |
network.id) |
|
196 |
if un_available: |
|
197 |
out.write("Available:\n\tDB: %r\n\tGB: %r\n" % |
|
198 |
(pool.available.to01(), available_map.to01())) |
|
199 |
if fix: |
|
200 |
pool.available = available_map |
|
201 |
if un_reserved: |
|
202 |
out.write("Reserved:\n\tDB: %r\n\tGB: %r\n" % |
|
203 |
(pool.reserved.to01(), reserved_map.to01())) |
|
204 |
if fix: |
|
205 |
pool.reserved = reserved_map |
|
206 |
if fix: |
|
207 |
out.write("Synchronized pools for network %r.\n" % network.id) |
|
208 |
pool.save() |
|
209 |
|
|
210 |
|
|
211 |
# Detect conflicting IPs: Detect NIC's that have the same IP |
|
212 |
# in the same network. |
|
162 |
# CASE-5: Unsynced IP Pools |
|
163 |
reconcile_ip_pools(network, ip_available_maps, ip_reserved_maps) |
|
164 |
|
|
213 | 165 |
if conflicting_ips: |
214 |
machine_ips = network.nics.all().values_list('ipv4', 'machine') |
|
215 |
ips = map(lambda x: x[0], machine_ips) |
|
216 |
distinct_ips = set(ips) |
|
217 |
if len(distinct_ips) < len(ips): |
|
218 |
out.write('D: Conflicting IP in network %s.\n' % net_id) |
|
219 |
conflicts = ips |
|
220 |
for i in distinct_ips: |
|
221 |
conflicts.remove(i) |
|
222 |
for i in conflicts: |
|
223 |
machines = [utils.id_to_instance_name(x[1]) \ |
|
224 |
for x in machine_ips if x[0] == i] |
|
225 |
out.write('\tIP:%s Machines: %s\n' % |
|
226 |
(i, ', '.join(machines))) |
|
227 |
if fix: |
|
228 |
out.write('F: Can not fix it. Manually resolve the' |
|
229 |
' conflict.\n') |
|
166 |
detect_conflicting_ips() |
|
167 |
|
|
168 |
# CASE-6: Orphan networks |
|
169 |
reconcile_orphan_networks(networks, ganeti_networks) |
|
170 |
|
|
171 |
|
|
172 |
def get_backend_network(network, backend): |
|
173 |
try: |
|
174 |
return BackendNetwork.objects.get(network=network, backend=backend) |
|
175 |
except BackendNetwork.DoesNotExist: |
|
176 |
return None |
|
177 |
|
|
178 |
|
|
179 |
def reconcile_parted_network(network, backend): |
|
180 |
write("D: Missing DB entry for network %s in backend %s\n" % |
|
181 |
(network, backend)) |
|
182 |
if fix: |
|
183 |
network.create_backend_network(backend) |
|
184 |
write("F: Created DB entry\n") |
|
185 |
bnet = get_backend_network(network, backend) |
|
186 |
return bnet |
|
230 | 187 |
|
188 |
|
|
189 |
def reconcile_stale_network(backend_network): |
|
190 |
write("D: Stale DB entry for network %s in backend %s\n" % |
|
191 |
(backend_network.network, backend_network.backend)) |
|
192 |
if fix: |
|
193 |
etime = datetime.datetime.now() |
|
194 |
backend_mod.process_network_status(backend_network, etime, 0, |
|
195 |
"OP_NETWORK_REMOVE", |
|
196 |
"success", |
|
197 |
"Reconciliation simulated event") |
|
198 |
write("F: Reconciled event: OP_NETWORK_REMOVE\n") |
|
199 |
|
|
200 |
|
|
201 |
def reconcile_missing_network(network, backend): |
|
202 |
write("D: Missing Ganeti network %s in backend %s\n" % |
|
203 |
(network, backend)) |
|
204 |
if fix: |
|
205 |
backend_mod.create_network(network, [backend]) |
|
206 |
write("F: Issued OP_NETWORK_CONNECT\n") |
|
207 |
|
|
208 |
|
|
209 |
def reconcile_hanging_groups(network, backend, hanging_groups): |
|
210 |
write('D: Network %s in backend %s is not connected to ' |
|
211 |
'the following groups:\n' % (network, backend)) |
|
212 |
write('- ' + '\n- '.join(hanging_groups) + '\n') |
|
213 |
if fix: |
|
214 |
for group in hanging_groups: |
|
215 |
write('F: Connecting network %s to nodegroup %s\n' |
|
216 |
% (network, group)) |
|
217 |
backend_mod.connect_network(network, backend, group=group) |
|
218 |
|
|
219 |
|
|
220 |
def reconcile_unsynced_network(network, backend, backend_network): |
|
221 |
write("D: Unsynced network %s in backend %s\n" % (network, backend)) |
|
222 |
if fix: |
|
223 |
write("F: Issuing OP_NETWORK_CONNECT\n") |
|
224 |
etime = datetime.datetime.now() |
|
225 |
backend_mod.process_network_status(backend_network, etime, 0, |
|
226 |
"OP_NETWORK_CONNECT", |
|
227 |
"success", |
|
228 |
"Reconciliation simulated event") |
|
229 |
|
|
230 |
|
|
231 |
def reconcile_ip_pools(network, available_maps, reserved_maps): |
|
232 |
available_map = reduce(lambda x, y: x & y, available_maps) |
|
233 |
reserved_map = reduce(lambda x, y: x & y, reserved_maps) |
|
234 |
|
|
235 |
pool = network.get_pool() |
|
236 |
if pool.available != available_map: |
|
237 |
write("D: Unsynced available map of network %s:\n" |
|
238 |
"\tDB: %r\n\tGB: %r\n" % |
|
239 |
(network, pool.available.to01(), available_map.to01(), network)) |
|
240 |
if fix: |
|
241 |
pool.available = available_map |
|
242 |
if pool.reserved != reserved_map: |
|
243 |
write("D: Unsynced reserved map of network %s:\n" |
|
244 |
"\tDB: %r\n\tGB: %r\n" % |
|
245 |
(network, pool.reserved.to01(), reserved_map.to01())) |
|
246 |
if fix: |
|
247 |
pool.reserved = reserved_map |
|
248 |
pool.save() |
|
249 |
|
|
250 |
|
|
251 |
def detect_conflicting_ips(network): |
|
252 |
"""Detect NIC's that have the same IP in the same network.""" |
|
253 |
machine_ips = network.nics.all().values_list('ipv4', 'machine') |
|
254 |
ips = map(lambda x: x[0], machine_ips) |
|
255 |
distinct_ips = set(ips) |
|
256 |
if len(distinct_ips) < len(ips): |
|
257 |
for i in distinct_ips: |
|
258 |
ips.remove(i) |
|
259 |
for i in ips: |
|
260 |
machines = [utils.id_to_instance_name(x[1]) \ |
|
261 |
for x in machine_ips if x[0] == i] |
|
262 |
write('D: Conflicting IP:%s Machines: %s\n' % |
|
263 |
(i, ', '.join(machines))) |
|
264 |
|
|
265 |
|
|
266 |
def reconcile_orphan_networks(db_networks, ganeti_networks): |
|
231 | 267 |
# Detect Orphan Networks in Ganeti |
232 |
db_network_ids = set([net.id for net in networks]) |
|
268 |
db_network_ids = set([net.id for net in db_networks])
|
|
233 | 269 |
for back_end, ganeti_networks in ganeti_networks.items(): |
234 | 270 |
ganeti_network_ids = set(ganeti_networks.keys()) |
235 | 271 |
orphans = ganeti_network_ids - db_network_ids |
236 | 272 |
|
237 | 273 |
if len(orphans) > 0: |
238 |
out.write('D: Orphan Networks in backend %s:\n' % back_end.clustername)
|
|
239 |
out.write('- ' + '\n- '.join([str(o) for o in orphans]) + '\n')
|
|
274 |
write('D: Orphan Networks in backend %s:\n' % back_end.clustername) |
|
275 |
write('- ' + '\n- '.join([str(o) for o in orphans]) + '\n') |
|
240 | 276 |
if fix: |
241 | 277 |
for net_id in orphans: |
242 |
out.write('Disconnecting and deleting network %d\n' % net_id)
|
|
278 |
write('Disconnecting and deleting network %d\n' % net_id) |
|
243 | 279 |
network = Network.objects.get(id=net_id) |
244 |
backend.delete_network(network, backends=[back_end]) |
|
280 |
backend_mod.delete_network(network, backends=[back_end]) |
|
281 |
|
|
282 |
|
|
283 |
def get_network_pool(gnet): |
|
284 |
"""Return available and reserved IP maps. |
|
285 |
|
|
286 |
Extract the available and reserved IP map from the info return from Ganeti |
|
287 |
for a network. |
|
288 |
|
|
289 |
""" |
|
290 |
converter = IPPool(Foo(gnet['network'])) |
|
291 |
a_map = bitarray_from_map(gnet['map']) |
|
292 |
a_map.invert() |
|
293 |
reserved = gnet['external_reservations'] |
|
294 |
r_map = a_map.copy() |
|
295 |
r_map.setall(True) |
|
296 |
for address in reserved.split(','): |
|
297 |
index = converter.value_to_index(address) |
|
298 |
a_map[index] = True |
|
299 |
r_map[index] = False |
|
300 |
return a_map, r_map |
|
245 | 301 |
|
246 | 302 |
|
247 | 303 |
def bitarray_from_map(bitmap): |
248 | 304 |
return bitarray.bitarray(bitmap.replace("X", "1").replace(".", "0")) |
249 | 305 |
|
250 | 306 |
|
251 |
|
|
252 | 307 |
class Foo(): |
253 | 308 |
def __init__(self, subnet): |
254 | 309 |
self.available_map = '' |
Also available in: Unified diff