Revision c346aed0

b/snf-cyclades-app/synnefo/logic/management/commands/reconcile-networks.py
34 34
logic/reconciliation.py for a description of reconciliation rules.
35 35

  
36 36
"""
37
import sys
37 38
import datetime
38 39
import bitarray
39 40

  
......
45 46

  
46 47
from synnefo.db.models import Backend, Network, BackendNetwork
47 48
from synnefo.db.pools import IPPool
48
from synnefo.logic import reconciliation, backend, utils
49
from synnefo.logic import reconciliation, utils
50
from synnefo.logic import backend as backend_mod
51

  
52
fix = False
53
write = sys.stdout.write
49 54

  
50 55

  
51 56
class Command(BaseCommand):
52
    help = 'Reconcile contents of Synnefo DB with state of Ganeti backend'
57
    help = """Reconcile contents of Synnefo DB with state of Ganeti backend
58

  
59
Network reconciliation can detect and fix the following cases:
60
    - Missing database entries for a network in a Ganeti backend
61
    - Stale database networks, which do no exist in the Ganeti backend
62
    - Missing Ganeti networks
63
    - Ganeti networks that are not connected to all Ganeti nodegroups
64
    - Networks that have unsynced state
65
    - Networks that have unsynced IP pools
66
    - Orphan networks in the Ganeti backend
67
"""
68

  
53 69
    can_import_settings = True
54 70
    output_transaction = True  # The management command runs inside
55 71
                               # an SQL transaction
......
63 79
        )
64 80

  
65 81
    def handle(self, **options):
        """Entry point of the management command.

        Stores the ``fix`` option and this command's stdout writer in the
        module-level ``fix`` and ``write`` globals, records the verbosity on
        the instance, and then runs ``reconcile_networks`` with the
        ``conflicting_ips`` option.
        """
        # The module-level reconcile_* helpers read these two globals
        # instead of receiving them as arguments.
        global fix, write
        fix = options['fix']
        write = self.stdout.write
        self.verbosity = int(options['verbosity'])
        reconcile_networks(options['conflicting_ips'])
70 88

  
71 89

  
72
def reconcile_networks(out, fix, conflicting_ips):
90
def reconcile_networks(conflicting_ips=False):
73 91
    # Get models from DB
74 92
    backends = Backend.objects.exclude(offline=True)
75 93
    networks = Network.objects.filter(deleted=False)
......
85 103

  
86 104
    # Perform reconciliation for each network
87 105
    for network in networks:
88
        net_id = network.id
89
        destroying = network.action == 'DESTROY'
90
        uses_pool = not network.public or PUBLIC_USE_POOL
91 106
        ip_available_maps = []
92 107
        ip_reserved_maps = []
93

  
94
        # Perform reconcilliation for each backend
95
        for b in backends:
96
            if network.public and not \
97
                BackendNetwork.objects.filter(network=network,
98
                                              backend=b).exists():
99
                    continue
100

  
101
            info = (net_id, b.clustername)
102
            back_network = None
103

  
104
            try:
105
                # Get the model describing the network to this backend
106
                back_network = BackendNetwork.objects.get(network=network,
107
                                                          backend=b)
108
            except BackendNetwork.DoesNotExist:
109
                out.write('D: No DB entry for network %d in backend %s\n' % info)
110
                if fix:
111
                    out.write('F: Created entry in DB\n')
112
                    back_network = \
113
                        BackendNetwork.objects.create(network=network,
114
                                                      backend=b)
108
        uses_pool = not network.public or PUBLIC_USE_POOL
109
        for bend in backends:
110
            bnet = get_backend_network(network, bend)
111
            if not bnet:
112
                # CASE-1: Partitioned network
113
                if not network.public:
114
                    bnet = reconcile_parted_network(network, bend)
115
                    if not fix:
116
                        continue
115 117
                else:
116 118
                    continue
117 119

  
118 120
            try:
119
                # Get the info from backend
120
                ganeti_networks[b][net_id]
121
                gnet = ganeti_networks[b][network.id]
121 122
            except KeyError:
122
                # Stale network does not exist in backend
123
                if destroying:
124
                    if back_network.operstate != "DELETED":
125
                        out.write('D: Stale network %d in backend %s\n' % info)
126
                        if fix:
127
                            out.write("F: Issued OP_NETWORK_REMOVE'\n")
128
                            etime = datetime.datetime.now()
129
                            backend.process_network_status(back_network, etime,
130
                                                0, 'OP_NETWORK_REMOVE', 'success',
131
                                                'Reconciliation simulated event.')
123
                # Network does not exist in backend. If the network action is
124
                # DESTROY, then we must destroy the network in the backend.
125
                # Else we have to create it!
126
                if network.action == "DESTROY" and bnet.operstate != "DELETED":
127
                    # CASE-2: Stale DB network
128
                    reconcile_stale_network(bnet)
129
                    # Skip rest reconciliation as the backend is just being
130
                    # deleted
132 131
                    continue
133 132
                else:
134
                    # Pending network
135
                    out.write('D: Pending network %d in backend %s\n' % info)
136
                    if fix:
137
                        out.write('F: Creating network in backend.\n')
138
                        backend.create_network(network, [b])
139
                        # Skip rest reconciliation as the network is just
140
                        # being created
133
                    # CASE-3: Missing Ganeti network
134
                    reconcile_missing_network(network, bend)
135
                    # Skip rest reconciliation as the network is just
136
                    # being created
141 137
                    continue
142 138

  
143 139
            try:
144
                hanging_groups = ganeti_hanging_networks[b][net_id]
140
                hanging_groups = ganeti_hanging_networks[bend][network.id]
145 141
            except KeyError:
146 142
                # Network is connected to all nodegroups
147 143
                hanging_groups = []
148 144

  
149
            if hanging_groups and not destroying:
150
                # Hanging network = not connected to all nodegroups of backend
151
                out.write('D: Network %d in backend %s is not connected to '
152
                          'the following groups:\n' % info)
153
                out.write('-  ' + '\n-  '.join(hanging_groups) + '\n')
154
                if fix:
155
                    for group in hanging_groups:
156
                        out.write('F: Connecting network %d to nodegroup %s\n'
157
                                  % (net_id, group))
158
                        backend.connect_network(network, b, group=group)
159
            elif back_network and back_network.operstate != 'ACTIVE':
160
                # Network is active
161
                out.write('D: Unsynced network %d in backend %s\n' % info)
162
                if fix:
163
                    out.write("F: Issued OP_NETWORK_CONNECT\n")
164
                    etime = datetime.datetime.now()
165
                    backend.process_network_status(back_network, etime,
166
                                        0, 'OP_NETWORK_CONNECT', 'success',
167
                                        'Reconciliation simulated event.')
168
                    network = Network.objects.get(id=network.id)
145
            if hanging_groups:
146
                # CASE-3: Ganeti networks not connected to all nodegroups
147
                reconcile_hanging_groups(network, bend, hanging_groups)
148
                continue
149

  
150
            if bnet.operstate != 'ACTIVE':
151
                # CASE-4: Unsynced network state. At this point the network
152
                # exists and is connected to all nodes so it must be active!
153
                reconcile_unsynced_network(network, bend, bnet)
169 154

  
170 155
            if uses_pool:
171
                # Reconcile IP Pools
172
                gnet = ganeti_networks[b][net_id]
173
                converter = IPPool(Foo(gnet['network']))
174
                a_map = bitarray_from_map(gnet['map'])
175
                a_map.invert()
176
                reserved = gnet['external_reservations']
177
                r_map = a_map.copy()
178
                r_map.setall(True)
179
                for address in reserved.split(','):
180
                    index = converter.value_to_index(address)
181
                    a_map[index] = True
182
                    r_map[index] = False
183
                ip_available_maps.append(a_map)
184
                ip_reserved_maps.append(r_map)
156
                # Get ganeti IP Pools
157
                available_map, reserved_map = get_network_pool(gnet)
158
                ip_available_maps.append(available_map)
159
                ip_reserved_maps.append(reserved_map)
185 160

  
186 161
        if uses_pool and (ip_available_maps or ip_reserved_maps):
187
            available_map = reduce(lambda x, y: x & y, ip_available_maps)
188
            reserved_map = reduce(lambda x, y: x & y, ip_reserved_maps)
189

  
190
            pool = network.get_pool()
191
            un_available = pool.available != available_map
192
            un_reserved = pool.reserved != reserved_map
193
            if un_available or un_reserved:
194
                out.write("Detected unsynchronized pool for network %r:\n" %
195
                          network.id)
196
                if un_available:
197
                    out.write("Available:\n\tDB: %r\n\tGB: %r\n" %
198
                             (pool.available.to01(), available_map.to01()))
199
                    if fix:
200
                        pool.available = available_map
201
                if un_reserved:
202
                    out.write("Reserved:\n\tDB: %r\n\tGB: %r\n" %
203
                             (pool.reserved.to01(), reserved_map.to01()))
204
                    if fix:
205
                        pool.reserved = reserved_map
206
                if fix:
207
                    out.write("Synchronized pools for network %r.\n" % network.id)
208
            pool.save()
209

  
210

  
211
        # Detect conflicting IPs: Detect NIC's that have the same IP
212
        # in the same network.
162
            # CASE-5: Unsynced IP Pools
163
            reconcile_ip_pools(network, ip_available_maps, ip_reserved_maps)
164

  
213 165
        if conflicting_ips:
214
            machine_ips = network.nics.all().values_list('ipv4', 'machine')
215
            ips = map(lambda x: x[0], machine_ips)
216
            distinct_ips = set(ips)
217
            if len(distinct_ips) < len(ips):
218
                out.write('D: Conflicting IP in network %s.\n' % net_id)
219
                conflicts = ips
220
                for i in distinct_ips:
221
                    conflicts.remove(i)
222
                for i in conflicts:
223
                    machines = [utils.id_to_instance_name(x[1]) \
224
                                for x in machine_ips if x[0] == i]
225
                    out.write('\tIP:%s Machines: %s\n' %
226
                              (i, ', '.join(machines)))
227
                if fix:
228
                    out.write('F: Can not fix it. Manually resolve the'
229
                              ' conflict.\n')
166
            detect_conflicting_ips()
167

  
168
    # CASE-6: Orphan networks
169
    reconcile_orphan_networks(networks, ganeti_networks)
170

  
171

  
172
def get_backend_network(network, backend):
    """Return the BackendNetwork row linking `network` to `backend`.

    Returns None when no such row exists.
    """
    lookup = {'network': network, 'backend': backend}
    try:
        bnet = BackendNetwork.objects.get(**lookup)
    except BackendNetwork.DoesNotExist:
        bnet = None
    return bnet
177

  
178

  
179
def reconcile_parted_network(network, backend):
    """Report a network that has no DB entry for `backend`.

    Always writes a "D:" diagnostic line.  When the module-level `fix` flag
    is set, the entry is created through `network.create_backend_network`
    and the freshly looked-up BackendNetwork is returned; otherwise nothing
    is changed and None is returned.
    """
    message = "D: Missing DB entry for network %s in backend %s\n"
    write(message % (network, backend))
    if not fix:
        return None
    network.create_backend_network(backend)
    write("F: Created DB entry\n")
    return get_backend_network(network, backend)
230 187

  
188

  
189
def reconcile_stale_network(backend_network):
    """Report a DB backend-network entry flagged as stale by the caller.

    Always writes a "D:" diagnostic line.  When the module-level `fix` flag
    is set, a successful OP_NETWORK_REMOVE event, timestamped now, is fed to
    `backend_mod.process_network_status` for `backend_network`.
    """
    details = (backend_network.network, backend_network.backend)
    write("D: Stale DB entry for network %s in backend %s\n" % details)
    if not fix:
        return
    now = datetime.datetime.now()
    backend_mod.process_network_status(
        backend_network, now, 0,
        "OP_NETWORK_REMOVE", "success", "Reconciliation simulated event")
    write("F: Reconciled event: OP_NETWORK_REMOVE\n")
199

  
200

  
201
def reconcile_missing_network(network, backend):
    """Report a network that the caller found missing from `backend`.

    Always writes a "D:" diagnostic line.  When the module-level `fix` flag
    is set, `backend_mod.create_network` is called for that single backend.
    """
    message = "D: Missing Ganeti network %s in backend %s\n"
    write(message % (network, backend))
    if not fix:
        return
    backend_mod.create_network(network, [backend])
    write("F: Issued OP_NETWORK_CONNECT\n")
207

  
208

  
209
def reconcile_hanging_groups(network, backend, hanging_groups):
    """Report the nodegroups of `backend` that `network` is not connected to.

    Always writes a "D:" header followed by one "-  <group>" line per entry
    of `hanging_groups`.  When the module-level `fix` flag is set, the
    network is connected to each listed group through
    `backend_mod.connect_network`.
    """
    header = ('D: Network %s in backend %s is not connected to '
              'the following groups:\n')
    write(header % (network, backend))
    listing = '\n-  '.join(hanging_groups)
    write('-  ' + listing + '\n')
    if not fix:
        return
    for nodegroup in hanging_groups:
        write('F: Connecting network %s to nodegroup %s\n'
              % (network, nodegroup))
        backend_mod.connect_network(network, backend, group=nodegroup)
218

  
219

  
220
def reconcile_unsynced_network(network, backend, backend_network):
    """Report a backend network whose DB state the caller found unsynced.

    Always writes a "D:" diagnostic line.  When the module-level `fix` flag
    is set, a successful OP_NETWORK_CONNECT event, timestamped now, is fed
    to `backend_mod.process_network_status` for `backend_network`.
    """
    write("D: Unsynced network %s in backend %s\n" % (network, backend))
    if fix:
        write("F: Issuing OP_NETWORK_CONNECT\n")
        etime = datetime.datetime.now()
        # Log message kept identical to the one used for the simulated
        # OP_NETWORK_REMOVE event (it previously ended in a stray "d").
        backend_mod.process_network_status(backend_network, etime, 0,
                                           "OP_NETWORK_CONNECT",
                                           "success",
                                           "Reconciliation simulated event")
229

  
230

  
231
def reconcile_ip_pools(network, available_maps, reserved_maps):
    """Compare the DB IP pool of `network` with the maps of its backends.

    `available_maps` and `reserved_maps` hold one bitmap per backend; each
    list is folded with bitwise AND into a single map.  Both lists must be
    non-empty, since `reduce` is called without an initial value.

    A "D:" diagnostic is written for every DB map that differs from the
    combined backend map.  When the module-level `fix` flag is set, the
    differing DB map is overwritten with the backend one.
    """
    available_map = reduce(lambda x, y: x & y, available_maps)
    reserved_map = reduce(lambda x, y: x & y, reserved_maps)

    pool = network.get_pool()
    if pool.available != available_map:
        # Exactly three values for the three conversions in the message; a
        # fourth, stray argument used to make this line raise TypeError.
        write("D: Unsynced available map of network %s:\n"
              "\tDB: %r\n\tGB: %r\n" %
              (network, pool.available.to01(), available_map.to01()))
        if fix:
            pool.available = available_map
    if pool.reserved != reserved_map:
        write("D: Unsynced reserved map of network %s:\n"
              "\tDB: %r\n\tGB: %r\n" %
              (network, pool.reserved.to01(), reserved_map.to01()))
        if fix:
            pool.reserved = reserved_map
    # NOTE(review): the pool is saved even when nothing was modified;
    # behavior kept as-is -- confirm whether get_pool() requires it.
    pool.save()
249

  
250

  
251
def detect_conflicting_ips(network):
    """Detect NICs that have the same IP in the same network.

    Writes one "D:" line per conflicting IP, listing the instance names
    (via `utils.id_to_instance_name`) of every machine holding that IP.
    Nothing is written when all IPs are distinct.  Detection only: no fix
    is attempted.
    """
    machine_ips = network.nics.all().values_list('ipv4', 'machine')
    ips = [entry[0] for entry in machine_ips]
    distinct_ips = set(ips)
    if len(distinct_ips) == len(ips):
        return

    # Drop one occurrence of every IP; whatever is left is duplicated.
    for ip in distinct_ips:
        ips.remove(ip)

    # An IP held by more than two NICs is left in `ips` several times;
    # report each conflicting IP once instead of once per extra NIC.
    reported = set()
    for ip in ips:
        if ip in reported:
            continue
        reported.add(ip)
        machines = [utils.id_to_instance_name(entry[1])
                    for entry in machine_ips if entry[0] == ip]
        write('D: Conflicting IP:%s Machines: %s\n' %
              (ip, ', '.join(machines)))
264

  
265

  
266
def reconcile_orphan_networks(db_networks, ganeti_networks):
    """Report networks that exist in a backend but not in `db_networks`.

    `ganeti_networks` maps each backend to a dict keyed by network id.  For
    every backend, the ids absent from `db_networks` are written as a "D:"
    listing.  When the module-level `fix` flag is set, each orphan is looked
    up in the DB by id and removed from that backend through
    `backend_mod.delete_network`.
    """
    known_ids = set(net.id for net in db_networks)
    for back_end, backend_nets in ganeti_networks.items():
        orphans = set(backend_nets) - known_ids
        if not orphans:
            continue

        write('D: Orphan Networks in backend %s:\n' % back_end.clustername)
        listing = '\n-  '.join([str(orphan) for orphan in orphans])
        write('-  ' + listing + '\n')
        if not fix:
            continue
        for orphan_id in orphans:
            write('Disconnecting and deleting network %d\n' % orphan_id)
            network = Network.objects.get(id=orphan_id)
            backend_mod.delete_network(network, backends=[back_end])
281

  
282

  
283
def get_network_pool(gnet):
    """Return available and reserved IP maps.

    Extract the available and reserved IP map from the info returned from
    Ganeti for a network.

    `gnet` is read as a mapping with the keys 'network', 'map' and
    'external_reservations' (a comma-separated string of addresses, which
    may be empty or None).

    Returns a tuple `(a_map, r_map)` of bitarrays of equal length:
      * `a_map` is True where the Ganeti map had '.', plus at every
        externally reserved address;
      * `r_map` is True everywhere except at externally reserved addresses.
    """
    converter = IPPool(Foo(gnet['network']))
    a_map = bitarray_from_map(gnet['map'])
    a_map.invert()
    r_map = a_map.copy()
    r_map.setall(True)
    reserved = gnet['external_reservations']
    # Tolerate a missing/empty reservation list and whitespace around the
    # separators instead of handing '' or ' 10.0.0.1' to the converter.
    for address in (reserved or '').split(','):
        address = address.strip()
        if not address:
            continue
        index = converter.value_to_index(address)
        a_map[index] = True
        r_map[index] = False
    return a_map, r_map
245 301

  
246 302

  
247 303
def bitarray_from_map(bitmap):
    """Build a bitarray from a map string, reading 'X' as 1 and '.' as 0."""
    bits = bitmap.replace("X", "1")
    bits = bits.replace(".", "0")
    return bitarray.bitarray(bits)
249 305

  
250 306

  
251

  
252 307
class Foo():
253 308
    def __init__(self, subnet):
254 309
        self.available_map = ''

Also available in: Unified diff