Statistics
| Branch: | Tag: | Revision:

root / snf-cyclades-app / synnefo / logic / reconciliation.py @ e77a29ab

History | View | Annotate | Download (10.9 kB)

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4
# Copyright 2011 GRNET S.A. All rights reserved.
5
#
6
# Redistribution and use in source and binary forms, with or
7
# without modification, are permitted provided that the following
8
# conditions are met:
9
#
10
#   1. Redistributions of source code must retain the above
11
#      copyright notice, this list of conditions and the following
12
#      disclaimer.
13
#
14
#   2. Redistributions in binary form must reproduce the above
15
#      copyright notice, this list of conditions and the following
16
#      disclaimer in the documentation and/or other materials
17
#      provided with the distribution.
18
#
19
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
20
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
23
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
# POSSIBILITY OF SUCH DAMAGE.
31
#
32
# The views and conclusions contained in the software and
33
# documentation are those of the authors and should not be
34
# interpreted as representing official policies, either expressed
35
# or implied, of GRNET S.A.
36
#
37
"""Business logic for reconciliation
38

39
Reconcile the contents of the DB with the actual state of the
40
Ganeti backend.
41

42
Let D be the set of VMs in the DB, G the set of VMs in Ganeti.
43
RULES:
44
    R1. Stale servers in DB:
45
            For any v in D but not in G:
46
            Set deleted=True.
47
    R2. Orphan instances in Ganeti:
48
            For any v in G with deleted=True in D:
49
            Issue OP_INSTANCE_DESTROY.
50
    R3. Unsynced operstate:
51
            For any v whose operating state differs between G and D:
52
            Set the operating state in D based on the state in G.
53
In the code, D, G are Python dicts mapping instance ids to operating state.
54
For D, the operating state is chosen from VirtualMachine.OPER_STATES.
55
For G, the operating state is True if the machine is up, False otherwise.
56

57
"""
58

    
59
import logging
60
import sys
61
import itertools
62

    
63
from django.core.management import setup_environ
64
try:
65
    from synnefo import settings
66
except ImportError:
67
    raise Exception("Cannot import settings, make sure PYTHONPATH contains "
68
                    "the parent directory of the Synnefo Django project.")
69
setup_environ(settings)
70

    
71

    
72
from datetime import datetime, timedelta
73

    
74
from synnefo.db.models import (VirtualMachine, pooled_rapi_client)
75
from synnefo.logic.rapi import GanetiApiError
76
from synnefo.logic.backend import get_instances
77
from synnefo.logic import utils
78

    
79

    
80
# Module-wide logger. getLogger() is called without a name, so this is the
# root logger rather than a logger dedicated to this module.
log = logging.getLogger()
81

    
82

    
83
def stale_servers_in_db(D, G):
    """Return the ids of servers that are in the DB but missing from Ganeti.

    D maps instance ids to their DB operating state and G maps instance ids
    to their Ganeti state; only the keys of G are used here.

    An id found in D but not in G is reported as stale right away unless its
    DB state is 'BUILD'. For a 'BUILD' server the backend is consulted, but
    only once more than 5 seconds have passed since ``vm.backendtime``:

      * creation job still 'queued', 'waiting' or 'running': not stale;
      * otherwise the instance is looked up in Ganeti; if that lookup
        succeeds the server is not stale;
      * a GanetiApiError from either call marks the server as stale.

    Returns a set of instance ids.
    """
    stale = set()
    for vm_id in set(D) - set(G):
        if D[vm_id] != 'BUILD':
            stale.add(vm_id)
            continue

        vm = VirtualMachine.objects.get(id=vm_id)
        # Check time to avoid many rapi calls
        if datetime.now() <= vm.backendtime + timedelta(seconds=5):
            continue

        with pooled_rapi_client(vm) as client:
            try:
                status = client.GetJobStatus(vm.backendjobid)['status']
                if status not in ('queued', 'waiting', 'running'):
                    # The job is over: if the instance can be fetched it
                    # has just been created in Ganeti, so it is not stale.
                    client.GetInstance(utils.id_to_instance_name(vm_id))
            except GanetiApiError:
                stale.add(vm_id)

    return stale
109

    
110

    
111
def orphan_instances_in_ganeti(D, G):
    """Return the set of instance ids that are keys of G but not of D.

    D maps instance ids to DB operating state, G maps instance ids to
    Ganeti state; only the keys of both mappings are looked at.
    """
    return set(G).difference(D)
116

    
117

    
118
def unsynced_operstate(D, G):
    """Return the servers whose DB operating state disagrees with Ganeti.

    Only ids present in both D (id -> DB operstate) and G (id -> truthy when
    the instance is up) are examined. A server is reported when:

      * it is up in Ganeti but the DB state is not "STARTED";
      * it is down in Ganeti and the DB state is none of 'BUILD', 'ERROR',
        'STOPPED';
      * it is down in Ganeti, still 'BUILD' in the DB, more than 5 seconds
        have passed since ``vm.backendtime`` and its backend job reports
        status 'success'. A GanetiApiError during that check is ignored.

    Returns a set of ``(id, db_state, ganeti_state)`` tuples.
    """
    unsynced = set()

    for vm_id in set(D) & set(G):
        db_state = D[vm_id]
        ganeti_up = G[vm_id]

        if ganeti_up:
            differs = db_state != "STARTED"
        else:
            differs = db_state not in ('BUILD', 'ERROR', 'STOPPED')
        if differs:
            unsynced.add((vm_id, db_state, ganeti_up))

        # Only servers that are down in Ganeti and still marked as building
        # in the DB need the extra job check below.
        if ganeti_up or db_state != 'BUILD':
            continue

        vm = VirtualMachine.objects.get(id=vm_id)
        # Check time to avoid many rapi calls
        if datetime.now() > vm.backendtime + timedelta(seconds=5):
            with pooled_rapi_client(vm) as client:
                try:
                    job = client.GetJobStatus(job_id=vm.backendjobid)
                    if job['status'] == 'success':
                        unsynced.add((vm_id, db_state, ganeti_up))
                except GanetiApiError:
                    pass

    return unsynced
141

    
142

    
143
def instances_with_build_errors(D, G):
    """Return the ids of servers whose build appears to have failed.

    Only ids present in both D (id -> DB operstate) and G (id -> truthy when
    the instance is up) are examined, and only those that are down in Ganeti
    while still 'BUILD' in the DB. Such a server is reported when:

      * it has no ``backendjobid`` and was created more than 120 seconds
        ago; or
      * it has a ``backendjobid``, more than 30 seconds have passed since
        ``vm.backendtime``, and the job status is 'error' or the job status
        request raises GanetiApiError.

    Returns a set of instance ids.
    """
    failed = set()

    for vm_id in set(D) & set(G):
        if G[vm_id] or D[vm_id] != 'BUILD':
            continue

        vm = VirtualMachine.objects.get(id=vm_id)

        if not vm.backendjobid:  # VM has not been enqueued in the backend
            # If a job has not been enqueued after 2 minutes, then
            # it must be a stale entry.
            if datetime.now() > vm.created + timedelta(seconds=120):
                failed.add(vm_id)
            continue

        # Check time to avoid many rapi calls
        if datetime.now() <= vm.backendtime + timedelta(seconds=30):
            continue

        with pooled_rapi_client(vm) as client:
            try:
                job = client.GetJobStatus(job_id=vm.backendjobid)
                if job['status'] == 'error':
                    failed.add(vm_id)
            except GanetiApiError:
                failed.add(vm_id)

    return failed
167

    
168

    
169
def get_servers_from_db(backends):
    """Map server ids to their DB operating state.

    Covers the VirtualMachine rows that are not marked deleted and whose
    backend is one of ``backends``.

    Returns a dict ``{vm.id: vm.operstate}``.
    """
    servers = {}
    for vm in VirtualMachine.objects.filter(deleted=False,
                                            backend__in=backends):
        servers[vm.id] = vm.operstate
    return servers
172

    
173

    
174
def get_instances_from_ganeti(backends):
    """Collect the Synnefo-owned instances of the given Ganeti backends.

    Instances are fetched from every backend with ``get_instances``. Only
    those whose name starts with ``settings.BACKEND_PREFIX_ID`` are kept.
    Instances whose name cannot be converted to an id, or whose id has
    already been seen, are logged as errors and skipped.

    Returns a tuple ``(snf_instances, snf_nics)``:
      * ``snf_instances``: dict ``{id: instance['oper_state']}``
      * ``snf_nics``: dict ``{id: get_nics_from_instance(instance)}``
    """
    # Grow one list in place; repeatedly adding lists together would copy
    # the accumulated result once per backend.
    ganeti_instances = []
    for backend in backends:
        ganeti_instances.extend(get_instances(backend))

    snf_instances = {}
    snf_nics = {}

    prefix = settings.BACKEND_PREFIX_ID
    for instance in ganeti_instances:
        name = instance['name']
        if not name.startswith(prefix):
            continue

        try:
            vm_id = utils.id_from_instance_name(name)
        except Exception:
            log.error("Ignoring instance with malformed name %s", name)
            continue

        if vm_id in snf_instances:
            log.error("Ignoring instance with duplicate Synnefo id %s", name)
            continue

        snf_instances[vm_id] = instance['oper_state']
        snf_nics[vm_id] = get_nics_from_instance(instance)

    return snf_instances, snf_nics
201

    
202

    
203
#
204
# Nics
205
#
206
def get_nics_from_ganeti(backends):
    """Get network interfaces for each ganeti instance.

    Instances are fetched from every backend with ``get_instances``. Only
    those whose name starts with ``settings.BACKEND_PREFIX_ID`` are kept.
    Instances whose name cannot be converted to an id, or whose id has
    already been seen, are logged as errors and skipped.

    Returns a dict ``{id: get_nics_from_instance(instance)}``.
    """
    # Grow one list in place; repeatedly adding lists together would copy
    # the accumulated result once per backend.
    instances = []
    for backend in backends:
        instances.extend(get_instances(backend))
    prefix = settings.BACKEND_PREFIX_ID

    snf_instances_nics = {}
    for instance in instances:
        name = instance['name']
        if not name.startswith(prefix):
            continue

        try:
            vm_id = utils.id_from_instance_name(name)
        except Exception:
            log.error("Ignoring instance with malformed name %s", name)
            continue

        if vm_id in snf_instances_nics:
            log.error("Ignoring instance with duplicate Synnefo id %s", name)
            continue

        snf_instances_nics[vm_id] = get_nics_from_instance(instance)

    return snf_instances_nics
233

    
234

    
235
def get_nics_from_instance(i):
    """Build the NIC description of one Ganeti instance.

    ``i`` is an instance dict with the parallel sequences 'nic.ips',
    'nic.macs' and 'nic.networks'. The sequences are combined position by
    position, stopping at the shortest one. 'nic.modes' and 'nic.links' are
    not read.

    Returns a dict ``{index: {'ipv4': ..., 'mac': ..., 'network': ...}}``
    keyed by the position of the NIC, starting at 0.
    """
    per_nic = zip(i['nic.ips'], i['nic.macs'], i['nic.networks'])
    return dict(
        (index, {'ipv4': ip, 'mac': mac, 'network': network})
        for index, (ip, mac, network) in enumerate(per_nic)
    )
246

    
247

    
248
def get_nics_from_db(backends):
    """Get network interfaces for each vm in DB.

    Covers the VirtualMachine rows that are not marked deleted and whose
    backend is one of ``backends``.

    Returns a dict ``{vm.id: {nic.index: nic_info}}`` where ``nic_info`` has
    the keys 'mac', 'network' (the ``backend_id`` of the NIC's network) and
    'ipv4' (None when the stored address is the empty string).
    """
    vms = VirtualMachine.objects.filter(deleted=False,
                                        backend__in=backends)
    nics_by_vm = {}
    for vm in vms:
        vm_nics = {}
        for nic in vm.nics.all():
            address = nic.ipv4
            if address == '':
                # An empty string in the DB means "no address".
                address = None
            info = {'mac': nic.mac,
                    'network': nic.network.backend_id,
                    'ipv4': address}
            vm_nics[nic.index] = info
        nics_by_vm[vm.id] = vm_nics
    return nics_by_vm
266

    
267

    
268
def unsynced_nics(DBNics, GNics):
    """Find unsynced network interfaces between DB and Ganeti.

    Only instances present in both mappings are compared. An instance is
    unsynced when the two sides do not hold exactly the same NIC indices, or
    when any NIC differs in its 'ipv4', 'mac' or 'network' value.

    @type DBNics: dict; {instance_id: {nic_index: nic}}
    @type GNics: dict; {instance_id: {nic_index: nic}}
    @rtype: dict; {instance_id: (db_nics, ganeti_nics)}
    @return: Dictionary containing the ids of the instances that have
    unsynced network interfaces, mapped to the pair of their network
    interfaces in the DB and in Ganeti.

    """
    compared_fields = ('ipv4', 'mac', 'network')

    unsynced = {}
    for i in set(DBNics) & set(GNics):
        nicsD = DBNics[i]
        nicsG = GNics[i]
        # Compare the index sets, not just the lengths: the same number of
        # NICs under different indices is a mismatch too, and would
        # otherwise raise KeyError in the per-NIC comparison below.
        if set(nicsD) != set(nicsG):
            unsynced[i] = (nicsD, nicsG)
            continue
        for index in nicsG:
            nicD = nicsD[index]
            nicG = nicsG[index]
            if any(nicD[field] != nicG[field] for field in compared_fields):
                unsynced[i] = (nicsD, nicsG)
                break

    return unsynced
297

    
298
#
299
# Networks
300
#
301

    
302

    
303
def get_networks_from_ganeti(backend):
    """Collect the Synnefo-owned networks of one Ganeti backend.

    The backend is asked for its networks in bulk mode. Only networks whose
    name starts with ``settings.BACKEND_PREFIX_ID + 'net-'`` are kept.

    Returns a dict mapping ``utils.id_from_network_name(name)`` to the
    network entry as returned by the backend.
    """
    net_prefix = settings.BACKEND_PREFIX_ID + 'net-'

    with pooled_rapi_client(backend) as client:
        # dict() consumes the generator here, while the client is held.
        return dict(
            (utils.id_from_network_name(net['name']), net)
            for net in client.GetNetworks(bulk=True)
            if net['name'].startswith(net_prefix)
        )
314

    
315

    
316
def hanging_networks(backend, GNets):
    """Get networks that are not connected to all Nodegroups.

    ``GNets`` maps network ids to network entries that carry a 'group_list'.
    The group name of each 'group_list' item is the text before its first
    '('. The resulting set is compared with the set of groups reported by
    the backend.

    Returns a dict mapping the id of every network whose group set differs
    from the backend's to the set of backend groups it is missing.
    """
    with pooled_rapi_client(backend) as client:
        all_groups = set(client.GetGroups())

    hanging = {}
    for net_id, net in GNets.items():
        connected = set(entry.split('(')[0] for entry in net['group_list'])
        if connected != all_groups:
            hanging[net_id] = all_groups - connected
    return hanging
336

    
337

    
338
# Only for testing this module individually
339
def main():
    """Print the instances found in Ganeti; for manual testing only.

    get_instances_from_ganeti() requires the backends to query, so they are
    looked up here before the call.
    """
    # Imported locally: only this manual check needs the model.
    # NOTE(review): assumes synnefo.db.models exposes a ``Backend`` model
    # with the default ``objects`` manager -- confirm before relying on it.
    from synnefo.db.models import Backend

    backends = Backend.objects.all()
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(get_instances_from_ganeti(backends))


if __name__ == "__main__":
    sys.exit(main())