Statistics
| Branch: | Tag: | Revision:

root / snf-cyclades-app / synnefo / logic / reconciliation.py @ b2272468

History | View | Annotate | Download (11 kB)

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
#
4
# Copyright 2011 GRNET S.A. All rights reserved.
5
#
6
# Redistribution and use in source and binary forms, with or
7
# without modification, are permitted provided that the following
8
# conditions are met:
9
#
10
#   1. Redistributions of source code must retain the above
11
#      copyright notice, this list of conditions and the following
12
#      disclaimer.
13
#
14
#   2. Redistributions in binary form must reproduce the above
15
#      copyright notice, this list of conditions and the following
16
#      disclaimer in the documentation and/or other materials
17
#      provided with the distribution.
18
#
19
# THIS SOFTWARE IS PROVIDED BY GRNET S.A. ``AS IS'' AND ANY EXPRESS
20
# OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GRNET S.A OR
23
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26
# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27
# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
# POSSIBILITY OF SUCH DAMAGE.
31
#
32
# The views and conclusions contained in the software and
33
# documentation are those of the authors and should not be
34
# interpreted as representing official policies, either expressed
35
# or implied, of GRNET S.A.
36
#
37
"""Business logic for reconciliation
38

39
Reconcile the contents of the DB with the actual state of the
40
Ganeti backend.
41

42
Let D be the set of VMs in the DB, G the set of VMs in Ganeti.
43
RULES:
44
    R1. Stale servers in DB:
45
            For any v in D but not in G:
46
            Set deleted=True.
47
    R2. Orphan instances in Ganeti:
48
            For any v in G with deleted=True in D:
49
            Issue OP_INSTANCE_DESTROY.
50
    R3. Unsynced operstate:
51
            For any v whose operating state differs between G and D:
52
            Set the operating state in D based on the state in G.
53
In the code, D, G are Python dicts mapping instance ids to operating state.
54
For D, the operating state is chosen from VirtualMachine.OPER_STATES.
55
For G, the operating state is True if the machine is up, False otherwise.
56

57
"""
58

    
59
import logging
60
import sys
61
import itertools
62

    
63
from django.core.management import setup_environ
64
try:
65
    from synnefo import settings
66
except ImportError:
67
    raise Exception("Cannot import settings, make sure PYTHONPATH contains "
68
                    "the parent directory of the Synnefo Django project.")
69
setup_environ(settings)
70

    
71

    
72
from datetime import datetime, timedelta
73

    
74
from synnefo.db.models import (VirtualMachine, pooled_rapi_client)
75
from synnefo.logic.rapi import GanetiApiError
76
from synnefo.logic.backend import get_ganeti_instances, get_backends
77
from synnefo.logic import utils
78

    
79

    
80
log = logging.getLogger()
81

    
82
# Interval, in seconds, used by needs_reconciliation(); defaults to 60 when
# settings does not define RECONCILIATION_CHECK_INTERVAL.
CHECK_INTERVAL = getattr(settings, 'RECONCILIATION_CHECK_INTERVAL', 60)
86

    
87

    
88
def needs_reconciliation(vm):
    """Tell whether enough time has passed to check `vm` against the backend.

    Returns True when the current time is more than CHECK_INTERVAL seconds
    past `vm.updated`, or more than twice CHECK_INTERVAL seconds past
    `vm.backendtime`.
    """
    current_time = datetime.now()
    if current_time > vm.updated + timedelta(seconds=CHECK_INTERVAL):
        return True
    backend_deadline = vm.backendtime + timedelta(seconds=2 * CHECK_INTERVAL)
    return current_time > backend_deadline
92

    
93

    
94
def stale_servers_in_db(D, G):
    """Return the ids that exist in D (the DB) but not in G (Ganeti).

    An id whose DB state is anything other than 'BUILD' is always reported.
    An id in 'BUILD' state is looked at only when needs_reconciliation()
    says so, and is reported only if querying the backend for its job or
    for the instance itself raises GanetiApiError.
    """
    stale = set()
    for vm_id in set(D).difference(G):
        if D[vm_id] != 'BUILD':
            stale.add(vm_id)
            continue

        vm = VirtualMachine.objects.get(id=vm_id)
        if not needs_reconciliation(vm):
            continue

        with pooled_rapi_client(vm) as client:
            try:
                status = client.GetJobStatus(vm.backendjobid)['status']
                if status not in ('queued', 'waiting', 'running'):
                    # The job is over; the instance lookup raises when the
                    # server never made it into Ganeti.
                    client.GetInstance(utils.id_to_instance_name(vm_id))
            except GanetiApiError:
                stale.add(vm_id)

    return stale
119

    
120

    
121
def orphan_instances_in_ganeti(D, G):
    """Return the set of ids that are keys of G (Ganeti) but not of D (DB)."""
    return set(G).difference(D)
126

    
127

    
128
def unsynced_operstate(D, G):
    """Find VMs whose operating state in the DB disagrees with Ganeti.

    Only ids present in both D and G are examined. A VM is reported when
    Ganeti says it is up but the DB state is not "STARTED", or when Ganeti
    says it is down and the DB state is none of 'BUILD', 'ERROR', 'STOPPED'.
    A VM that is down in Ganeti and still 'BUILD' in the DB is also reported
    if needs_reconciliation() holds and its backend job finished with status
    'success'; backend API errors during that check are ignored.

    Returns a set of (id, db_state, ganeti_state) tuples.
    """
    mismatches = set()

    for vm_id in set(D).intersection(G):
        db_state = D[vm_id]
        is_up = G[vm_id]

        if is_up:
            out_of_sync = db_state != "STARTED"
        else:
            out_of_sync = db_state not in ('BUILD', 'ERROR', 'STOPPED')
        if out_of_sync:
            mismatches.add((vm_id, db_state, is_up))

        # The extra job check below applies only to down-in-Ganeti VMs that
        # the DB still considers to be building.
        if is_up or db_state != 'BUILD':
            continue
        vm = VirtualMachine.objects.get(id=vm_id)
        if not needs_reconciliation(vm):
            continue
        with pooled_rapi_client(vm) as client:
            try:
                job_info = client.GetJobStatus(job_id=vm.backendjobid)
                if job_info['status'] == 'success':
                    mismatches.add((vm_id, db_state, is_up))
            except GanetiApiError:
                pass

    return mismatches
150

    
151

    
152
def instances_with_build_errors(D, G):
    """Return ids of VMs stuck in 'BUILD' whose creation appears to have failed.

    Only ids present in both D and G, that are down in G and 'BUILD' in D,
    are considered. Such a VM is reported when:
      * it has no backend job id and was created more than 120 seconds ago;
      * or needs_reconciliation() holds and its backend job has status
        'error', or querying the job raises GanetiApiError.
    """
    failed = set()

    for vm_id in set(D).intersection(G):
        if G[vm_id] or D[vm_id] != 'BUILD':
            continue

        vm = VirtualMachine.objects.get(id=vm_id)
        if not vm.backendjobid:
            # No job was ever enqueued in the backend; after two minutes
            # the entry is treated as stale.
            if datetime.now() > vm.created + timedelta(seconds=120):
                failed.add(vm_id)
            continue

        # Rate-limit the RAPI calls.
        if not needs_reconciliation(vm):
            continue
        with pooled_rapi_client(vm) as client:
            try:
                job_info = client.GetJobStatus(job_id=vm.backendjobid)
                if job_info['status'] == 'error':
                    failed.add(vm_id)
            except GanetiApiError:
                failed.add(vm_id)

    return failed
176

    
177

    
178
def get_servers_from_db(backend=None):
    """Map the id of every non-deleted VM to its operstate.

    The VMs are restricted to the backends returned by
    get_backends(backend).
    """
    backends = get_backends(backend)
    active_vms = VirtualMachine.objects.filter(deleted=False,
                                               backend__in=backends)
    return dict((vm.id, vm.operstate) for vm in active_vms)
182

    
183

    
184
def get_instances_from_ganeti(backend=None):
    """Collect operating state and NICs of the Synnefo instances in Ganeti.

    Instances whose name does not start with settings.BACKEND_PREFIX_ID are
    skipped silently. Instances whose name cannot be converted to an id, or
    whose id was already seen, are skipped with an error log entry.

    Returns a pair of dicts keyed by instance id: the first maps to the
    instance's 'oper_state', the second to get_nics_from_instance() of it.
    """
    ganeti_instances = get_ganeti_instances(backend=backend, bulk=True)
    prefix = settings.BACKEND_PREFIX_ID

    oper_states = {}
    nics = {}
    for instance in ganeti_instances:
        name = instance['name']
        if not name.startswith(prefix):
            continue

        try:
            vm_id = utils.id_from_instance_name(name)
        except Exception:
            log.error("Ignoring instance with malformed name %s", name)
            continue

        if vm_id in oper_states:
            log.error("Ignoring instance with duplicate Synnefo id %s", name)
            continue

        oper_states[vm_id] = instance['oper_state']
        nics[vm_id] = get_nics_from_instance(instance)

    return oper_states, nics
208

    
209

    
210
#
211
# Nics
212
#
213
def get_nics_from_ganeti(backend=None):
    """Get network interfaces for each ganeti instance.

    Returns a dict mapping instance id to get_nics_from_instance() of that
    instance. Instances whose name does not start with
    settings.BACKEND_PREFIX_ID are skipped silently; malformed names and
    duplicate ids are skipped with an error log entry.
    """
    ganeti_instances = get_ganeti_instances(backend=backend, bulk=True)
    prefix = settings.BACKEND_PREFIX_ID

    nics_by_id = {}
    for instance in ganeti_instances:
        name = instance['name']
        if not name.startswith(prefix):
            continue

        try:
            vm_id = utils.id_from_instance_name(name)
        except Exception:
            log.error("Ignoring instance with malformed name %s", name)
            continue

        if vm_id in nics_by_id:
            log.error("Ignoring instance with duplicate Synnefo id %s", name)
            continue

        nics_by_id[vm_id] = get_nics_from_instance(instance)

    return nics_by_id
237

    
238

    
239
def get_nics_from_instance(i):
    """Build a {position: nic} dict from the per-NIC lists of an instance.

    `i` is indexed with the keys 'nic.ips', 'nic.macs', 'nic.networks.names'
    and 'nic.names'. The n-th entries of these lists are combined into one
    dict with the keys 'ipv4', 'mac', 'network' and 'name', stored under
    position n. Lists of unequal length are cut to the shortest one.
    """
    columns = zip(i['nic.ips'],
                  i['nic.macs'],
                  i['nic.networks.names'],
                  i['nic.names'])

    nics = {}
    for position, (ip, mac, network, name) in enumerate(columns):
        nics[position] = {'ipv4': ip,
                          'mac': mac,
                          'network': network,
                          'name': name}
    return nics
251

    
252

    
253
def get_nics_from_db(backend=None):
    """Get network interfaces for each vm in DB.

    Considers the non-deleted VMs of the backends returned by
    get_backends(backend). Returns {vm_id: {nic_index: nic}}, where each nic
    is a dict with the keys 'mac', 'network' (the backend_id of the NIC's
    network) and 'ipv4' (None when the stored address is the empty string).
    """
    backends = get_backends(backend)
    vms = VirtualMachine.objects.filter(deleted=False, backend__in=backends)

    nics_by_vm = {}
    for vm in vms:
        vm_nics = {}
        for db_nic in vm.nics.all():
            address = db_nic.ipv4
            vm_nics[db_nic.index] = {
                'mac': db_nic.mac,
                'network': db_nic.network.backend_id,
                'ipv4': address if address != '' else None,
            }
        nics_by_vm[vm.id] = vm_nics
    return nics_by_vm
272

    
273

    
274
def unsynced_nics(DBNics, GNics):
    """Find unsynced network interfaces between DB and Ganeti.

    Only instances present in both mappings are compared. An instance is
    reported when the two sides do not hold exactly the same NIC indexes
    (which includes holding a different number of NICs), or when a NIC with
    the same index differs in its 'ipv4', 'mac' or 'network' value.

    @type DBNics: dict; {instance_id: {nic_index: nic_dict}}
    @type GNics: dict; {instance_id: {nic_index: nic_dict}}
    @rtype: dict; {instance_id: (db_nics, ganeti_nics)}
    @return: Dictionary containing the ids of the instances that have
    unsynced network interfaces, each mapped to a tuple with the instance's
    NICs in the DB and its NICs in Ganeti.

    """
    compared_fields = ('ipv4', 'mac', 'network')

    unsynced = {}
    for i in set(DBNics).intersection(GNics):
        nicsD = DBNics[i]
        nicsG = GNics[i]

        # Compare the index sets, not just the lengths: two sides with the
        # same number of NICs under different indexes are out of sync, and
        # looking such an index up on the other side would raise KeyError.
        if set(nicsD) != set(nicsG):
            unsynced[i] = (nicsD, nicsG)
            continue

        for index in nicsG:
            nicD = nicsD[index]
            nicG = nicsG[index]
            if any(nicD[field] != nicG[field] for field in compared_fields):
                unsynced[i] = (nicsD, nicsG)
                break

    return unsynced
303

    
304
#
305
# Networks
306
#
307

    
308

    
309
def get_networks_from_ganeti(backend):
    """Return the Synnefo networks found in the given Ganeti backend.

    Only networks whose name starts with settings.BACKEND_PREFIX_ID followed
    by 'net-' are kept. The result maps utils.id_from_network_name() of the
    network's name to the network entry returned by GetNetworks(bulk=True).
    """
    prefix = settings.BACKEND_PREFIX_ID + 'net-'

    found = {}
    with pooled_rapi_client(backend) as client:
        for network in client.GetNetworks(bulk=True):
            name = network['name']
            if not name.startswith(prefix):
                continue
            found[utils.id_from_network_name(name)] = network

    return found
320

    
321

    
322
def hanging_networks(backend, GNets):
    """Get networks that are not connected to all Nodegroups.

    `GNets` maps network ids to entries whose 'group_list' holds
    (name, mode, link) triples. The names are compared with the set of
    groups returned by the backend's GetGroups(); for every network whose
    names differ from that set, the result maps the network id to the groups
    missing from the network's list.
    """
    with pooled_rapi_client(backend) as client:
        all_groups = set(client.GetGroups())

    hanging = {}
    for net_id, info in GNets.items():
        connected = set(name for (name, mode, link) in info['group_list'])
        if connected != all_groups:
            hanging[net_id] = all_groups - connected
    return hanging
341

    
342

    
343
# Only for testing this module individually
344
def main():
    """Print the result of get_instances_from_ganeti() for manual testing."""
    # Parenthesised so the line also parses on Python 3; on Python 2 it
    # prints the same single value as the former print statement.
    print(get_instances_from_ganeti())


if __name__ == "__main__":
    # main() returns None, so the process exits with status 0.
    sys.exit(main())