Statistics
| Branch: | Tag: | Revision:

root / snf-cyclades-app / synnefo / logic / management / commands / reconcile-servers.py @ cc92b70f

History | View | Annotate | Download (13.1 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted provided that the following conditions
5
# are met:
6
#
7
#   1. Redistributions of source code must retain the above copyright
8
#      notice, this list of conditions and the following disclaimer.
9
#
10
#  2. Redistributions in binary form must reproduce the above copyright
11
#     notice, this list of conditions and the following disclaimer in the
12
#     documentation and/or other materials provided with the distribution.
13
#
14
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
# SUCH DAMAGE.
25
#
26
# The views and conclusions contained in the software and documentation are
27
# those of the authors and should not be interpreted as representing official
28
# policies, either expressed or implied, of GRNET S.A.
29
#
30
"""Reconciliation management command
31

32
Management command to reconcile the contents of the Synnefo DB with
33
the state of the Ganeti backend. See docstring on top of
34
logic/reconciliation.py for a description of reconciliation rules.
35

36
"""
37
import sys
38
import datetime
39
import subprocess
40

    
41
from optparse import make_option
42

    
43
from django.core.management.base import BaseCommand, CommandError
44

    
45
from synnefo.db.models import VirtualMachine, Network, pooled_rapi_client
46
from synnefo.logic import reconciliation, utils
47
from synnefo.logic import backend as backend_mod
48
from synnefo.management.common import get_backend
49

    
50

    
51
class Command(BaseCommand):
52
    can_import_settings = True
53

    
54
    help = 'Reconcile contents of Synnefo DB with state of Ganeti backend'
55
    output_transaction = True  # The management command runs inside
56
                               # an SQL transaction
57
    option_list = BaseCommand.option_list + (
58
        make_option('--detect-stale', action='store_true', dest='detect_stale',
59
                    default=False, help='Detect stale VM entries in DB'),
60
        make_option('--detect-orphans', action='store_true',
61
                    dest='detect_orphans',
62
                    default=False, help='Detect orphan instances in Ganeti'),
63
        make_option('--detect-unsynced', action='store_true',
64
                    dest='detect_unsynced',
65
                    default=False, help='Detect unsynced operstate between ' +
66
                                        'DB and Ganeti'),
67
        make_option('--detect-build-errors', action='store_true',
68
                    dest='detect_build_errors', default=False,
69
                    help='Detect instances with build error'),
70
        make_option('--detect-unsynced-nics', action='store_true',
71
                    dest='detect_unsynced_nics', default=False,
72
                    help='Detect unsynced nics between DB and Ganeti'),
73
        make_option('--detect-all', action='store_true',
74
                    dest='detect_all',
75
                    default=False, help='Enable all --detect-* arguments'),
76
        make_option('--fix-stale', action='store_true', dest='fix_stale',
77
                    default=False, help='Fix (remove) stale DB entries in DB'),
78
        make_option('--fix-orphans', action='store_true', dest='fix_orphans',
79
                    default=False, help='Fix (remove) orphan Ganeti VMs'),
80
        make_option('--fix-unsynced', action='store_true', dest='fix_unsynced',
81
                    default=False, help='Fix server operstate in DB, set ' +
82
                                        'from Ganeti'),
83
        make_option('--fix-build-errors', action='store_true',
84
                    dest='fix_build_errors', default=False,
85
                    help='Fix (remove) instances with build errors'),
86
        make_option('--fix-unsynced-nics', action='store_true',
87
                     dest='fix_unsynced_nics', default=False,
88
                     help='Fix unsynced nics between DB and Ganeti'),
89
        make_option('--fix-all', action='store_true', dest='fix_all',
90
                    default=False, help='Enable all --fix-* arguments'),
91
        make_option('--backend-id', default=None, dest='backend-id',
92
                    help='Reconcilie VMs only for this backend'),
93
    )
94

    
95
    def _process_args(self, options):
96
        keys_detect = [k for k in options.keys() if k.startswith('detect_')]
97
        keys_fix = [k for k in options.keys() if k.startswith('fix_')]
98

    
99
        if not reduce(lambda x, y: x or y,
100
                      map(lambda x: options[x], keys_detect)):
101
            options['detect_all'] = True
102

    
103
        if options['detect_all']:
104
            for kd in keys_detect:
105
                options[kd] = True
106
        if options['fix_all']:
107
            for kf in keys_fix:
108
                options[kf] = True
109

    
110
        for kf in keys_fix:
111
            kd = kf.replace('fix_', 'detect_', 1)
112
            if (options[kf] and not options[kd]):
113
                raise CommandError("Cannot use --%s without corresponding "
114
                                   "--%s argument" % (kf, kd))
115

    
116
    def handle(self, **options):
117
        verbosity = int(options['verbosity'])
118
        self._process_args(options)
119
        backend_id = options['backend-id']
120
        backend = get_backend(backend_id) if backend_id else None
121

    
122
        D = reconciliation.get_servers_from_db(backend)
123
        G, GNics = reconciliation.get_instances_from_ganeti(backend)
124

    
125
        DBNics = reconciliation.get_nics_from_db(backend)
126

    
127
        #
128
        # Detect problems
129
        #
130
        if options['detect_stale']:
131
            stale = reconciliation.stale_servers_in_db(D, G)
132
            if len(stale) > 0:
133
                print >> sys.stderr, "Found the following stale server IDs: "
134
                print "    " + "\n    ".join(
135
                    [str(x) for x in stale])
136
            elif verbosity == 2:
137
                print >> sys.stderr, "Found no stale server IDs in DB."
138

    
139
        if options['detect_orphans']:
140
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
141
            if len(orphans) > 0:
142
                print >> sys.stderr, "Found orphan Ganeti instances with IDs: "
143
                print "    " + "\n    ".join(
144
                    [str(x) for x in orphans])
145
            elif verbosity == 2:
146
                print >> sys.stderr, "Found no orphan Ganeti instances."
147

    
148
        if options['detect_unsynced']:
149
            unsynced = reconciliation.unsynced_operstate(D, G)
150
            if len(unsynced) > 0:
151
                print >> sys.stderr, "The operstate of the following server" \
152
                                     " IDs is out-of-sync:"
153
                print "    " + "\n    ".join(
154
                    ["%d is %s in DB, %s in Ganeti" %
155
                     (x[0], x[1], ('UP' if x[2] else 'DOWN'))
156
                     for x in unsynced])
157
            elif verbosity == 2:
158
                print >> sys.stderr, "The operstate of all servers is in sync."
159

    
160
        if options['detect_build_errors']:
161
            build_errors = reconciliation.instances_with_build_errors(D, G)
162
            if len(build_errors) > 0:
163
                print >> sys.stderr, "The os for the following server IDs was "\
164
                                     "not build successfully:"
165
                print "    " + "\n    ".join(
166
                    ["%d" % x for x in build_errors])
167
            elif verbosity == 2:
168
                print >> sys.stderr, "Found no instances with build errors."
169

    
170
        if options['detect_unsynced_nics']:
171
            def pretty_print_nics(nics):
172
                if not nics:
173
                    print ''.ljust(18) + 'None'
174
                for index, info in nics.items():
175
                    print ''.ljust(18) + 'nic/' + str(index) + ': MAC: %s, IP: %s, Network: %s' % \
176
                      (info['mac'], info['ipv4'], info['network'])
177

    
178
            unsynced_nics = reconciliation.unsynced_nics(DBNics, GNics)
179
            if len(unsynced_nics) > 0:
180
                print >> sys.stderr, "The NICs of servers with the following IDs "\
181
                                     "are unsynced:"
182
                for id, nics in unsynced_nics.items():
183
                    print ''.ljust(2) + '%6d:' % id
184
                    print ''.ljust(8) + '%8s:' % 'DB'
185
                    pretty_print_nics(nics[0])
186
                    print ''.ljust(8) + '%8s:' % 'Ganeti'
187
                    pretty_print_nics(nics[1])
188
            elif verbosity == 2:
189
                print >> sys.stderr, "All instance nics are synced."
190

    
191
        #
192
        # Then fix them
193
        #
194
        if options['fix_stale'] and len(stale) > 0:
195
            print >> sys.stderr, \
196
                "Simulating successful Ganeti removal for %d " \
197
                "servers in the DB:" % len(stale)
198
            for vm in VirtualMachine.objects.filter(pk__in=stale):
199
                event_time = datetime.datetime.now()
200
                backend_mod.process_op_status(
201
                    vm=vm,
202
                    etime=event_time,
203
                    jobid=-0,
204
                    opcode='OP_INSTANCE_REMOVE', status='success',
205
                    logmsg='Reconciliation: simulated Ganeti event')
206
            print >> sys.stderr, "    ...done"
207

    
208
        if options['fix_orphans'] and len(orphans) > 0:
209
            print >> sys.stderr, \
210
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" % \
211
                len(orphans)
212
            for id in orphans:
213
                try:
214
                    vm = VirtualMachine.objects.get(pk=id)
215
                    with pooled_rapi_client(vm) as client:
216
                        client.DeleteInstance(utils.id_to_instance_name(id))
217
                except VirtualMachine.DoesNotExist:
218
                    print >> sys.stderr, "No entry for VM %d in DB !!" % id
219
            print >> sys.stderr, "    ...done"
220

    
221
        if options['fix_unsynced'] and len(unsynced) > 0:
222
            print >> sys.stderr, "Setting the state of %d out-of-sync VMs:" % \
223
                len(unsynced)
224
            for id, db_state, ganeti_up in unsynced:
225
                vm = VirtualMachine.objects.get(pk=id)
226
                opcode = "OP_INSTANCE_REBOOT" if ganeti_up \
227
                         else "OP_INSTANCE_SHUTDOWN"
228
                event_time = datetime.datetime.now()
229
                backend_mod.process_op_status(
230
                    vm=vm, etime=event_time, jobid=-0,
231
                    opcode=opcode, status='success',
232
                    logmsg='Reconciliation: simulated Ganeti event')
233
            print >> sys.stderr, "    ...done"
234

    
235
        if options['fix_build_errors'] and len(build_errors) > 0:
236
            print >> sys.stderr, "Setting the state of %d build-errors VMs:" % \
237
                len(build_errors)
238
            for id in build_errors:
239
                vm = VirtualMachine.objects.get(pk=id)
240
                event_time = datetime.datetime.now()
241
                backend_mod.process_op_status(vm=vm, etime=event_time, jobid=-0,
242
                    opcode="OP_INSTANCE_CREATE", status='error',
243
                    logmsg='Reconciliation: simulated Ganeti event')
244
            print >> sys.stderr, "    ...done"
245

    
246
        if options['fix_unsynced_nics'] and len(unsynced_nics) > 0:
247
            print >> sys.stderr, "Setting the nics of %d out-of-sync VMs:" % \
248
                                  len(unsynced_nics)
249
            for id, nics in unsynced_nics.items():
250
                vm = VirtualMachine.objects.get(pk=id)
251
                nics = nics[1]  # Ganeti nics
252
                if nics == {}:  # No nics
253
                    vm.nics.all.delete()
254
                    continue
255
                for index, nic in nics.items():
256
                    net_id = utils.id_from_network_name(nic['network'])
257
                    subnet6 = Network.objects.get(id=net_id).subnet6
258
                    # Produce ipv6
259
                    ipv6 = subnet6 and mac2eui64(nic['mac'], subnet6) or None
260
                    nic['ipv6'] = ipv6
261
                    # Rename ipv4 to ip
262
                    nic['ip'] = nic['ipv4']
263
                # Dict to sorted list
264
                final_nics = []
265
                nics_keys = nics.keys()
266
                nics_keys.sort()
267
                for i in nics_keys:
268
                    if nics[i]['network']:
269
                        final_nics.append(nics[i])
270
                    else:
271
                        print 'Network of nic %d of vm %s is None. ' \
272
                              'Can not reconcile' % (i, vm.backend_vm_id)
273
                event_time = datetime.datetime.now()
274
                backend_mod.process_net_status(vm=vm, etime=event_time,
275
                                               nics=final_nics)
276
            print >> sys.stderr, "    ...done"
277

    
278

    
279
def mac2eui64(mac, prefixstr):
    """Return the output of the external ``mac2eui64`` tool.

    Runs ``mac2eui64 <mac> <prefixstr>`` and returns everything it wrote
    to stdout, with trailing whitespace stripped.  The exit status of the
    tool is not inspected.
    """
    process = subprocess.Popen(["mac2eui64", mac, prefixstr],
                               stdout=subprocess.PIPE)
    # communicate() reads stdout to EOF, closes the pipe and waits for the
    # child, so no zombie process or open file descriptor is left behind.
    output, _ = process.communicate()
    return output.rstrip()