Statistics
| Branch: | Tag: | Revision:

root / snf-cyclades-app / synnefo / logic / management / commands / reconcile.py @ d30f29aa

History | View | Annotate | Download (12.3 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted provided that the following conditions
5
# are met:
6
#
7
#   1. Redistributions of source code must retain the above copyright
8
#      notice, this list of conditions and the following disclaimer.
9
#
10
#  2. Redistributions in binary form must reproduce the above copyright
11
#     notice, this list of conditions and the following disclaimer in the
12
#     documentation and/or other materials provided with the distribution.
13
#
14
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
# SUCH DAMAGE.
25
#
26
# The views and conclusions contained in the software and documentation are
27
# those of the authors and should not be interpreted as representing official
28
# policies, either expressed or implied, of GRNET S.A.
29
#
30
"""Reconciliation management command
31

32
Management command to reconcile the contents of the Synnefo DB with
33
the state of the Ganeti backend. See docstring on top of
34
logic/reconciliation.py for a description of reconciliation rules.
35

36
"""
37
import sys
38
import datetime
39
import subprocess
40

    
41
from optparse import make_option
42

    
43
from django.conf import settings
44
from django.core.management.base import BaseCommand, CommandError
45

    
46
from synnefo.db.models import VirtualMachine
47
from synnefo.logic import reconciliation, backend, utils
48

    
49

    
50
class Command(BaseCommand):
    """Management command that reconciles the Synnefo DB with Ganeti.

    Every ``--detect-*`` option reports one category of inconsistency.
    Every ``--fix-*`` option repairs the category reported by the
    ``--detect-*`` option of the same name, which must be enabled too.
    """

    can_import_settings = True

    help = 'Reconcile contents of Synnefo DB with state of Ganeti backend'
    output_transaction = True  # The management command runs inside
                               # an SQL transaction
    option_list = BaseCommand.option_list + (
        make_option('--detect-stale', action='store_true', dest='detect_stale',
                    default=False, help='Detect stale VM entries in DB'),
        make_option('--detect-orphans', action='store_true',
                    dest='detect_orphans',
                    default=False, help='Detect orphan instances in Ganeti'),
        make_option('--detect-unsynced', action='store_true',
                    dest='detect_unsynced',
                    default=False, help='Detect unsynced operstate between '
                                        'DB and Ganeti'),
        make_option('--detect-build-errors', action='store_true',
                    dest='detect_build_errors', default=False,
                    help='Detect instances with build error'),
        make_option('--detect-unsynced-nics', action='store_true',
                    dest='detect_unsynced_nics', default=False,
                    help='Detect unsynced nics between DB and Ganeti'),
        make_option('--detect-all', action='store_true',
                    dest='detect_all',
                    default=False, help='Enable all --detect-* arguments'),
        make_option('--fix-stale', action='store_true', dest='fix_stale',
                    default=False, help='Fix (remove) stale DB entries in DB'),
        make_option('--fix-orphans', action='store_true', dest='fix_orphans',
                    default=False, help='Fix (remove) orphan Ganeti VMs'),
        make_option('--fix-unsynced', action='store_true', dest='fix_unsynced',
                    default=False, help='Fix server operstate in DB, set '
                                        'from Ganeti'),
        make_option('--fix-build-errors', action='store_true',
                    dest='fix_build_errors', default=False,
                    help='Fix (remove) instances with build errors'),
        make_option('--fix-unsynced-nics', action='store_true',
                    dest='fix_unsynced_nics', default=False,
                    help='Fix unsynced nics between DB and Ganeti'),
        make_option('--fix-all', action='store_true', dest='fix_all',
                    default=False, help='Enable all --fix-* arguments'))

    @staticmethod
    def _out(text):
        """Write one line of report data to standard output."""
        sys.stdout.write(text + "\n")

    @staticmethod
    def _err(text):
        """Write one line of progress/diagnostic text to standard error."""
        sys.stderr.write(text + "\n")

    @staticmethod
    def _simulate_op(vm, opcode, status):
        """Feed a simulated Ganeti operation result for `vm` to the backend."""
        # jobid is 0 for every event generated by reconciliation.
        backend.process_op_status(
            vm=vm, etime=datetime.datetime.now(), jobid=0,
            opcode=opcode, status=status,
            logmsg='Reconciliation: simulated Ganeti event')

    def _print_nics(self, nics):
        """Print one line per nic of a ``{index: info}`` mapping.

        Each `info` must provide the 'mac', 'ipv4' and 'network' keys.
        A falsy `nics` (empty or None) is reported as the single line 'None'.
        """
        indent = ' ' * 18
        if not nics:
            self._out(indent + 'None')
            return
        for index, info in nics.items():
            self._out(indent + 'nic/' + str(index) +
                      ': MAC: %s, IP: %s, Network: %s' %
                      (info['mac'], info['ipv4'], info['network']))

    def _process_args(self, options):
        """Expand the ``*_all`` switches and validate the option combination.

        `options` is modified in place: ``detect_all`` enables every
        ``detect_*`` entry and ``fix_all`` enables every ``fix_*`` entry.

        Raises CommandError when no ``detect_*`` option is enabled, or when
        a ``fix_*`` option is enabled without its ``detect_*`` counterpart.
        """
        keys_detect = [k for k in options if k.startswith('detect_')]
        keys_fix = [k for k in options if k.startswith('fix_')]

        if options['detect_all']:
            for kd in keys_detect:
                options[kd] = True
        if options['fix_all']:
            for kf in keys_fix:
                options[kf] = True

        if not any(options[kd] for kd in keys_detect):
            raise CommandError("At least one of --detect-* must be specified")

        for kf in keys_fix:
            kd = kf.replace('fix_', 'detect_', 1)
            if options[kf] and not options[kd]:
                # Report the flags as typed on the command line
                # (--fix-stale), not the internal dest names (fix_stale).
                raise CommandError("Cannot use --%s without corresponding "
                                   "--%s argument" % (kf.replace('_', '-'),
                                                      kd.replace('_', '-')))

    def handle(self, **options):
        """Run the enabled detection steps, then the enabled fix steps.

        Detected IDs and per-server details go to stdout; headings and
        progress messages go to stderr.  "Nothing found" messages are only
        printed when the verbosity is 2 or higher.

        Raises CommandError (through _process_args) on an invalid
        combination of options.
        """
        verbose = int(options['verbosity']) >= 2
        self._process_args(options)

        db_servers = reconciliation.get_servers_from_db()
        ganeti_instances = reconciliation.get_instances_from_ganeti()

        db_nics = reconciliation.get_nics_from_db()
        ganeti_nics = reconciliation.get_nics_from_ganeti()

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(db_servers,
                                                       ganeti_instances)
            if stale:
                self._err("Found the following stale server IDs: ")
                self._out("    " + "\n    ".join(str(x) for x in stale))
            elif verbose:
                self._err("Found no stale server IDs in DB.")

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(
                db_servers, ganeti_instances)
            if orphans:
                self._err("Found orphan Ganeti instances with IDs: ")
                self._out("    " + "\n    ".join(str(x) for x in orphans))
            elif verbose:
                self._err("Found no orphan Ganeti instances.")

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(db_servers,
                                                         ganeti_instances)
            if unsynced:
                self._err("The operstate of the following server"
                          " IDs is out-of-sync:")
                self._out("    " + "\n    ".join(
                    "%d is %s in DB, %s in Ganeti" %
                    (vm_id, db_state, ('UP' if ganeti_up else 'DOWN'))
                    for vm_id, db_state, ganeti_up in unsynced))
            elif verbose:
                self._err("The operstate of all servers is in sync.")

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(
                db_servers, ganeti_instances)
            if build_errors:
                self._err("The os for the following server IDs was "
                          "not build successfully:")
                self._out("    " + "\n    ".join(
                    "%d" % x for x in build_errors))
            elif verbose:
                self._err("Found no instances with build errors.")

        if options['detect_unsynced_nics']:
            unsynced_nics = reconciliation.unsynced_nics(db_nics, ganeti_nics)
            if unsynced_nics:
                self._err("The nics of servers with the following ID's "
                          "are unsynced:")
                # Each value is indexed as (DB nics, Ganeti nics).
                for vm_id, nics in unsynced_nics.items():
                    self._out(' ' * 2 + '%6d:' % vm_id)
                    self._out(' ' * 8 + '%8s:' % 'DB')
                    self._print_nics(nics[0])
                    self._out(' ' * 8 + '%8s:' % 'Ganeti')
                    self._print_nics(nics[1])
            elif verbose:
                self._err("All instance nics are synced.")

        #
        # Then fix them
        #
        # _process_args() guarantees that every enabled fix_* option has its
        # detect_* counterpart enabled, so the result variables used below
        # (stale, orphans, ...) are always bound when they are read.
        if options['fix_stale'] and stale:
            self._err("Simulating successful Ganeti removal for %d "
                      "servers in the DB:" % len(stale))
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                self._simulate_op(vm, 'OP_INSTANCE_REMOVE', 'success')
            self._err("    ...done")

        if options['fix_orphans'] and orphans:
            self._err("Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" %
                      len(orphans))
            for vm_id in orphans:
                # NOTE(review): this looks the orphan up in the DB to reach
                # its Ganeti client; if orphans are instances that have no
                # DB row this get() raises DoesNotExist -- confirm against
                # reconciliation.orphan_instances_in_ganeti().
                vm = VirtualMachine.objects.get(pk=vm_id)
                vm.client.DeleteInstance(utils.id_to_instance_name(vm_id))
            self._err("    ...done")

        if options['fix_unsynced'] and unsynced:
            self._err("Setting the state of %d out-of-sync VMs:" %
                      len(unsynced))
            for vm_id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=vm_id)
                if ganeti_up:
                    opcode = "OP_INSTANCE_REBOOT"
                else:
                    opcode = "OP_INSTANCE_SHUTDOWN"
                self._simulate_op(vm, opcode, 'success')
            self._err("    ...done")

        if options['fix_build_errors'] and build_errors:
            self._err("Setting the state of %d build-errors VMs:" %
                      len(build_errors))
            for vm_id in build_errors:
                vm = VirtualMachine.objects.get(pk=vm_id)
                self._simulate_op(vm, "OP_INSTANCE_CREATE", 'error')
            self._err("    ...done")

        if options['fix_unsynced_nics'] and unsynced_nics:
            self._err("Setting the nics of %d out-of-sync VMs:" %
                      len(unsynced_nics))
            for vm_id, nics in unsynced_nics.items():
                vm = VirtualMachine.objects.get(pk=vm_id)
                vm_ganeti_nics = nics[1]  # Ganeti nics
                if not vm_ganeti_nics:  # No nics
                    # all() must be called: the manager method itself has
                    # no delete() attribute.
                    vm.nics.all().delete()
                    continue
                for nic in vm_ganeti_nics.values():
                    # Produce ipv6
                    nic['ipv6'] = mac2eui64(nic['mac'],
                                            settings.PUBLIC_IPV6_PREFIX)
                    # Rename ipv4 to ip
                    nic['ip'] = nic['ipv4']
                # Dict to list ordered by nic index; nics without a network
                # are reported and left out.
                final_nics = []
                for index in sorted(vm_ganeti_nics):
                    nic = vm_ganeti_nics[index]
                    if nic['network']:
                        final_nics.append(nic)
                    else:
                        self._out('Network of nic %d of vm %s is None. '
                                  'Can not reconcile' %
                                  (index, vm.backend_vm_id))
                backend.process_net_status(vm=vm,
                                           etime=datetime.datetime.now(),
                                           nics=final_nics)
            self._err("    ...done")
260

    
261

    
262
def mac2eui64(mac, prefixstr):
263
    process = subprocess.Popen(["mac2eui64", mac, prefixstr],
264
                                stdout=subprocess.PIPE)
265
    return process.stdout.read().rstrip()