Statistics
| Branch: | Tag: | Revision:

root / snf-cyclades-app / synnefo / logic / management / commands / reconcile.py @ 4161cb41

History | View | Annotate | Download (9.3 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted provided that the following conditions
5
# are met:
6
#
7
#   1. Redistributions of source code must retain the above copyright
8
#      notice, this list of conditions and the following disclaimer.
9
#
10
#  2. Redistributions in binary form must reproduce the above copyright
11
#     notice, this list of conditions and the following disclaimer in the
12
#     documentation and/or other materials provided with the distribution.
13
#
14
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
# SUCH DAMAGE.
25
#
26
# The views and conclusions contained in the software and documentation are
27
# those of the authors and should not be interpreted as representing official
28
# policies, either expressed or implied, of GRNET S.A.
29
#
30
"""Reconciliation management command
31

32
Management command to reconcile the contents of the Synnefo DB with
33
the state of the Ganeti backend. See docstring on top of
34
logic/reconciliation.py for a description of reconciliation rules.
35

36
"""
37
import sys
38
import datetime
39

    
40
from optparse import make_option
41

    
42
from django.conf import settings
43
from django.core.management.base import BaseCommand, CommandError
44

    
45
from synnefo.db.models import VirtualMachine
46
from synnefo.logic import reconciliation, backend
47

    
48

    
49
class Command(BaseCommand):
    """Detect and optionally fix divergence between the DB and Ganeti.

    Each ``--detect-*`` option enables one check; the matching ``--fix-*``
    option repairs whatever that check found. A ``--fix-*`` option is only
    accepted together with its ``--detect-*`` counterpart.

    Output convention: human-readable headers and status lines go to
    stderr, while the lists of affected server IDs go to stdout.
    """
    can_import_settings = True

    help = 'Reconcile contents of Synnefo DB with state of Ganeti backend'
    output_transaction = True  # The management command runs inside
                               # an SQL transaction
    # NOTE(review): per the Django docs, output_transaction wraps the
    # command's *returned output* in BEGIN;/COMMIT; -- it does not by itself
    # run handle() inside a DB transaction. Confirm the intent.
    option_list = BaseCommand.option_list + (
        make_option('--detect-stale', action='store_true', dest='detect_stale',
                    default=False, help='Detect stale VM entries in DB'),
        make_option('--detect-orphans', action='store_true',
                    dest='detect_orphans',
                    default=False, help='Detect orphan instances in Ganeti'),
        make_option('--detect-unsynced', action='store_true',
                    dest='detect_unsynced',
                    default=False, help='Detect unsynced operstate between ' +
                                        'DB and Ganeti'),
        make_option('--detect-build-errors', action='store_true',
                    dest='detect_build_errors', default=False,
                    help='Detect instances with build error'),
        make_option('--detect-all', action='store_true',
                    dest='detect_all',
                    default=False, help='Enable all --detect-* arguments'),
        make_option('--fix-stale', action='store_true', dest='fix_stale',
                    default=False, help='Fix (remove) stale DB entries in DB'),
        make_option('--fix-orphans', action='store_true', dest='fix_orphans',
                    default=False, help='Fix (remove) orphan Ganeti VMs'),
        make_option('--fix-unsynced', action='store_true', dest='fix_unsynced',
                    default=False, help='Fix server operstate in DB, set ' +
                                        'from Ganeti'),
        make_option('--fix-build-errors', action='store_true',
                    dest='fix_build_errors', default=False,
                    help='Fix (remove) instances with build errors'),
        make_option('--fix-all', action='store_true', dest='fix_all',
                    default=False, help='Enable all --fix-* arguments'))

    def _process_args(self, options):
        """Expand the ``*_all`` switches and validate the option combination.

        ``options`` is modified in place: ``detect_all`` / ``fix_all`` turn on
        every ``detect_*`` / ``fix_*`` key respectively.

        Raises CommandError when no ``detect_*`` option ends up enabled, or
        when a ``fix_*`` option is enabled without its ``detect_*`` twin.
        """
        keys_detect = [k for k in options.keys() if k.startswith('detect_')]
        keys_fix = [k for k in options.keys() if k.startswith('fix_')]

        if options['detect_all']:
            for kd in keys_detect:
                options[kd] = True
        if options['fix_all']:
            for kf in keys_fix:
                options[kf] = True

        if not any(options[kd] for kd in keys_detect):
            raise CommandError("At least one of --detect-* must be specified")

        for kf in keys_fix:
            kd = kf.replace('fix_', 'detect_', 1)
            if options[kf] and not options[kd]:
                # Report the flags as typed on the command line
                # (--fix-stale), not the underscore-separated dest names.
                raise CommandError("Cannot use --%s without corresponding "
                                   "--%s argument" % (kf.replace('_', '-'),
                                                      kd.replace('_', '-')))

    @staticmethod
    def _report(lines, header, none_found, verbosity):
        """Print one detection result.

        When ``lines`` is non-empty, ``header`` goes to stderr and the
        indented ``lines`` go to stdout. Otherwise ``none_found`` is written
        to stderr, but only at verbosity 2 or higher.
        """
        # .write() is used instead of the print statement so the code is
        # valid under both Python 2 and Python 3.
        if lines:
            sys.stderr.write(header + "\n")
            sys.stdout.write("    " + "\n    ".join(lines) + "\n")
        elif verbosity >= 2:
            sys.stderr.write(none_found + "\n")

    @staticmethod
    def _simulate_event(vm, opcode, status):
        """Feed a synthetic Ganeti job notification for ``vm`` to the backend.

        The event is stamped with the current time and job id 0.
        """
        event_time = datetime.datetime.now()
        backend.process_op_status(
            vm=vm, etime=event_time, jobid=0, opcode=opcode, status=status,
            logmsg='Reconciliation: simulated Ganeti event')

    def handle(self, **options):
        """Run the enabled detection passes, then the enabled fixes."""
        verbosity = int(options['verbosity'])
        self._process_args(options)

        D = reconciliation.get_servers_from_db()
        G = reconciliation.get_instances_from_ganeti()

        # Bound up front so the fix phase never hits an unbound name, even
        # if a detection pass was skipped.
        stale = orphans = unsynced = build_errors = ()

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            self._report([str(x) for x in stale],
                         "Found the following stale server IDs: ",
                         "Found no stale server IDs in DB.",
                         verbosity)

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            self._report([str(x) for x in orphans],
                         "Found orphan Ganeti instances with IDs: ",
                         "Found no orphan Ganeti instances.",
                         verbosity)

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            self._report(["%d is %s in DB, %s in Ganeti" %
                          (x[0], x[1], ('UP' if x[2] else 'DOWN'))
                          for x in unsynced],
                         "The operstate of the following server"
                         " IDs is out-of-sync:",
                         "The operstate of all servers is in sync.",
                         verbosity)

        if options['detect_build_errors']:
            build_errors = reconciliation.instances_with_build_errors(D, G)
            self._report(["%d" % x for x in build_errors],
                         "The os for the following server IDs was "
                         "not build successfully:",
                         "Found no instances with build errors.",
                         verbosity)

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            sys.stderr.write(
                "Simulating successful Ganeti removal for %d "
                "servers in the DB:" % len(stale) + "\n")
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                self._simulate_event(vm, 'OP_INSTANCE_REMOVE', 'success')
            sys.stderr.write("    ...done\n")

        if options['fix_orphans'] and len(orphans) > 0:
            sys.stderr.write(
                "Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" %
                len(orphans) + "\n")
            for vm_id in orphans:
                # NOTE(review): orphans are presumably instances that have
                # no row in the DB, in which case this lookup would raise
                # VirtualMachine.DoesNotExist. Confirm, and obtain the
                # Ganeti client some other way if so.
                vm = VirtualMachine.objects.get(pk=vm_id)
                vm.client.DeleteInstance(
                    '%s%s' % (settings.BACKEND_PREFIX_ID, str(vm_id)))
            sys.stderr.write("    ...done\n")

        if options['fix_unsynced'] and len(unsynced) > 0:
            sys.stderr.write(
                "Setting the state of %d out-of-sync VMs:" %
                len(unsynced) + "\n")
            for vm_id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=vm_id)
                # A reboot event marks the server as started in the DB, a
                # shutdown event as stopped -- mirroring the Ganeti state.
                if ganeti_up:
                    opcode = "OP_INSTANCE_REBOOT"
                else:
                    opcode = "OP_INSTANCE_SHUTDOWN"
                self._simulate_event(vm, opcode, 'success')
            sys.stderr.write("    ...done\n")

        if options['fix_build_errors'] and len(build_errors) > 0:
            sys.stderr.write(
                "Setting the state of %d build-errors VMs:" %
                len(build_errors) + "\n")
            for vm_id in build_errors:
                vm = VirtualMachine.objects.get(pk=vm_id)
                self._simulate_event(vm, "OP_INSTANCE_CREATE", 'error')
            sys.stderr.write("    ...done\n")
202