Statistics
| Branch: | Tag: | Revision:

root / snf-cyclades-app / synnefo / logic / management / commands / reconcile.py @ e3e7e245

History | View | Annotate | Download (9.3 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted provided that the following conditions
5
# are met:
6
#
7
#   1. Redistributions of source code must retain the above copyright
8
#      notice, this list of conditions and the following disclaimer.
9
#
10
#  2. Redistributions in binary form must reproduce the above copyright
11
#     notice, this list of conditions and the following disclaimer in the
12
#     documentation and/or other materials provided with the distribution.
13
#
14
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
# SUCH DAMAGE.
25
#
26
# The views and conclusions contained in the software and documentation are
27
# those of the authors and should not be interpreted as representing official
28
# policies, either expressed or implied, of GRNET S.A.
29
#
30
"""Reconciliation management command
31

32
Management command to reconcile the contents of the Synnefo DB with
33
the state of the Ganeti backend. See docstring on top of
34
logic/reconciliation.py for a description of reconciliation rules.
35

36
"""
37
import sys
38

    
39
from optparse import make_option
40

    
41
from django.conf import settings
42
from django.core.management.base import BaseCommand, CommandError
43

    
44
from synnefo.db.models import VirtualMachine, Network
45
from synnefo.logic import reconciliation, backend
46
from synnefo.util.rapi import GanetiRapiClient
47

    
48

    
49
# Module-level memo of owner lookups: user_exists() records every owner it
# has resolved in one of these two sets, so a given owner is looked up in
# the database at most once.
_valid_users = set()
_invalid_users = set()


def user_exists(user):
    """Return True if an AstakosUser with e-mail `user` exists.

    The answer is cached in the module-level `_valid_users` /
    `_invalid_users` sets, so repeated calls for the same owner do not
    query the database again.

    """
    # Imported here rather than at module level, as in the original code.
    # NOTE(review): presumably so that the command can be loaded without
    # astakos being importable -- confirm before moving it.
    from astakos.im.models import AstakosUser

    if user in _valid_users:
        return True
    if user in _invalid_users:
        return False

    # exists() rather than get(): get() raises MultipleObjectsReturned when
    # more than one account carries this e-mail, which would abort the whole
    # run, and it loads a full row although only presence matters.
    found = AstakosUser.objects.filter(email=user).exists()
    if found:
        _valid_users.add(user)
    else:
        _invalid_users.add(user)
    return found
69

    
70

    
71
class Command(BaseCommand):
    """Reconcile the contents of the Synnefo DB with the Ganeti backend.

    Each ``--detect-*`` option reports one kind of inconsistency and each
    ``--fix-*`` option repairs the kind reported by the ``--detect-*``
    option of the same name.  A fix option is rejected unless its detect
    counterpart is enabled as well.

    """
    can_import_settings = True

    help = 'Reconcile contents of Synnefo DB with state of Ganeti backend'
    output_transaction = True  # The management command runs inside
                               # an SQL transaction
    option_list = BaseCommand.option_list + (
        make_option('--detect-stale', action='store_true', dest='detect_stale',
                    default=False, help='Detect stale VM entries in DB'),
        make_option('--detect-orphans', action='store_true',
                    dest='detect_orphans',
                    default=False, help='Detect orphan instances in Ganeti'),
        make_option('--detect-unsynced', action='store_true',
                    dest='detect_unsynced',
                    default=False, help='Detect unsynced operstate between ' +
                                        'DB and Ganeti'),
        make_option('--detect-orphan-servers',
                    action='store_true',
                    dest='detect_orphan_servers',
                    default=False,
                    help='Detect VMs with an invalid owner'),
        make_option('--detect-orphan-networks',
                    action='store_true',
                    dest='detect_orphan_networks',
                    default=False,
                    help='Detect networks with an invalid owner'),
        make_option('--detect-all', action='store_true',
                    dest='detect_all',
                    default=False, help='Enable all --detect-* arguments'),
        make_option('--fix-stale', action='store_true', dest='fix_stale',
                    default=False, help='Fix (remove) stale DB entries in DB'),
        make_option('--fix-orphans', action='store_true', dest='fix_orphans',
                    default=False, help='Fix (remove) orphan Ganeti VMs'),
        make_option('--fix-unsynced', action='store_true', dest='fix_unsynced',
                    default=False, help='Fix server operstate in DB, set ' +
                                        'from Ganeti'),
        make_option('--fix-all', action='store_true', dest='fix_all',
                    default=False, help='Enable all --fix-* arguments'))

    def _process_args(self, options):
        """Expand the ``*_all`` switches and validate the option set.

        `options` is the option dictionary received by handle(); it is
        modified in place.

        Raises CommandError if no detect option is enabled, or if a fix
        option is enabled without the detect option of the same name.

        """
        keys_detect = [k for k in options if k.startswith('detect_')]
        keys_fix = [k for k in options if k.startswith('fix_')]

        if options['detect_all']:
            for kd in keys_detect:
                options[kd] = True
        if options['fix_all']:
            for kf in keys_fix:
                options[kf] = True

        if not any(options[kd] for kd in keys_detect):
            raise CommandError("At least one of --detect-* must be specified")

        for kf in keys_fix:
            kd = kf.replace('fix_', 'detect_', 1)
            if options[kf] and not options[kd]:
                # Keys are option dests (underscores); show the user the
                # flags as they are typed on the command line (dashes).
                raise CommandError("Cannot use --%s without corresponding "
                                   "--%s argument" %
                                   (kf.replace('_', '-'),
                                    kd.replace('_', '-')))

    def handle(self, **options):
        """Run the requested detection passes, then the requested fixes.

        Findings of the stale/orphan/unsynced passes are listed on standard
        output, with their headings and all progress messages on standard
        error.  Servers and networks with an unknown owner are reported
        through ``self.stdout``.

        """
        verbosity = int(options['verbosity'])
        self._process_args(options)
        # "Nothing found" messages are shown from verbosity 2 upwards.
        verbose = verbosity >= 2

        D = reconciliation.get_servers_from_db()
        G = reconciliation.get_instances_from_ganeti()

        # Results of the detection passes.  _process_args() guarantees that
        # a fix option implies its detect option; the empty defaults merely
        # keep the fix phase from ever touching an unbound name.
        stale = ()
        orphans = ()
        unsynced = ()

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(D, G)
            if len(stale) > 0:
                sys.stderr.write("Found the following stale server IDs: \n")
                sys.stdout.write(
                    "    " + "\n    ".join([str(x) for x in stale]) + "\n")
            elif verbose:
                sys.stderr.write("Found no stale server IDs in DB.\n")

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(D, G)
            if len(orphans) > 0:
                sys.stderr.write("Found orphan Ganeti instances with IDs: \n")
                sys.stdout.write(
                    "    " + "\n    ".join([str(x) for x in orphans]) + "\n")
            elif verbose:
                sys.stderr.write("Found no orphan Ganeti instances.\n")

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(D, G)
            if len(unsynced) > 0:
                sys.stderr.write("The operstate of the following server"
                                 " IDs is out-of-sync:\n")
                sys.stdout.write("    " + "\n    ".join(
                    ["%d is %s in DB, %s in Ganeti" %
                     (x[0], x[1], ('UP' if x[2] else 'DOWN'))
                     for x in unsynced]) + "\n")
            elif verbose:
                sys.stderr.write("The operstate of all servers is in sync.\n")

        if options['detect_orphan_servers']:
            for server in VirtualMachine.objects.filter(deleted=False):
                owner = server.userid
                if not user_exists(owner):
                    msg = "Server %d (%s) has unknown owner %s\n" % (
                        server.id, server.name, owner)
                    self.stdout.write(msg)

        if options['detect_orphan_networks']:
            for network in Network.objects.exclude(state='DELETED'):
                owner = network.userid
                # Networks without an owner are not reported.
                if owner and not user_exists(owner):
                    msg = "Network %d (%s) has unknown owner %s\n" % (
                        network.id, network.name, owner)
                    self.stdout.write(msg)

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            sys.stderr.write("Simulating successful Ganeti removal for %d "
                             "servers in the DB:\n" % len(stale))
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                # The event is simulated, so there is no real Ganeti job
                # behind it; 0 is passed as the job id.
                backend.process_op_status(
                    vm=vm, jobid=0,
                    opcode='OP_INSTANCE_REMOVE', status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            sys.stderr.write("    ...done\n")

        if options['fix_orphans'] and len(orphans) > 0:
            sys.stderr.write("Issuing OP_INSTANCE_REMOVE for %d Ganeti "
                             "instances:\n" % len(orphans))
            # One client serves all deletions; it does not depend on the
            # instance being removed.
            rapi = GanetiRapiClient(*settings.GANETI_CLUSTER_INFO)
            for instance_id in orphans:
                rapi.DeleteInstance(
                    '%s%s' % (settings.BACKEND_PREFIX_ID, str(instance_id)))
            sys.stderr.write("    ...done\n")

        if options['fix_unsynced'] and len(unsynced) > 0:
            sys.stderr.write("Setting the state of %d out-of-sync VMs:\n" %
                             len(unsynced))
            for vm_id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=vm_id)
                # Replay the event matching what Ganeti reports: a reboot
                # when the instance is up, a shutdown when it is down.
                if ganeti_up:
                    opcode = "OP_INSTANCE_REBOOT"
                else:
                    opcode = "OP_INSTANCE_SHUTDOWN"
                backend.process_op_status(
                    vm=vm, jobid=0,
                    opcode=opcode, status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            sys.stderr.write("    ...done\n")