Statistics
| Branch: | Tag: | Revision:

root / snf-cyclades-app / synnefo / logic / management / commands / reconcile.py @ 7e136fd8

History | View | Annotate | Download (9.4 kB)

1
# Copyright 2011-2012 GRNET S.A. All rights reserved.
2
#
3
# Redistribution and use in source and binary forms, with or without
4
# modification, are permitted provided that the following conditions
5
# are met:
6
#
7
#   1. Redistributions of source code must retain the above copyright
8
#      notice, this list of conditions and the following disclaimer.
9
#
10
#  2. Redistributions in binary form must reproduce the above copyright
11
#     notice, this list of conditions and the following disclaimer in the
12
#     documentation and/or other materials provided with the distribution.
13
#
14
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
# SUCH DAMAGE.
25
#
26
# The views and conclusions contained in the software and documentation are
27
# those of the authors and should not be interpreted as representing official
28
# policies, either expressed or implied, of GRNET S.A.
29
#
30
"""Reconciliation management command
31

32
Management command to reconcile the contents of the Synnefo DB with
33
the state of the Ganeti backend. See docstring on top of
34
logic/reconciliation.py for a description of reconciliation rules.
35

36
"""
37
import sys
38

    
39
from datetime import datetime, timedelta
40
from optparse import make_option
41

    
42
from django.conf import settings
43
from django.db.models import Q
44
from django.core.management.base import BaseCommand, CommandError
45

    
46
from synnefo.db.models import VirtualMachine, Network
47
from synnefo.logic import reconciliation, backend
48
from synnefo.util.rapi import GanetiRapiClient
49

    
50

    
51
# Module-level memo of owner identifiers already checked by user_exists(),
# so that each distinct owner triggers at most one database lookup per run.
_valid_users = set()
_invalid_users = set()


def user_exists(user):
    """Return True if an AstakosUser whose e-mail equals `user` exists.

    The answer for every distinct `user` value is remembered in the
    module-level `_valid_users` / `_invalid_users` sets, so repeated calls
    with the same value do not hit the database again.

    An existence query is used instead of fetching the object, so the
    check neither loads a full row nor fails when more than one account
    matches the e-mail address.

    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably so the command can be loaded without astakos
    # being importable unless an owner check is requested -- confirm.
    from astakos.im.models import AstakosUser

    if user in _valid_users:
        return True
    if user in _invalid_users:
        return False

    found = AstakosUser.objects.filter(email=user).exists()
    if found:
        _valid_users.add(user)
    else:
        _invalid_users.add(user)
    return found
71

    
72

    
73
class Command(BaseCommand):
    """Detect, and optionally fix, mismatches between the DB and Ganeti.

    Detection is selected with the --detect-* options and repair with the
    matching --fix-* options; a --fix-* option is only accepted together
    with its --detect-* counterpart (see `_process_args`).

    Human-oriented status messages are written to stderr, while the lists
    of affected IDs are written to stdout.

    """
    can_import_settings = True

    help = 'Reconcile contents of Synnefo DB with state of Ganeti backend'
    # Original author's note: the management command runs inside an SQL
    # transaction.
    # NOTE(review): Django documents `output_transaction` as wrapping the
    # command's printed SQL output in BEGIN;/COMMIT; -- confirm that this
    # flag has the effect intended here.
    output_transaction = True
    option_list = BaseCommand.option_list + (
        make_option('--detect-stale', action='store_true',
                    dest='detect_stale', default=False,
                    help='Detect stale VM entries in DB'),
        make_option('--detect-orphans', action='store_true',
                    dest='detect_orphans', default=False,
                    help='Detect orphan instances in Ganeti'),
        make_option('--detect-unsynced', action='store_true',
                    dest='detect_unsynced', default=False,
                    help='Detect unsynced operstate between '
                         'DB and Ganeti'),
        make_option('--detect-orphan-servers', action='store_true',
                    dest='detect_orphan_servers', default=False,
                    help='Detect VMs with an invalid owner'),
        make_option('--detect-orphan-networks', action='store_true',
                    dest='detect_orphan_networks', default=False,
                    help='Detect networks with an invalid owner'),
        make_option('--detect-all', action='store_true',
                    dest='detect_all', default=False,
                    help='Enable all --detect-* arguments'),
        make_option('--fix-stale', action='store_true',
                    dest='fix_stale', default=False,
                    help='Fix (remove) stale DB entries in DB'),
        make_option('--fix-orphans', action='store_true',
                    dest='fix_orphans', default=False,
                    help='Fix (remove) orphan Ganeti VMs'),
        make_option('--fix-unsynced', action='store_true',
                    dest='fix_unsynced', default=False,
                    help='Fix server operstate in DB, set '
                         'from Ganeti'),
        make_option('--fix-all', action='store_true',
                    dest='fix_all', default=False,
                    help='Enable all --fix-* arguments'),
    )

    @staticmethod
    def _note(message):
        """Write one status line for the operator to stderr."""
        sys.stderr.write(message + "\n")

    @staticmethod
    def _listing(entries):
        """Write `entries` to stdout, one per line, indented by four spaces."""
        sys.stdout.write("    " + "\n    ".join(entries) + "\n")

    def _process_args(self, options):
        """Expand the --*-all switches and validate the option combination.

        `options` is modified in place: when `detect_all` (resp. `fix_all`)
        is set, every `detect_*` (resp. `fix_*`) entry is set to True.

        Raises CommandError if no detection is enabled, or if a specific
        --fix-* option is enabled without its --detect-* counterpart.

        """
        detect_keys = [key for key in options if key.startswith('detect_')]
        fix_keys = [key for key in options if key.startswith('fix_')]

        if options['detect_all']:
            for key in detect_keys:
                options[key] = True
        if options['fix_all']:
            for key in fix_keys:
                options[key] = True

        if not any(options[key] for key in detect_keys):
            raise CommandError("At least one of --detect-* must be specified")

        for fix_key in fix_keys:
            # --fix-all is only shorthand for the individual --fix-* options,
            # each of which is checked against its own --detect-* option
            # below; it must not additionally demand --detect-all.
            if fix_key == 'fix_all':
                continue
            detect_key = fix_key.replace('fix_', 'detect_', 1)
            if options[fix_key] and not options[detect_key]:
                # Report the flags as typed on the command line (dashes),
                # not the internal option names (underscores).
                raise CommandError("Cannot use --%s without corresponding "
                                   "--%s argument" %
                                   (fix_key.replace('_', '-'),
                                    detect_key.replace('_', '-')))

    def handle(self, **options):
        """Run the requested detections, then the requested fixes.

        `options` are the parsed command-line options; they are validated
        (and the --*-all switches expanded) by `_process_args` first.

        """
        verbosity = int(options['verbosity'])
        self._process_args(options)
        # "Nothing found" messages are shown at verbosity 2 and above.
        verbose = verbosity >= 2

        db_servers = reconciliation.get_servers_from_db()
        ganeti_instances = reconciliation.get_instances_from_ganeti()

        # Results of the detection phase. The empty defaults keep the fix
        # phase well-defined for detections that were not requested.
        stale = orphans = unsynced = ()

        #
        # Detect problems
        #
        if options['detect_stale']:
            stale = reconciliation.stale_servers_in_db(db_servers,
                                                       ganeti_instances)
            if len(stale) > 0:
                self._note("Found the following stale server IDs: ")
                self._listing([str(server_id) for server_id in stale])
            elif verbose:
                self._note("Found no stale server IDs in DB.")

        if options['detect_orphans']:
            orphans = reconciliation.orphan_instances_in_ganeti(
                db_servers, ganeti_instances)
            if len(orphans) > 0:
                self._note("Found orphan Ganeti instances with IDs: ")
                self._listing([str(instance_id) for instance_id in orphans])
            elif verbose:
                self._note("Found no orphan Ganeti instances.")

        if options['detect_unsynced']:
            unsynced = reconciliation.unsynced_operstate(db_servers,
                                                         ganeti_instances)
            if len(unsynced) > 0:
                self._note("The operstate of the following server"
                           " IDs is out-of-sync:")
                self._listing(
                    ["%d is %s in DB, %s in Ganeti" %
                     (server_id, db_state, ('UP' if ganeti_up else 'DOWN'))
                     for server_id, db_state, ganeti_up in unsynced])
            elif verbose:
                self._note("The operstate of all servers is in sync.")

        if options['detect_orphan_servers']:
            for server in VirtualMachine.objects.filter(deleted=False):
                owner = server.userid
                if not user_exists(owner):
                    msg = "Server %d (%s) has unknown owner %s\n" % (
                        server.id, server.name, owner)
                    self.stdout.write(msg)

        if options['detect_orphan_networks']:
            for network in Network.objects.exclude(state='DELETED'):
                owner = network.userid
                # Networks with an empty owner are not reported.
                if owner and not user_exists(owner):
                    msg = "Network %d (%s) has unknown owner %s\n" % (
                        network.id, network.name, owner)
                    self.stdout.write(msg)

        #
        # Then fix them
        #
        if options['fix_stale'] and len(stale) > 0:
            self._note("Simulating successful Ganeti removal for %d "
                       "servers in the DB:" % len(stale))
            for vm in VirtualMachine.objects.filter(pk__in=stale):
                # The removal is simulated rather than performed through
                # Ganeti, hence the placeholder job id of 0.
                backend.process_op_status(
                    vm=vm, jobid=0,
                    opcode='OP_INSTANCE_REMOVE', status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            self._note("    ...done")

        if options['fix_orphans'] and len(orphans) > 0:
            self._note("Issuing OP_INSTANCE_REMOVE for %d Ganeti instances:" %
                       len(orphans))
            # One client is enough for all deletions.
            client = GanetiRapiClient(*settings.GANETI_CLUSTER_INFO)
            for instance_id in orphans:
                client.DeleteInstance(
                    '%s%s' % (settings.BACKEND_PREFIX_ID, str(instance_id)))
            self._note("    ...done")

        if options['fix_unsynced'] and len(unsynced) > 0:
            self._note("Setting the state of %d out-of-sync VMs:" %
                       len(unsynced))
            for server_id, db_state, ganeti_up in unsynced:
                vm = VirtualMachine.objects.get(pk=server_id)
                # Replay the event matching what Ganeti reports so that the
                # DB operstate is set from the backend.
                if ganeti_up:
                    opcode = "OP_INSTANCE_REBOOT"
                else:
                    opcode = "OP_INSTANCE_SHUTDOWN"
                backend.process_op_status(
                    vm=vm, jobid=0,
                    opcode=opcode, status='success',
                    logmsg='Reconciliation: simulated Ganeti event')
            self._note("    ...done")