
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42

    
43
from ganeti import ssh
44
from ganeti import utils
45
from ganeti import errors
46
from ganeti import hypervisor
47
from ganeti import locking
48
from ganeti import constants
49
from ganeti import objects
50
from ganeti import serializer
51
from ganeti import ssconf
52
from ganeti import uidpool
53
from ganeti import compat
54
from ganeti import masterd
55
from ganeti import netutils
56
from ganeti import ht
57
from ganeti import query
58
from ganeti import qlang
59

    
60
import ganeti.masterd.instance # pylint: disable-msg=W0611
61

    
62
# Common opcode attributes
63

    
64
#: output fields for a query operation
65
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
66

    
67

    
68
#: the shutdown timeout
69
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
70
                     ht.TPositiveInt)
71

    
72
#: the force parameter
73
_PForce = ("force", False, ht.TBool)
74

    
75
#: a required instance name (for single-instance LUs)
76
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
77

    
78
#: Whether to ignore offline nodes
79
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
80

    
81
#: a required node name (for single-node LUs)
82
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
83

    
84
#: the migration type (live/non-live)
85
_PMigrationMode = ("mode", None,
86
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
87

    
88
#: the obsolete 'live' mode (boolean)
89
_PMigrationLive = ("live", None, ht.TMaybeBool)
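
# Example (illustrative only, not part of the original module): an individual
# LU combines these shared definitions with its own entries in its _OP_PARAMS
# list; every entry is a (name, default, type-check) tuple, e.g.:
#
#   _OP_PARAMS = [
#     _PInstanceName,
#     _PForce,
#     ("start", True, ht.TBool),  # hypothetical LU-specific parameter
#     ]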
90

    
91

    
92
# End types
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - redefine HPATH and HTYPE
102
    - optionally redefine their run requirements:
103
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
104

105
  Note that all commands require root permissions.
106

107
  @ivar dry_run_result: the value (if any) that will be returned to the caller
108
      in dry-run mode (signalled by opcode dry_run parameter)
109
  @cvar _OP_PARAMS: a list of opcode attributes, their default values
110
      they should get if not already defined, and types they must match
111

112
  """
113
  HPATH = None
114
  HTYPE = None
115
  _OP_PARAMS = []
116
  REQ_BGL = True
117

    
118
  def __init__(self, processor, op, context, rpc):
119
    """Constructor for LogicalUnit.
120

121
    This needs to be overridden in derived classes in order to check op
122
    validity.
123

124
    """
125
    self.proc = processor
126
    self.op = op
127
    self.cfg = context.cfg
128
    self.context = context
129
    self.rpc = rpc
130
    # Dicts used to declare locking needs to mcpu
131
    self.needed_locks = None
132
    self.acquired_locks = {}
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    self.__ssh = None
139
    # logging
140
    self.Log = processor.Log # pylint: disable-msg=C0103
141
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
142
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
143
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
144
    # support for dry-run
145
    self.dry_run_result = None
146
    # support for generic debug attribute
147
    if (not hasattr(self.op, "debug_level") or
148
        not isinstance(self.op.debug_level, int)):
149
      self.op.debug_level = 0
150

    
151
    # Tasklets
152
    self.tasklets = None
153

    
154
    # The new kind-of-type-system
155
    op_id = self.op.OP_ID
156
    for attr_name, aval, test in self._OP_PARAMS:
157
      if not hasattr(op, attr_name):
158
        if aval == ht.NoDefault:
159
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
160
                                     (op_id, attr_name), errors.ECODE_INVAL)
161
        else:
162
          if callable(aval):
163
            dval = aval()
164
          else:
165
            dval = aval
166
          setattr(self.op, attr_name, dval)
167
      attr_val = getattr(op, attr_name)
168
      if test == ht.NoType:
169
        # no tests here
170
        continue
171
      if not callable(test):
172
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
173
                                     " given type is not a proper type (%s)" %
174
                                     (op_id, attr_name, test))
175
      if not test(attr_val):
176
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
177
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
178
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
179
                                   (op_id, attr_name), errors.ECODE_INVAL)
180

    
181
    self.CheckArguments()
182

    
183
  def __GetSSH(self):
184
    """Returns the SshRunner object
185

186
    """
187
    if not self.__ssh:
188
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
189
    return self.__ssh
190

    
191
  ssh = property(fget=__GetSSH)
192

    
193
  def CheckArguments(self):
194
    """Check syntactic validity for the opcode arguments.
195

196
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing it separately is better because:
200

201
      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)
204

205
    The function is allowed to change the self.op attribute so that
206
    later methods no longer need to worry about missing parameters.
207

208
    """
209
    pass
210

    
211
  def ExpandNames(self):
212
    """Expand names for this LU.
213

214
    This method is called before starting to execute the opcode, and it should
215
    update all the parameters of the opcode to their canonical form (e.g. a
216
    short node name must be fully expanded after this method has successfully
217
    completed). This way locking, hooks, logging, etc. can work correctly.
218

219
    LUs which implement this method must also populate the self.needed_locks
220
    member, as a dict with lock levels as keys, and a list of needed lock names
221
    as values. Rules:
222

223
      - use an empty dict if you don't need any lock
224
      - if you don't need any lock at a particular level omit that level
225
      - don't put anything for the BGL level
226
      - if you want all locks at a level use locking.ALL_SET as a value
227

228
    If you need to share locks (rather than acquire them exclusively) at one
229
    level you can modify self.share_locks, setting a true value (usually 1) for
230
    that level. By default locks are not shared.
231

232
    This function can also define a list of tasklets, which then will be
233
    executed in order instead of the usual LU-level CheckPrereq and Exec
234
    functions, if those are not defined by the LU.
235

236
    Examples::
237

238
      # Acquire all nodes and one instance
239
      self.needed_locks = {
240
        locking.LEVEL_NODE: locking.ALL_SET,
241
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
242
      }
243
      # Acquire just two nodes
244
      self.needed_locks = {
245
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
246
      }
247
      # Acquire no locks
248
      self.needed_locks = {} # No, you can't leave it to the default value None
249

250
    """
251
    # The implementation of this method is mandatory only if the new LU is
252
    # concurrent, so that old LUs don't need to be changed all at the same
253
    # time.
254
    if self.REQ_BGL:
255
      self.needed_locks = {} # Exclusive LUs don't need locks.
256
    else:
257
      raise NotImplementedError
258

    
259
  def DeclareLocks(self, level):
260
    """Declare LU locking needs for a level
261

262
    While most LUs can just declare their locking needs at ExpandNames time,
263
    sometimes there's the need to calculate some locks after having acquired
264
    the ones before. This function is called just before acquiring locks at a
265
    particular level, but after acquiring the ones at lower levels, and permits
266
    such calculations. It can be used to modify self.needed_locks, and by
267
    default it does nothing.
268

269
    This function is only called if you have something already set in
270
    self.needed_locks for the level.
271

272
    @param level: Locking level which is going to be locked
273
    @type level: member of ganeti.locking.LEVELS
274

275
    """
276

    
277
  def CheckPrereq(self):
278
    """Check prerequisites for this LU.
279

280
    This method should check that the prerequisites for the execution
281
    of this LU are fulfilled. It can do internode communication, but
282
    it should be idempotent - no cluster or system changes are
283
    allowed.
284

285
    The method should raise errors.OpPrereqError in case something is
286
    not fulfilled. Its return value is ignored.
287

288
    This method should also update all the parameters of the opcode to
289
    their canonical form if it hasn't been done by ExpandNames before.
290

291
    """
292
    if self.tasklets is not None:
293
      for (idx, tl) in enumerate(self.tasklets):
294
        logging.debug("Checking prerequisites for tasklet %s/%s",
295
                      idx + 1, len(self.tasklets))
296
        tl.CheckPrereq()
297
    else:
298
      pass
299

    
300
  def Exec(self, feedback_fn):
301
    """Execute the LU.
302

303
    This method should implement the actual work. It should raise
304
    errors.OpExecError for failures that are somewhat dealt with in
305
    code, or expected.
306

307
    """
308
    if self.tasklets is not None:
309
      for (idx, tl) in enumerate(self.tasklets):
310
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
311
        tl.Exec(feedback_fn)
312
    else:
313
      raise NotImplementedError
314

    
315
  def BuildHooksEnv(self):
316
    """Build hooks environment for this LU.
317

318
    This method should return a three-element tuple consisting of: a dict
319
    containing the environment that will be used for running the
320
    specific hook for this LU, a list of node names on which the hook
321
    should run before the execution, and a list of node names on which
322
    the hook should run after the execution.
323

324
    The keys of the dict must not be prefixed with 'GANETI_', as this will
325
    be handled in the hooks runner. Also note additional keys will be
326
    added by the hooks runner. If the LU doesn't define any
327
    environment, an empty dict (and not None) should be returned.
328

329
    If there are no nodes, an empty list (and not None) should be returned.
330

331
    Note that if the HPATH for a LU class is None, this function will
332
    not be called.
333

334
    """
335
    raise NotImplementedError
336

    
337
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
338
    """Notify the LU about the results of its hooks.
339

340
    This method is called every time a hooks phase is executed, and notifies
341
    the Logical Unit about the hooks' result. The LU can then use it to alter
342
    its result based on the hooks.  By default the method does nothing and the
343
    previous result is passed back unchanged, but any LU can override it if it
344
    wants to use the local cluster hook-scripts somehow.
345

346
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
347
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
348
    @param hook_results: the results of the multi-node hooks rpc call
349
    @param feedback_fn: function used to send feedback back to the caller
350
    @param lu_result: the previous Exec result this LU had, or None
351
        in the PRE phase
352
    @return: the new Exec result, based on the previous result
353
        and hook results
354

355
    """
356
    # API must be kept, thus we ignore the 'unused argument' and 'could
    # be a function' warnings
358
    # pylint: disable-msg=W0613,R0201
359
    return lu_result
360

    
361
  def _ExpandAndLockInstance(self):
362
    """Helper function to expand and lock an instance.
363

364
    Many LUs that work on an instance take its name in self.op.instance_name
365
    and need to expand it and then declare the expanded name for locking. This
366
    function does it, and then updates self.op.instance_name to the expanded
367
    name. It also initializes needed_locks as a dict, if this hasn't been done
368
    before.
369

370
    """
371
    if self.needed_locks is None:
372
      self.needed_locks = {}
373
    else:
374
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
375
        "_ExpandAndLockInstance called with instance-level locks set"
376
    self.op.instance_name = _ExpandInstanceName(self.cfg,
377
                                                self.op.instance_name)
378
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
379

    
380
  def _LockInstancesNodes(self, primary_only=False):
381
    """Helper function to declare instances' nodes for locking.
382

383
    This function should be called after locking one or more instances to lock
384
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
385
    with all primary or secondary nodes for instances already locked and
386
    present in self.needed_locks[locking.LEVEL_INSTANCE].
387

388
    It should be called from DeclareLocks, and for safety only works if
389
    self.recalculate_locks[locking.LEVEL_NODE] is set.
390

391
    In the future it may grow parameters to just lock some instance's nodes, or
392
    to just lock primaries or secondary nodes, if needed.
393

394
    It should be called in DeclareLocks in a way similar to::
395

396
      if level == locking.LEVEL_NODE:
397
        self._LockInstancesNodes()
398

399
    @type primary_only: boolean
400
    @param primary_only: only lock primary nodes of locked instances
401

402
    """
403
    assert locking.LEVEL_NODE in self.recalculate_locks, \
404
      "_LockInstancesNodes helper function called with no nodes to recalculate"
405

    
406
    # TODO: check if we've really been called with the instance locks held
407

    
408
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
409
    # future we might want to have different behaviors depending on the value
410
    # of self.recalculate_locks[locking.LEVEL_NODE]
411
    wanted_nodes = []
412
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
413
      instance = self.context.cfg.GetInstanceInfo(instance_name)
414
      wanted_nodes.append(instance.primary_node)
415
      if not primary_only:
416
        wanted_nodes.extend(instance.secondary_nodes)
417

    
418
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
419
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
420
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
421
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
422

    
423
    del self.recalculate_locks[locking.LEVEL_NODE]
424

    
425

    
426
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
427
  """Simple LU which runs no hooks.
428

429
  This LU is intended as a parent for other LogicalUnits which will
430
  run no hooks, in order to reduce duplicate code.
431

432
  """
433
  HPATH = None
434
  HTYPE = None
435

    
436
  def BuildHooksEnv(self):
437
    """Empty BuildHooksEnv for NoHooksLu.
438

439
    This just raises an error.
440

441
    """
442
    assert False, "BuildHooksEnv called for NoHooksLUs"
443

    
444

    
445
class Tasklet:
446
  """Tasklet base class.
447

448
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
450
  tasklets know nothing about locks.
451

452
  Subclasses must follow these rules:
453
    - Implement CheckPrereq
454
    - Implement Exec
455

456
  """
457
  def __init__(self, lu):
458
    self.lu = lu
459

    
460
    # Shortcuts
461
    self.cfg = lu.cfg
462
    self.rpc = lu.rpc
463

    
464
  def CheckPrereq(self):
465
    """Check prerequisites for this tasklets.
466

467
    This method should check whether the prerequisites for the execution of
468
    this tasklet are fulfilled. It can do internode communication, but it
469
    should be idempotent - no cluster or system changes are allowed.
470

471
    The method should raise errors.OpPrereqError in case something is not
472
    fulfilled. Its return value is ignored.
473

474
    This method should also update all parameters to their canonical form if it
475
    hasn't been done before.
476

477
    """
478
    pass
479

    
480
  def Exec(self, feedback_fn):
481
    """Execute the tasklet.
482

483
    This method should implement the actual work. It should raise
484
    errors.OpExecError for failures that are somewhat dealt with in code, or
485
    expected.
486

487
    """
488
    raise NotImplementedError
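
  # A minimal sketch (hypothetical, for illustration only) of a concrete
  # tasklet following the two rules above:
  #
  #   class _NoopTasklet(Tasklet):
  #     def CheckPrereq(self):
  #       pass  # nothing to verify
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("nothing to do")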
489

    
490

    
491
class _QueryBase:
492
  """Base for query utility classes.
493

494
  """
495
  #: Attribute holding field definitions
496
  FIELDS = None
497

    
498
  def __init__(self, names, fields, use_locking):
499
    """Initializes this class.
500

501
    """
502
    self.names = names
503
    self.use_locking = use_locking
504

    
505
    self.query = query.Query(self.FIELDS, fields)
506
    self.requested_data = self.query.RequestedData()
507

    
508
  @classmethod
509
  def FieldsQuery(cls, fields):
510
    """Returns list of available fields.
511

512
    @return: List of L{objects.QueryFieldDefinition}
513

514
    """
515
    if fields is None:
516
      # Client requests all fields
517
      fdefs = query.GetAllFields(cls.FIELDS.values())
518
    else:
519
      fdefs = query.Query(cls.FIELDS, fields).GetFields()
520

    
521
    return {
522
      "fields": [fdef.ToDict() for fdef in fdefs],
523
      }
524

    
525
  def ExpandNames(self, lu):
526
    """Expand names for this query.
527

528
    See L{LogicalUnit.ExpandNames}.
529

530
    """
531
    raise NotImplementedError()
532

    
533
  def DeclareLocks(self, level):
534
    """Declare locks for this query.
535

536
    See L{LogicalUnit.DeclareLocks}.
537

538
    """
539
    raise NotImplementedError()
540

    
541
  def _GetQueryData(self, lu):
542
    """Collects all data for this query.
543

544
    @return: Query data object
545

546
    """
547
    raise NotImplementedError()
548

    
549
  def NewStyleQuery(self, lu):
550
    """Collect data and execute query.
551

552
    """
553
    data = self._GetQueryData(lu)
554

    
555
    return {
556
      "data": self.query.Query(data),
557
      "fields": [fdef.ToDict()
558
                 for fdef in self.query.GetFields()],
559
      }
560

    
561
  def OldStyleQuery(self, lu):
562
    """Collect data and execute query.
563

564
    """
565
    return self.query.OldStyleQuery(self._GetQueryData(lu))
566

    
567

    
568
def _GetWantedNodes(lu, nodes):
569
  """Returns list of checked and expanded node names.
570

571
  @type lu: L{LogicalUnit}
572
  @param lu: the logical unit on whose behalf we execute
573
  @type nodes: list
574
  @param nodes: non-empty list of node names to expand and check
575
  @rtype: list
576
  @return: the list of nodes, sorted
577
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
578

579
  """
580
  if not nodes:
581
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
582
      " non-empty list of nodes whose name is to be expanded.")
583

    
584
  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
585
  return utils.NiceSort(wanted)
586

    
587

    
588
def _GetWantedInstances(lu, instances):
589
  """Returns list of checked and expanded instance names.
590

591
  @type lu: L{LogicalUnit}
592
  @param lu: the logical unit on whose behalf we execute
593
  @type instances: list
594
  @param instances: list of instance names or None for all instances
595
  @rtype: list
596
  @return: the list of instances, sorted
597
  @raise errors.OpPrereqError: if the instances parameter is wrong type
598
  @raise errors.OpPrereqError: if any of the passed instances is not found
599

600
  """
601
  if instances:
602
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
603
  else:
604
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
605
  return wanted
606

    
607

    
608
def _GetUpdatedParams(old_params, update_dict,
609
                      use_default=True, use_none=False):
610
  """Return the new version of a parameter dictionary.
611

612
  @type old_params: dict
613
  @param old_params: old parameters
614
  @type update_dict: dict
615
  @param update_dict: dict containing new parameter values, or
616
      constants.VALUE_DEFAULT to reset the parameter to its default
617
      value
618
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
624
  @rtype: dict
625
  @return: the new parameter dictionary
626

627
  """
628
  params_copy = copy.deepcopy(old_params)
629
  for key, val in update_dict.iteritems():
630
    if ((use_default and val == constants.VALUE_DEFAULT) or
631
        (use_none and val is None)):
632
      try:
633
        del params_copy[key]
634
      except KeyError:
635
        pass
636
    else:
637
      params_copy[key] = val
638
  return params_copy
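
# Worked example (illustrative values only): with use_default=True,
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
# returns {"b": 2, "c": 3}: "a" is reset by removal, "c" is added, and the
# input dictionaries are left untouched thanks to the deep copy.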
639

    
640

    
641
def _CheckOutputFields(static, dynamic, selected):
642
  """Checks whether all selected fields are valid.
643

644
  @type static: L{utils.FieldSet}
645
  @param static: static fields set
646
  @type dynamic: L{utils.FieldSet}
647
  @param dynamic: dynamic fields set
648

649
  """
650
  f = utils.FieldSet()
651
  f.Extend(static)
652
  f.Extend(dynamic)
653

    
654
  delta = f.NonMatching(selected)
655
  if delta:
656
    raise errors.OpPrereqError("Unknown output fields selected: %s"
657
                               % ",".join(delta), errors.ECODE_INVAL)
658

    
659

    
660
def _CheckGlobalHvParams(params):
661
  """Validates that given hypervisor params are not global ones.
662

663
  This will ensure that instances don't get customised versions of
664
  global params.
665

666
  """
667
  used_globals = constants.HVC_GLOBALS.intersection(params)
668
  if used_globals:
669
    msg = ("The following hypervisor parameters are global and cannot"
670
           " be customized at instance level, please modify them at"
671
           " cluster level: %s" % utils.CommaJoin(used_globals))
672
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
673

    
674

    
675
def _CheckNodeOnline(lu, node, msg=None):
676
  """Ensure that a given node is online.
677

678
  @param lu: the LU on behalf of which we make the check
679
  @param node: the node to check
680
  @param msg: if passed, should be a message to replace the default one
681
  @raise errors.OpPrereqError: if the node is offline
682

683
  """
684
  if msg is None:
685
    msg = "Can't use offline node"
686
  if lu.cfg.GetNodeInfo(node).offline:
687
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
688

    
689

    
690
def _CheckNodeNotDrained(lu, node):
691
  """Ensure that a given node is not drained.
692

693
  @param lu: the LU on behalf of which we make the check
694
  @param node: the node to check
695
  @raise errors.OpPrereqError: if the node is drained
696

697
  """
698
  if lu.cfg.GetNodeInfo(node).drained:
699
    raise errors.OpPrereqError("Can't use drained node %s" % node,
700
                               errors.ECODE_STATE)
701

    
702

    
703
def _CheckNodeVmCapable(lu, node):
704
  """Ensure that a given node is vm capable.
705

706
  @param lu: the LU on behalf of which we make the check
707
  @param node: the node to check
708
  @raise errors.OpPrereqError: if the node is not vm capable
709

710
  """
711
  if not lu.cfg.GetNodeInfo(node).vm_capable:
712
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
713
                               errors.ECODE_STATE)
714

    
715

    
716
def _CheckNodeHasOS(lu, node, os_name, force_variant):
717
  """Ensure that a node supports a given OS.
718

719
  @param lu: the LU on behalf of which we make the check
720
  @param node: the node to check
721
  @param os_name: the OS to query about
722
  @param force_variant: whether to ignore variant errors
723
  @raise errors.OpPrereqError: if the node does not support the OS
724

725
  """
726
  result = lu.rpc.call_os_get(node, os_name)
727
  result.Raise("OS '%s' not in supported OS list for node %s" %
728
               (os_name, node),
729
               prereq=True, ecode=errors.ECODE_INVAL)
730
  if not force_variant:
731
    _CheckOSVariant(result.payload, os_name)
732

    
733

    
734
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
735
  """Ensure that a node has the given secondary ip.
736

737
  @type lu: L{LogicalUnit}
738
  @param lu: the LU on behalf of which we make the check
739
  @type node: string
740
  @param node: the node to check
741
  @type secondary_ip: string
742
  @param secondary_ip: the ip to check
743
  @type prereq: boolean
744
  @param prereq: whether to throw a prerequisite or an execute error
745
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
746
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
747

748
  """
749
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
750
  result.Raise("Failure checking secondary ip on node %s" % node,
751
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
752
  if not result.payload:
753
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
754
           " please fix and re-run this command" % secondary_ip)
755
    if prereq:
756
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
757
    else:
758
      raise errors.OpExecError(msg)
759

    
760

    
761
def _RequireFileStorage():
762
  """Checks that file storage is enabled.
763

764
  @raise errors.OpPrereqError: when file storage is disabled
765

766
  """
767
  if not constants.ENABLE_FILE_STORAGE:
768
    raise errors.OpPrereqError("File storage disabled at configure time",
769
                               errors.ECODE_INVAL)
770

    
771

    
772
def _CheckDiskTemplate(template):
773
  """Ensure a given disk template is valid.
774

775
  """
776
  if template not in constants.DISK_TEMPLATES:
777
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
778
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
779
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
780
  if template == constants.DT_FILE:
781
    _RequireFileStorage()
782
  return True
783

    
784

    
785
def _CheckStorageType(storage_type):
786
  """Ensure a given storage type is valid.
787

788
  """
789
  if storage_type not in constants.VALID_STORAGE_TYPES:
790
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
791
                               errors.ECODE_INVAL)
792
  if storage_type == constants.ST_FILE:
793
    _RequireFileStorage()
794
  return True
795

    
796

    
797
def _GetClusterDomainSecret():
798
  """Reads the cluster domain secret.
799

800
  """
801
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
802
                               strict=True)
803

    
804

    
805
def _CheckInstanceDown(lu, instance, reason):
806
  """Ensure that an instance is not running."""
807
  if instance.admin_up:
808
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
809
                               (instance.name, reason), errors.ECODE_STATE)
810

    
811
  pnode = instance.primary_node
812
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
813
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
814
              prereq=True, ecode=errors.ECODE_ENVIRON)
815

    
816
  if instance.name in ins_l.payload:
817
    raise errors.OpPrereqError("Instance %s is running, %s" %
818
                               (instance.name, reason), errors.ECODE_STATE)
819

    
820

    
821
def _ExpandItemName(fn, name, kind):
822
  """Expand an item name.
823

824
  @param fn: the function to use for expansion
825
  @param name: requested item name
826
  @param kind: text description ('Node' or 'Instance')
827
  @return: the resolved (full) name
828
  @raise errors.OpPrereqError: if the item is not found
829

830
  """
831
  full_name = fn(name)
832
  if full_name is None:
833
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
834
                               errors.ECODE_NOENT)
835
  return full_name
836

    
837

    
838
def _ExpandNodeName(cfg, name):
839
  """Wrapper over L{_ExpandItemName} for nodes."""
840
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
841

    
842

    
843
def _ExpandInstanceName(cfg, name):
844
  """Wrapper over L{_ExpandItemName} for instance."""
845
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
846

    
847

    
848
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
849
                          memory, vcpus, nics, disk_template, disks,
850
                          bep, hvp, hypervisor_name):
851
  """Builds instance related env variables for hooks
852

853
  This builds the hook environment from individual variables.
854

855
  @type name: string
856
  @param name: the name of the instance
857
  @type primary_node: string
858
  @param primary_node: the name of the instance's primary node
859
  @type secondary_nodes: list
860
  @param secondary_nodes: list of secondary nodes as strings
861
  @type os_type: string
862
  @param os_type: the name of the instance's OS
863
  @type status: boolean
864
  @param status: the should_run status of the instance
865
  @type memory: string
866
  @param memory: the memory size of the instance
867
  @type vcpus: string
868
  @param vcpus: the count of VCPUs the instance has
869
  @type nics: list
870
  @param nics: list of tuples (ip, mac, mode, link) representing
871
      the NICs the instance has
872
  @type disk_template: string
873
  @param disk_template: the disk template of the instance
874
  @type disks: list
875
  @param disks: the list of (size, mode) pairs
876
  @type bep: dict
877
  @param bep: the backend parameters for the instance
878
  @type hvp: dict
879
  @param hvp: the hypervisor parameters for the instance
880
  @type hypervisor_name: string
881
  @param hypervisor_name: the hypervisor for the instance
882
  @rtype: dict
883
  @return: the hook environment for this instance
884

885
  """
886
  if status:
887
    str_status = "up"
888
  else:
889
    str_status = "down"
890
  env = {
891
    "OP_TARGET": name,
892
    "INSTANCE_NAME": name,
893
    "INSTANCE_PRIMARY": primary_node,
894
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
895
    "INSTANCE_OS_TYPE": os_type,
896
    "INSTANCE_STATUS": str_status,
897
    "INSTANCE_MEMORY": memory,
898
    "INSTANCE_VCPUS": vcpus,
899
    "INSTANCE_DISK_TEMPLATE": disk_template,
900
    "INSTANCE_HYPERVISOR": hypervisor_name,
901
  }
902

    
903
  if nics:
904
    nic_count = len(nics)
905
    for idx, (ip, mac, mode, link) in enumerate(nics):
906
      if ip is None:
907
        ip = ""
908
      env["INSTANCE_NIC%d_IP" % idx] = ip
909
      env["INSTANCE_NIC%d_MAC" % idx] = mac
910
      env["INSTANCE_NIC%d_MODE" % idx] = mode
911
      env["INSTANCE_NIC%d_LINK" % idx] = link
912
      if mode == constants.NIC_MODE_BRIDGED:
913
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
914
  else:
915
    nic_count = 0
916

    
917
  env["INSTANCE_NIC_COUNT"] = nic_count
918

    
919
  if disks:
920
    disk_count = len(disks)
921
    for idx, (size, mode) in enumerate(disks):
922
      env["INSTANCE_DISK%d_SIZE" % idx] = size
923
      env["INSTANCE_DISK%d_MODE" % idx] = mode
924
  else:
925
    disk_count = 0
926

    
927
  env["INSTANCE_DISK_COUNT"] = disk_count
928

    
929
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
930
    for key, value in source.items():
931
      env["INSTANCE_%s_%s" % (kind, key)] = value
932

    
933
  return env
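
# For illustration (hypothetical instance): a two-NIC, one-disk instance would
# produce, among others, INSTANCE_NIC_COUNT=2, INSTANCE_NIC0_MAC,
# INSTANCE_NIC1_MAC, INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE and
# INSTANCE_DISK0_MODE, plus one INSTANCE_BE_*/INSTANCE_HV_* variable per
# backend/hypervisor parameter; the GANETI_ prefix is added later by the
# hooks runner.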
934

    
935

    
936
def _NICListToTuple(lu, nics):
937
  """Build a list of nic information tuples.
938

939
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
940
  value in LUQueryInstanceData.
941

942
  @type lu:  L{LogicalUnit}
943
  @param lu: the logical unit on whose behalf we execute
944
  @type nics: list of L{objects.NIC}
945
  @param nics: list of nics to convert to hooks tuples
946

947
  """
948
  hooks_nics = []
949
  cluster = lu.cfg.GetClusterInfo()
950
  for nic in nics:
951
    ip = nic.ip
952
    mac = nic.mac
953
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
954
    mode = filled_params[constants.NIC_MODE]
955
    link = filled_params[constants.NIC_LINK]
956
    hooks_nics.append((ip, mac, mode, link))
957
  return hooks_nics
958

    
959

    
960
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
961
  """Builds instance related env variables for hooks from an object.
962

963
  @type lu: L{LogicalUnit}
964
  @param lu: the logical unit on whose behalf we execute
965
  @type instance: L{objects.Instance}
966
  @param instance: the instance for which we should build the
967
      environment
968
  @type override: dict
969
  @param override: dictionary with key/values that will override
970
      our values
971
  @rtype: dict
972
  @return: the hook environment dictionary
973

974
  """
975
  cluster = lu.cfg.GetClusterInfo()
976
  bep = cluster.FillBE(instance)
977
  hvp = cluster.FillHV(instance)
978
  args = {
979
    'name': instance.name,
980
    'primary_node': instance.primary_node,
981
    'secondary_nodes': instance.secondary_nodes,
982
    'os_type': instance.os,
983
    'status': instance.admin_up,
984
    'memory': bep[constants.BE_MEMORY],
985
    'vcpus': bep[constants.BE_VCPUS],
986
    'nics': _NICListToTuple(lu, instance.nics),
987
    'disk_template': instance.disk_template,
988
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
989
    'bep': bep,
990
    'hvp': hvp,
991
    'hypervisor_name': instance.hypervisor,
992
  }
993
  if override:
994
    args.update(override)
995
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
996

    
997

    
998
def _AdjustCandidatePool(lu, exceptions):
999
  """Adjust the candidate pool after node operations.
1000

1001
  """
1002
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1003
  if mod_list:
1004
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1005
               utils.CommaJoin(node.name for node in mod_list))
1006
    for name in mod_list:
1007
      lu.context.ReaddNode(name)
1008
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1009
  if mc_now > mc_max:
1010
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1011
               (mc_now, mc_max))
1012

    
1013

    
1014
def _DecideSelfPromotion(lu, exceptions=None):
1015
  """Decide whether I should promote myself as a master candidate.
1016

1017
  """
1018
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1019
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1020
  # the new node will increase mc_max by one, so:
1021
  mc_should = min(mc_should + 1, cp_size)
1022
  return mc_now < mc_should
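
# Worked example (illustrative numbers): with candidate_pool_size=10,
# mc_now=3 and mc_should=3, the node being added raises mc_should to
# min(3 + 1, 10) = 4, so mc_now < mc_should holds and the node promotes
# itself to master candidate.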
1023

    
1024

    
1025
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1026
  """Check that the brigdes needed by a list of nics exist.
1027

1028
  """
1029
  cluster = lu.cfg.GetClusterInfo()
1030
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1031
  brlist = [params[constants.NIC_LINK] for params in paramslist
1032
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1033
  if brlist:
1034
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1035
    result.Raise("Error checking bridges on destination node '%s'" %
1036
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1037

    
1038

    
1039
def _CheckInstanceBridgesExist(lu, instance, node=None):
1040
  """Check that the brigdes needed by an instance exist.
1041

1042
  """
1043
  if node is None:
1044
    node = instance.primary_node
1045
  _CheckNicsBridgesExist(lu, instance.nics, node)
1046

    
1047

    
1048
def _CheckOSVariant(os_obj, name):
1049
  """Check whether an OS name conforms to the os variants specification.
1050

1051
  @type os_obj: L{objects.OS}
1052
  @param os_obj: OS object to check
1053
  @type name: string
1054
  @param name: OS name passed by the user, to check for validity
1055

1056
  """
1057
  if not os_obj.supported_variants:
1058
    return
1059
  variant = objects.OS.GetVariant(name)
1060
  if not variant:
1061
    raise errors.OpPrereqError("OS name must include a variant",
1062
                               errors.ECODE_INVAL)
1063

    
1064
  if variant not in os_obj.supported_variants:
1065
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1066

    
1067

    
1068
def _GetNodeInstancesInner(cfg, fn):
1069
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1070

    
1071

    
1072
def _GetNodeInstances(cfg, node_name):
1073
  """Returns a list of all primary and secondary instances on a node.
1074

1075
  """
1076

    
1077
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1078

    
1079

    
1080
def _GetNodePrimaryInstances(cfg, node_name):
1081
  """Returns primary instances on a node.
1082

1083
  """
1084
  return _GetNodeInstancesInner(cfg,
1085
                                lambda inst: node_name == inst.primary_node)
1086

    
1087

    
1088
def _GetNodeSecondaryInstances(cfg, node_name):
1089
  """Returns secondary instances on a node.
1090

1091
  """
1092
  return _GetNodeInstancesInner(cfg,
1093
                                lambda inst: node_name in inst.secondary_nodes)
1094

    
1095

    
1096
def _GetStorageTypeArgs(cfg, storage_type):
1097
  """Returns the arguments for a storage type.
1098

1099
  """
1100
  # Special case for file storage
1101
  if storage_type == constants.ST_FILE:
1102
    # storage.FileStorage wants a list of storage directories
1103
    return [[cfg.GetFileStorageDir()]]
1104

    
1105
  return []
1106

    
1107

    
1108
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1109
  faulty = []
1110

    
1111
  for dev in instance.disks:
1112
    cfg.SetDiskID(dev, node_name)
1113

    
1114
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1115
  result.Raise("Failed to get disk status from node %s" % node_name,
1116
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1117

    
1118
  for idx, bdev_status in enumerate(result.payload):
1119
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1120
      faulty.append(idx)
1121

    
1122
  return faulty
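
# Illustration: if only the instance's second disk reports
# constants.LDS_FAULTY, the function returns [1], i.e. a list of the
# offending disk indices.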
1123

    
1124

    
1125
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1126
  """Check the sanity of iallocator and node arguments and use the
1127
  cluster-wide iallocator if appropriate.
1128

1129
  Check that at most one of (iallocator, node) is specified. If none is
1130
  specified, then the LU's opcode's iallocator slot is filled with the
1131
  cluster-wide default iallocator.
1132

1133
  @type iallocator_slot: string
1134
  @param iallocator_slot: the name of the opcode iallocator slot
1135
  @type node_slot: string
1136
  @param node_slot: the name of the opcode target node slot
1137

1138
  """
1139
  node = getattr(lu.op, node_slot, None)
1140
  iallocator = getattr(lu.op, iallocator_slot, None)
1141

    
1142
  if node is not None and iallocator is not None:
1143
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1144
                               errors.ECODE_INVAL)
1145
  elif node is None and iallocator is None:
1146
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1147
    if default_iallocator:
1148
      setattr(lu.op, iallocator_slot, default_iallocator)
1149
    else:
1150
      raise errors.OpPrereqError("No iallocator or node given and no"
1151
                                 " cluster-wide default iallocator found."
1152
                                 " Please specify either an iallocator or a"
1153
                                 " node, or set a cluster-wide default"
1154
                                 " iallocator.")
1155

    
1156

    
1157
class LUPostInitCluster(LogicalUnit):
1158
  """Logical unit for running hooks after cluster initialization.
1159

1160
  """
1161
  HPATH = "cluster-init"
1162
  HTYPE = constants.HTYPE_CLUSTER
1163

    
1164
  def BuildHooksEnv(self):
1165
    """Build hooks env.
1166

1167
    """
1168
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1169
    mn = self.cfg.GetMasterNode()
1170
    return env, [], [mn]
1171

    
1172
  def Exec(self, feedback_fn):
1173
    """Nothing to do.
1174

1175
    """
1176
    return True
1177

    
1178

    
1179
class LUDestroyCluster(LogicalUnit):
1180
  """Logical unit for destroying the cluster.
1181

1182
  """
1183
  HPATH = "cluster-destroy"
1184
  HTYPE = constants.HTYPE_CLUSTER
1185

    
1186
  def BuildHooksEnv(self):
1187
    """Build hooks env.
1188

1189
    """
1190
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1191
    return env, [], []
1192

    
1193
  def CheckPrereq(self):
1194
    """Check prerequisites.
1195

1196
    This checks whether the cluster is empty.
1197

1198
    Any errors are signaled by raising errors.OpPrereqError.
1199

1200
    """
1201
    master = self.cfg.GetMasterNode()
1202

    
1203
    nodelist = self.cfg.GetNodeList()
1204
    if len(nodelist) != 1 or nodelist[0] != master:
1205
      raise errors.OpPrereqError("There are still %d node(s) in"
1206
                                 " this cluster." % (len(nodelist) - 1),
1207
                                 errors.ECODE_INVAL)
1208
    instancelist = self.cfg.GetInstanceList()
1209
    if instancelist:
1210
      raise errors.OpPrereqError("There are still %d instance(s) in"
1211
                                 " this cluster." % len(instancelist),
1212
                                 errors.ECODE_INVAL)
1213

    
1214
  def Exec(self, feedback_fn):
1215
    """Destroys the cluster.
1216

1217
    """
1218
    master = self.cfg.GetMasterNode()
1219

    
1220
    # Run post hooks on master node before it's removed
1221
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1222
    try:
1223
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1224
    except:
1225
      # pylint: disable-msg=W0702
1226
      self.LogWarning("Errors occurred running hooks on %s" % master)
1227

    
1228
    result = self.rpc.call_node_stop_master(master, False)
1229
    result.Raise("Could not disable the master role")
1230

    
1231
    return master
1232

    
1233

    
1234
def _VerifyCertificate(filename):
1235
  """Verifies a certificate for LUVerifyCluster.
1236

1237
  @type filename: string
1238
  @param filename: Path to PEM file
1239

1240
  """
1241
  try:
1242
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1243
                                           utils.ReadFile(filename))
1244
  except Exception, err: # pylint: disable-msg=W0703
1245
    return (LUVerifyCluster.ETYPE_ERROR,
1246
            "Failed to load X509 certificate %s: %s" % (filename, err))
1247

    
1248
  (errcode, msg) = \
1249
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1250
                                constants.SSL_CERT_EXPIRATION_ERROR)
1251

    
1252
  if msg:
1253
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1254
  else:
1255
    fnamemsg = None
1256

    
1257
  if errcode is None:
1258
    return (None, fnamemsg)
1259
  elif errcode == utils.CERT_WARNING:
1260
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1261
  elif errcode == utils.CERT_ERROR:
1262
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1263

    
1264
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
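
# Illustration: a healthy certificate typically yields (None, None), one close
# to expiration yields (LUVerifyCluster.ETYPE_WARNING, "While verifying
# <file>: ..."), and an unreadable or malformed file yields ETYPE_ERROR
# together with the load error message.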
1265

    
1266

    
1267
class LUVerifyCluster(LogicalUnit):
1268
  """Verifies the cluster status.
1269

1270
  """
1271
  HPATH = "cluster-verify"
1272
  HTYPE = constants.HTYPE_CLUSTER
1273
  _OP_PARAMS = [
1274
    ("skip_checks", ht.EmptyList,
1275
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1276
    ("verbose", False, ht.TBool),
1277
    ("error_codes", False, ht.TBool),
1278
    ("debug_simulate_errors", False, ht.TBool),
1279
    ]
1280
  REQ_BGL = False
1281

    
1282
  TCLUSTER = "cluster"
1283
  TNODE = "node"
1284
  TINSTANCE = "instance"
1285

    
1286
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1287
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1288
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1289
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1290
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1291
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1292
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1293
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1294
  ENODEDRBD = (TNODE, "ENODEDRBD")
1295
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1296
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1297
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1298
  ENODEHV = (TNODE, "ENODEHV")
1299
  ENODELVM = (TNODE, "ENODELVM")
1300
  ENODEN1 = (TNODE, "ENODEN1")
1301
  ENODENET = (TNODE, "ENODENET")
1302
  ENODEOS = (TNODE, "ENODEOS")
1303
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1304
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1305
  ENODERPC = (TNODE, "ENODERPC")
1306
  ENODESSH = (TNODE, "ENODESSH")
1307
  ENODEVERSION = (TNODE, "ENODEVERSION")
1308
  ENODESETUP = (TNODE, "ENODESETUP")
1309
  ENODETIME = (TNODE, "ENODETIME")
1310

    
1311
  ETYPE_FIELD = "code"
1312
  ETYPE_ERROR = "ERROR"
1313
  ETYPE_WARNING = "WARNING"
1314

    
1315
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1316

    
1317
  class NodeImage(object):
1318
    """A class representing the logical and physical status of a node.
1319

1320
    @type name: string
1321
    @ivar name: the node name to which this object refers
1322
    @ivar volumes: a structure as returned from
1323
        L{ganeti.backend.GetVolumeList} (runtime)
1324
    @ivar instances: a list of running instances (runtime)
1325
    @ivar pinst: list of configured primary instances (config)
1326
    @ivar sinst: list of configured secondary instances (config)
1327
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1328
        of this node (config)
1329
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1330
    @ivar dfree: free disk, as reported by the node (runtime)
1331
    @ivar offline: the offline status (config)
1332
    @type rpc_fail: boolean
1333
    @ivar rpc_fail: whether the RPC verify call failed (overall,
1334
        not whether the individual keys were correct) (runtime)
1335
    @type lvm_fail: boolean
1336
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1337
    @type hyp_fail: boolean
1338
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1339
    @type ghost: boolean
1340
    @ivar ghost: whether this is a known node or not (config)
1341
    @type os_fail: boolean
1342
    @ivar os_fail: whether the RPC call didn't return valid OS data
1343
    @type oslist: list
1344
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1345
    @type vm_capable: boolean
1346
    @ivar vm_capable: whether the node can host instances
1347

1348
    """
1349
    def __init__(self, offline=False, name=None, vm_capable=True):
1350
      self.name = name
1351
      self.volumes = {}
1352
      self.instances = []
1353
      self.pinst = []
1354
      self.sinst = []
1355
      self.sbp = {}
1356
      self.mfree = 0
1357
      self.dfree = 0
1358
      self.offline = offline
1359
      self.vm_capable = vm_capable
1360
      self.rpc_fail = False
1361
      self.lvm_fail = False
1362
      self.hyp_fail = False
1363
      self.ghost = False
1364
      self.os_fail = False
1365
      self.oslist = {}
1366

    
1367
  def ExpandNames(self):
1368
    self.needed_locks = {
1369
      locking.LEVEL_NODE: locking.ALL_SET,
1370
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1371
    }
1372
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1373

    
1374
  def _Error(self, ecode, item, msg, *args, **kwargs):
1375
    """Format an error message.
1376

1377
    Based on the opcode's error_codes parameter, either format a
1378
    parseable error code, or a simpler error string.
1379

1380
    This must be called only from Exec and functions called from Exec.
1381

1382
    """
1383
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1384
    itype, etxt = ecode
1385
    # first complete the msg
1386
    if args:
1387
      msg = msg % args
1388
    # then format the whole message
1389
    if self.op.error_codes:
1390
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1391
    else:
1392
      if item:
1393
        item = " " + item
1394
      else:
1395
        item = ""
1396
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1397
    # and finally report it via the feedback_fn
1398
    self._feedback_fn("  - %s" % msg)
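
  # Example of the two output formats (illustrative values): with
  # error_codes set, ENODEVERSION on node "node1" is reported roughly as
  #   "  - ERROR:ENODEVERSION:node:node1:software version mismatch: ..."
  # while the plain format reads
  #   "  - ERROR: node node1: software version mismatch: ..."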
1399

    
1400
  def _ErrorIf(self, cond, *args, **kwargs):
1401
    """Log an error message if the passed condition is True.
1402

1403
    """
1404
    cond = bool(cond) or self.op.debug_simulate_errors
1405
    if cond:
1406
      self._Error(*args, **kwargs)
1407
    # do not mark the operation as failed for WARN cases only
1408
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1409
      self.bad = self.bad or cond
1410

    
1411
  def _VerifyNode(self, ninfo, nresult):
1412
    """Perform some basic validation on data returned from a node.
1413

1414
      - check the result data structure is well formed and has all the
1415
        mandatory fields
1416
      - check ganeti version
1417

1418
    @type ninfo: L{objects.Node}
1419
    @param ninfo: the node to check
1420
    @param nresult: the results from the node
1421
    @rtype: boolean
1422
    @return: whether overall this call was successful (and we can expect
1423
         reasonable values in the response)
1424

1425
    """
1426
    node = ninfo.name
1427
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1428

    
1429
    # main result, nresult should be a non-empty dict
1430
    test = not nresult or not isinstance(nresult, dict)
1431
    _ErrorIf(test, self.ENODERPC, node,
1432
                  "unable to verify node: no data returned")
1433
    if test:
1434
      return False
1435

    
1436
    # compares ganeti version
1437
    local_version = constants.PROTOCOL_VERSION
1438
    remote_version = nresult.get("version", None)
1439
    test = not (remote_version and
1440
                isinstance(remote_version, (list, tuple)) and
1441
                len(remote_version) == 2)
1442
    _ErrorIf(test, self.ENODERPC, node,
1443
             "connection to node returned invalid data")
1444
    if test:
1445
      return False
1446

    
1447
    test = local_version != remote_version[0]
1448
    _ErrorIf(test, self.ENODEVERSION, node,
1449
             "incompatible protocol versions: master %s,"
1450
             " node %s", local_version, remote_version[0])
1451
    if test:
1452
      return False
1453

    
1454
    # node seems compatible, we can actually try to look into its results
1455

    
1456
    # full package version
1457
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1458
                  self.ENODEVERSION, node,
1459
                  "software version mismatch: master %s, node %s",
1460
                  constants.RELEASE_VERSION, remote_version[1],
1461
                  code=self.ETYPE_WARNING)
1462

    
1463
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1464
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1465
      for hv_name, hv_result in hyp_result.iteritems():
1466
        test = hv_result is not None
1467
        _ErrorIf(test, self.ENODEHV, node,
1468
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1469

    
1470
    test = nresult.get(constants.NV_NODESETUP,
1471
                           ["Missing NODESETUP results"])
1472
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1473
             "; ".join(test))
1474

    
1475
    return True
1476

    
1477
  def _VerifyNodeTime(self, ninfo, nresult,
1478
                      nvinfo_starttime, nvinfo_endtime):
1479
    """Check the node time.
1480

1481
    @type ninfo: L{objects.Node}
1482
    @param ninfo: the node to check
1483
    @param nresult: the remote results for the node
1484
    @param nvinfo_starttime: the start time of the RPC call
1485
    @param nvinfo_endtime: the end time of the RPC call
1486

1487
    """
1488
    node = ninfo.name
1489
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1490

    
1491
    ntime = nresult.get(constants.NV_TIME, None)
1492
    try:
1493
      ntime_merged = utils.MergeTime(ntime)
1494
    except (ValueError, TypeError):
1495
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1496
      return
1497

    
1498
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1499
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1500
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1501
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1502
    else:
1503
      ntime_diff = None
1504

    
1505
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1506
             "Node time diverges by at least %s from master node time",
1507
             ntime_diff)
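
  # Example (illustrative numbers): if the allowed skew is exceeded, e.g. a
  # node whose merged time lies 200 seconds before nvinfo_starttime while
  # constants.NODE_MAX_CLOCK_SKEW is smaller than that, ntime_diff becomes
  # "200.0s" and an ENODETIME error is reported; nodes within the allowed
  # skew yield ntime_diff = None and no error.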
1508

    
1509
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      _ErrorIf(instanceconfig.admin_up and not success,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

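  # Illustrative sketch (comments only): diskstatus, as produced by
  # _CollectDiskInfo, maps node names to per-disk (success, status) pairs;
  # the flattening above turns a hypothetical
  #   {"node1.example.com": [(True, st0), (False, "rpc timeout")]}
  # into
  #   [("node1.example.com", True, st0, 0),
  #    ("node1.example.com", False, "rpc timeout", 1)]
  # so every disk can be reported together with its index.
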
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if a single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

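  # Illustrative sketch (comments only): for every (primary, secondaries)
  # relationship the loop above sums the memory of auto-balanced instances,
  # e.g. with assumed BE_MEMORY values of 512 and 1024 MiB:
  #   needed_mem = 512 + 1024
  #   test = n_img.mfree < needed_mem  # flag ENODEN1 when free memory is short
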
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1700
                       master_files):
1701
    """Verifies and computes the node required file checksums.
1702

1703
    @type ninfo: L{objects.Node}
1704
    @param ninfo: the node to check
1705
    @param nresult: the remote results for the node
1706
    @param file_list: required list of files
1707
    @param local_cksum: dictionary of local files and their checksums
1708
    @param master_files: list of files that only masters should have
1709

1710
    """
1711
    node = ninfo.name
1712
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1713

    
1714
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1715
    test = not isinstance(remote_cksum, dict)
1716
    _ErrorIf(test, self.ENODEFILECHECK, node,
1717
             "node hasn't returned file checksum data")
1718
    if test:
1719
      return
1720

    
1721
    for file_name in file_list:
1722
      node_is_mc = ninfo.master_candidate
1723
      must_have = (file_name not in master_files) or node_is_mc
1724
      # missing
1725
      test1 = file_name not in remote_cksum
1726
      # invalid checksum
1727
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1728
      # existing and good
1729
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1730
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1731
               "file '%s' missing", file_name)
1732
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1733
               "file '%s' has wrong checksum", file_name)
1734
      # not candidate and this is not a must-have file
1735
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1736
               "file '%s' should not exist on non master"
1737
               " candidates (and the file is outdated)", file_name)
1738
      # all good, except non-master/non-must have combination
1739
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1740
               "file '%s' should not exist"
1741
               " on non master candidates", file_name)
1742

    
1743
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1744
                      drbd_map):
1745
    """Verifies and the node DRBD status.
1746

1747
    @type ninfo: L{objects.Node}
1748
    @param ninfo: the node to check
1749
    @param nresult: the remote results for the node
1750
    @param instanceinfo: the dict of instances
1751
    @param drbd_helper: the configured DRBD usermode helper
1752
    @param drbd_map: the DRBD map as returned by
1753
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1754

1755
    """
1756
    node = ninfo.name
1757
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1758

    
1759
    if drbd_helper:
1760
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1761
      test = (helper_result is None)
1762
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1763
               "no drbd usermode helper returned")
1764
      if helper_result:
1765
        status, payload = helper_result
1766
        test = not status
1767
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1768
                 "drbd usermode helper check unsuccessful: %s", payload)
1769
        test = status and (payload != drbd_helper)
1770
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1771
                 "wrong drbd usermode helper: %s", payload)
1772

    
1773
    # compute the DRBD minors
1774
    node_drbd = {}
1775
    for minor, instance in drbd_map[node].items():
1776
      test = instance not in instanceinfo
1777
      _ErrorIf(test, self.ECLUSTERCFG, None,
1778
               "ghost instance '%s' in temporary DRBD map", instance)
1779
        # ghost instance should not be running, but otherwise we
1780
        # don't give double warnings (both ghost instance and
1781
        # unallocated minor in use)
1782
      if test:
1783
        node_drbd[minor] = (instance, False)
1784
      else:
1785
        instance = instanceinfo[instance]
1786
        node_drbd[minor] = (instance.name, instance.admin_up)
1787

    
1788
    # and now check them
1789
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1790
    test = not isinstance(used_minors, (tuple, list))
1791
    _ErrorIf(test, self.ENODEDRBD, node,
1792
             "cannot parse drbd status file: %s", str(used_minors))
1793
    if test:
1794
      # we cannot check drbd status
1795
      return
1796

    
1797
    for minor, (iname, must_exist) in node_drbd.items():
1798
      test = minor not in used_minors and must_exist
1799
      _ErrorIf(test, self.ENODEDRBD, node,
1800
               "drbd minor %d of instance %s is not active", minor, iname)
1801
    for minor in used_minors:
1802
      test = minor not in node_drbd
1803
      _ErrorIf(test, self.ENODEDRBD, node,
1804
               "unallocated drbd minor %d is in use", minor)
1805

    
1806
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1807
    """Builds the node OS structures.
1808

1809
    @type ninfo: L{objects.Node}
1810
    @param ninfo: the node to check
1811
    @param nresult: the remote results for the node
1812
    @param nimg: the node image object
1813

1814
    """
1815
    node = ninfo.name
1816
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1817

    
1818
    remote_os = nresult.get(constants.NV_OSLIST, None)
1819
    test = (not isinstance(remote_os, list) or
1820
            not compat.all(isinstance(v, list) and len(v) == 7
1821
                           for v in remote_os))
1822

    
1823
    _ErrorIf(test, self.ENODEOS, node,
1824
             "node hasn't returned valid OS data")
1825

    
1826
    nimg.os_fail = test
1827

    
1828
    if test:
1829
      return
1830

    
1831
    os_dict = {}
1832

    
1833
    for (name, os_path, status, diagnose,
1834
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1835

    
1836
      if name not in os_dict:
1837
        os_dict[name] = []
1838

    
1839
      # parameters is a list of lists instead of list of tuples due to
1840
      # JSON lacking a real tuple type, fix it:
1841
      parameters = [tuple(v) for v in parameters]
1842
      os_dict[name].append((os_path, status, diagnose,
1843
                            set(variants), set(parameters), set(api_ver)))
1844

    
1845
    nimg.oslist = os_dict
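
    # Illustrative sketch (comments only): nimg.oslist ends up shaped like
    #   {"debootstrap": [(os_path, status, diagnose,
    #                     set(variants), set(parameters), set(api_versions))]}
    # with one tuple per copy of the OS found on this node; more than one
    # entry for a name means the first installation shadows the others.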
1846

    
1847
  def _VerifyNodeOS(self, ninfo, nimg, base):
1848
    """Verifies the node OS list.
1849

1850
    @type ninfo: L{objects.Node}
1851
    @param ninfo: the node to check
1852
    @param nimg: the node image object
1853
    @param base: the 'template' node we match against (e.g. from the master)
1854

1855
    """
1856
    node = ninfo.name
1857
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1858

    
1859
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1860

    
1861
    for os_name, os_data in nimg.oslist.items():
1862
      assert os_data, "Empty OS status for OS %s?!" % os_name
1863
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1864
      _ErrorIf(not f_status, self.ENODEOS, node,
1865
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1866
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1867
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1868
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1869
      # this will be caught in the backend too
1870
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1871
               and not f_var, self.ENODEOS, node,
1872
               "OS %s with API at least %d does not declare any variant",
1873
               os_name, constants.OS_API_V15)
1874
      # comparisons with the 'base' image
1875
      test = os_name not in base.oslist
1876
      _ErrorIf(test, self.ENODEOS, node,
1877
               "Extra OS %s not present on reference node (%s)",
1878
               os_name, base.name)
1879
      if test:
1880
        continue
1881
      assert base.oslist[os_name], "Base node has empty OS status?"
1882
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1883
      if not b_status:
1884
        # base OS is invalid, skipping
1885
        continue
1886
      for kind, a, b in [("API version", f_api, b_api),
1887
                         ("variants list", f_var, b_var),
1888
                         ("parameters", f_param, b_param)]:
1889
        _ErrorIf(a != b, self.ENODEOS, node,
1890
                 "OS %s %s differs from reference node %s: %s vs. %s",
1891
                 kind, os_name, base.name,
1892
                 utils.CommaJoin(a), utils.CommaJoin(b))
1893

    
1894
    # check any missing OSes
1895
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1896
    _ErrorIf(missing, self.ENODEOS, node,
1897
             "OSes present on reference node %s but missing on this node: %s",
1898
             base.name, utils.CommaJoin(missing))
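
    # Illustrative sketch (comments only): with hypothetical inventories
    #   base.oslist = {"debootstrap": [...], "image": [...]}
    #   nimg.oslist = {"debootstrap": [...]}
    # the set difference above yields {"image"}, which is then reported as
    # missing on this node.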
1899

    
1900
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1901
    """Verifies and updates the node volume data.
1902

1903
    This function will update a L{NodeImage}'s internal structures
1904
    with data from the remote call.
1905

1906
    @type ninfo: L{objects.Node}
1907
    @param ninfo: the node to check
1908
    @param nresult: the remote results for the node
1909
    @param nimg: the node image object
1910
    @param vg_name: the configured VG name
1911

1912
    """
1913
    node = ninfo.name
1914
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1915

    
1916
    nimg.lvm_fail = True
1917
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1918
    if vg_name is None:
1919
      pass
1920
    elif isinstance(lvdata, basestring):
1921
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1922
               utils.SafeEncode(lvdata))
1923
    elif not isinstance(lvdata, dict):
1924
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1925
    else:
1926
      nimg.volumes = lvdata
1927
      nimg.lvm_fail = False
1928

    
1929
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1930
    """Verifies and updates the node instance list.
1931

1932
    If the listing was successful, then updates this node's instance
1933
    list. Otherwise, it marks the RPC call as failed for the instance
1934
    list key.
1935

1936
    @type ninfo: L{objects.Node}
1937
    @param ninfo: the node to check
1938
    @param nresult: the remote results for the node
1939
    @param nimg: the node image object
1940

1941
    """
1942
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1943
    test = not isinstance(idata, list)
1944
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1945
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1946
    if test:
1947
      nimg.hyp_fail = True
1948
    else:
1949
      nimg.instances = idata
1950

    
1951
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1952
    """Verifies and computes a node information map
1953

1954
    @type ninfo: L{objects.Node}
1955
    @param ninfo: the node to check
1956
    @param nresult: the remote results for the node
1957
    @param nimg: the node image object
1958
    @param vg_name: the configured VG name
1959

1960
    """
1961
    node = ninfo.name
1962
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1963

    
1964
    # try to read free memory (from the hypervisor)
1965
    hv_info = nresult.get(constants.NV_HVINFO, None)
1966
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1967
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1968
    if not test:
1969
      try:
1970
        nimg.mfree = int(hv_info["memory_free"])
1971
      except (ValueError, TypeError):
1972
        _ErrorIf(True, self.ENODERPC, node,
1973
                 "node returned invalid nodeinfo, check hypervisor")
1974

    
1975
    # FIXME: devise a free space model for file based instances as well
1976
    if vg_name is not None:
1977
      test = (constants.NV_VGLIST not in nresult or
1978
              vg_name not in nresult[constants.NV_VGLIST])
1979
      _ErrorIf(test, self.ENODELVM, node,
1980
               "node didn't return data for the volume group '%s'"
1981
               " - it is either missing or broken", vg_name)
1982
      if not test:
1983
        try:
1984
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1985
        except (ValueError, TypeError):
1986
          _ErrorIf(True, self.ENODERPC, node,
1987
                   "node returned invalid LVM info, check LVM status")
1988

    
1989
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1990
    """Gets per-disk status information for all instances.
1991

1992
    @type nodelist: list of strings
1993
    @param nodelist: Node names
1994
    @type node_image: dict of (name, L{objects.Node})
1995
    @param node_image: Node objects
1996
    @type instanceinfo: dict of (name, L{objects.Instance})
1997
    @param instanceinfo: Instance objects
1998

1999
    """
2000
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2001

    
2002
    node_disks = {}
2003
    node_disks_devonly = {}
2004

    
2005
    for nname in nodelist:
2006
      disks = [(inst, disk)
2007
               for instlist in [node_image[nname].pinst,
2008
                                node_image[nname].sinst]
2009
               for inst in instlist
2010
               for disk in instanceinfo[inst].disks]
2011

    
2012
      if not disks:
2013
        # No need to collect data
2014
        continue
2015

    
2016
      node_disks[nname] = disks
2017

    
2018
      # Creating copies as SetDiskID below will modify the objects and that can
2019
      # lead to incorrect data returned from nodes
2020
      devonly = [dev.Copy() for (_, dev) in disks]
2021

    
2022
      for dev in devonly:
2023
        self.cfg.SetDiskID(dev, nname)
2024

    
2025
      node_disks_devonly[nname] = devonly
2026

    
2027
    assert len(node_disks) == len(node_disks_devonly)
2028

    
2029
    # Collect data from all nodes with disks
2030
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2031
                                                          node_disks_devonly)
2032

    
2033
    assert len(result) == len(node_disks)
2034

    
2035
    instdisk = {}
2036

    
2037
    for (nname, nres) in result.items():
2038
      if nres.offline:
2039
        # Ignore offline node
2040
        continue
2041

    
2042
      disks = node_disks[nname]
2043

    
2044
      msg = nres.fail_msg
2045
      _ErrorIf(msg, self.ENODERPC, nname,
2046
               "while getting disk information: %s", nres.fail_msg)
2047
      if msg:
2048
        # No data from this node
2049
        data = len(disks) * [None]
2050
      else:
2051
        data = nres.payload
2052

    
2053
      for ((inst, _), status) in zip(disks, data):
2054
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2055

    
2056
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2057
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
2058
                      for inst, nnames in instdisk.items()
2059
                      for nname, statuses in nnames.items())
2060

    
2061
    return instdisk
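
    # Illustrative sketch (comments only): the returned mapping has the shape
    #   {instance_name: {node_name: [(success, status), ...]}}
    # e.g. a hypothetical one-disk DRBD instance could yield
    #   {"inst1": {"node1": [(True, dev_status)],
    #              "node2": [(True, dev_status)]}}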
2062

    
2063
  def BuildHooksEnv(self):
2064
    """Build hooks env.
2065

2066
    Cluster-Verify hooks are only run in the post phase; their failure is
    logged in the verify output and makes the verification fail.
2068

2069
    """
2070
    all_nodes = self.cfg.GetNodeList()
2071
    env = {
2072
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2073
      }
2074
    for node in self.cfg.GetAllNodesInfo().values():
2075
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2076

    
2077
    return env, [], all_nodes
2078

    
2079
  def Exec(self, feedback_fn):
2080
    """Verify integrity of cluster, performing various test on nodes.
2081

2082
    """
2083
    self.bad = False
2084
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2085
    verbose = self.op.verbose
2086
    self._feedback_fn = feedback_fn
2087
    feedback_fn("* Verifying global settings")
2088
    for msg in self.cfg.VerifyConfig():
2089
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2090

    
2091
    # Check the cluster certificates
2092
    for cert_filename in constants.ALL_CERT_FILES:
2093
      (errcode, msg) = _VerifyCertificate(cert_filename)
2094
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2095

    
2096
    vg_name = self.cfg.GetVGName()
2097
    drbd_helper = self.cfg.GetDRBDHelper()
2098
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2099
    cluster = self.cfg.GetClusterInfo()
2100
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2101
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2102
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2103
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2104
                        for iname in instancelist)
2105
    i_non_redundant = [] # Non redundant instances
2106
    i_non_a_balanced = [] # Non auto-balanced instances
2107
    n_offline = 0 # Count of offline nodes
2108
    n_drained = 0 # Count of nodes being drained
2109
    node_vol_should = {}
2110

    
2111
    # FIXME: verify OS list
2112
    # do local checksums
2113
    master_files = [constants.CLUSTER_CONF_FILE]
2114
    master_node = self.master_node = self.cfg.GetMasterNode()
2115
    master_ip = self.cfg.GetMasterIP()
2116

    
2117
    file_names = ssconf.SimpleStore().GetFileList()
2118
    file_names.extend(constants.ALL_CERT_FILES)
2119
    file_names.extend(master_files)
2120
    if cluster.modify_etc_hosts:
2121
      file_names.append(constants.ETC_HOSTS)
2122

    
2123
    local_checksums = utils.FingerprintFiles(file_names)
2124

    
2125
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2126
    node_verify_param = {
2127
      constants.NV_FILELIST: file_names,
2128
      constants.NV_NODELIST: [node.name for node in nodeinfo
2129
                              if not node.offline],
2130
      constants.NV_HYPERVISOR: hypervisors,
2131
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2132
                                  node.secondary_ip) for node in nodeinfo
2133
                                 if not node.offline],
2134
      constants.NV_INSTANCELIST: hypervisors,
2135
      constants.NV_VERSION: None,
2136
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2137
      constants.NV_NODESETUP: None,
2138
      constants.NV_TIME: None,
2139
      constants.NV_MASTERIP: (master_node, master_ip),
2140
      constants.NV_OSLIST: None,
2141
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2142
      }
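
    # Illustrative note (comments only): each NV_* key above requests one
    # check from the remote node and the verify payload comes back keyed the
    # same way, e.g. a hypothetical reply could contain
    #   {constants.NV_VERSION: (protocol_version, release_version),
    #    constants.NV_TIME: (seconds, microseconds)}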
2143

    
2144
    if vg_name is not None:
2145
      node_verify_param[constants.NV_VGLIST] = None
2146
      node_verify_param[constants.NV_LVLIST] = vg_name
2147
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2148
      node_verify_param[constants.NV_DRBDLIST] = None
2149

    
2150
    if drbd_helper:
2151
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2152

    
2153
    # Build our expected cluster state
2154
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2155
                                                 name=node.name,
2156
                                                 vm_capable=node.vm_capable))
2157
                      for node in nodeinfo)
2158

    
2159
    for instance in instancelist:
2160
      inst_config = instanceinfo[instance]
2161

    
2162
      for nname in inst_config.all_nodes:
2163
        if nname not in node_image:
2164
          # ghost node
2165
          gnode = self.NodeImage(name=nname)
2166
          gnode.ghost = True
2167
          node_image[nname] = gnode
2168

    
2169
      inst_config.MapLVsByNode(node_vol_should)
2170

    
2171
      pnode = inst_config.primary_node
2172
      node_image[pnode].pinst.append(instance)
2173

    
2174
      for snode in inst_config.secondary_nodes:
2175
        nimg = node_image[snode]
2176
        nimg.sinst.append(instance)
2177
        if pnode not in nimg.sbp:
2178
          nimg.sbp[pnode] = []
2179
        nimg.sbp[pnode].append(instance)
2180

    
2181
    # At this point, we have the in-memory data structures complete,
2182
    # except for the runtime information, which we'll gather next
2183

    
2184
    # Due to the way our RPC system works, exact response times cannot be
2185
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2186
    # time before and after executing the request, we can at least have a time
2187
    # window.
2188
    nvinfo_starttime = time.time()
2189
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2190
                                           self.cfg.GetClusterName())
2191
    nvinfo_endtime = time.time()
2192

    
2193
    all_drbd_map = self.cfg.ComputeDRBDMap()
2194

    
2195
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2196
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2197

    
2198
    feedback_fn("* Verifying node status")
2199

    
2200
    refos_img = None
2201

    
2202
    for node_i in nodeinfo:
2203
      node = node_i.name
2204
      nimg = node_image[node]
2205

    
2206
      if node_i.offline:
2207
        if verbose:
2208
          feedback_fn("* Skipping offline node %s" % (node,))
2209
        n_offline += 1
2210
        continue
2211

    
2212
      if node == master_node:
2213
        ntype = "master"
2214
      elif node_i.master_candidate:
2215
        ntype = "master candidate"
2216
      elif node_i.drained:
2217
        ntype = "drained"
2218
        n_drained += 1
2219
      else:
2220
        ntype = "regular"
2221
      if verbose:
2222
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2223

    
2224
      msg = all_nvinfo[node].fail_msg
2225
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2226
      if msg:
2227
        nimg.rpc_fail = True
2228
        continue
2229

    
2230
      nresult = all_nvinfo[node].payload
2231

    
2232
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2233
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2234
      self._VerifyNodeNetwork(node_i, nresult)
2235
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2236
                            master_files)
2237

    
2238
      if nimg.vm_capable:
2239
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2240
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2241
                             all_drbd_map)
2242

    
2243
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2244
        self._UpdateNodeInstances(node_i, nresult, nimg)
2245
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2246
        self._UpdateNodeOS(node_i, nresult, nimg)
2247
        if not nimg.os_fail:
2248
          if refos_img is None:
2249
            refos_img = nimg
2250
          self._VerifyNodeOS(node_i, nimg, refos_img)
2251

    
2252
    feedback_fn("* Verifying instance status")
2253
    for instance in instancelist:
2254
      if verbose:
2255
        feedback_fn("* Verifying instance %s" % instance)
2256
      inst_config = instanceinfo[instance]
2257
      self._VerifyInstance(instance, inst_config, node_image,
2258
                           instdisk[instance])
2259
      inst_nodes_offline = []
2260

    
2261
      pnode = inst_config.primary_node
2262
      pnode_img = node_image[pnode]
2263
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2264
               self.ENODERPC, pnode, "instance %s, connection to"
2265
               " primary node failed", instance)
2266

    
2267
      if pnode_img.offline:
2268
        inst_nodes_offline.append(pnode)
2269

    
2270
      # If the instance is non-redundant we cannot survive losing its primary
2271
      # node, so we are not N+1 compliant. On the other hand we have no disk
2272
      # templates with more than one secondary so that situation is not well
2273
      # supported either.
2274
      # FIXME: does not support file-backed instances
2275
      if not inst_config.secondary_nodes:
2276
        i_non_redundant.append(instance)
2277
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2278
               instance, "instance has multiple secondary nodes: %s",
2279
               utils.CommaJoin(inst_config.secondary_nodes),
2280
               code=self.ETYPE_WARNING)
2281

    
2282
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2283
        i_non_a_balanced.append(instance)
2284

    
2285
      for snode in inst_config.secondary_nodes:
2286
        s_img = node_image[snode]
2287
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2288
                 "instance %s, connection to secondary node failed", instance)
2289

    
2290
        if s_img.offline:
2291
          inst_nodes_offline.append(snode)
2292

    
2293
      # warn that the instance lives on offline nodes
2294
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2295
               "instance lives on offline node(s) %s",
2296
               utils.CommaJoin(inst_nodes_offline))
2297
      # ... or ghost/non-vm_capable nodes
2298
      for node in inst_config.all_nodes:
2299
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2300
                 "instance lives on ghost node %s", node)
2301
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2302
                 instance, "instance lives on non-vm_capable node %s", node)
2303

    
2304
    feedback_fn("* Verifying orphan volumes")
2305
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2306
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2307

    
2308
    feedback_fn("* Verifying orphan instances")
2309
    self._VerifyOrphanInstances(instancelist, node_image)
2310

    
2311
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2312
      feedback_fn("* Verifying N+1 Memory redundancy")
2313
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2314

    
2315
    feedback_fn("* Other Notes")
2316
    if i_non_redundant:
2317
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2318
                  % len(i_non_redundant))
2319

    
2320
    if i_non_a_balanced:
2321
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2322
                  % len(i_non_a_balanced))
2323

    
2324
    if n_offline:
2325
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2326

    
2327
    if n_drained:
2328
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2329

    
2330
    return not self.bad
2331

    
2332
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2333
    """Analyze the post-hooks' result
2334

2335
    This method analyses the hook result, handles it, and sends some
2336
    nicely-formatted feedback back to the user.
2337

2338
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2339
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2340
    @param hooks_results: the results of the multi-node hooks rpc call
2341
    @param feedback_fn: function used to send feedback back to the caller
2342
    @param lu_result: previous Exec result
2343
    @return: the new Exec result, based on the previous result
2344
        and hook results
2345

2346
    """
2347
    # We only really run POST phase hooks, and are only interested in
2348
    # their results
2349
    if phase == constants.HOOKS_PHASE_POST:
2350
      # Used to change hooks' output to proper indentation
2351
      feedback_fn("* Hooks Results")
2352
      assert hooks_results, "invalid result from hooks"
2353

    
2354
      for node_name in hooks_results:
2355
        res = hooks_results[node_name]
2356
        msg = res.fail_msg
2357
        test = msg and not res.offline
2358
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2359
                      "Communication failure in hooks execution: %s", msg)
2360
        if res.offline or msg:
2361
          # No need to investigate payload if node is offline or gave an error.
2362
          # manually override lu_result here, as _ErrorIf only
          # sets self.bad
2364
          lu_result = 1
2365
          continue
2366
        for script, hkr, output in res.payload:
2367
          test = hkr == constants.HKR_FAIL
2368
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2369
                        "Script %s failed, output:", script)
2370
          if test:
2371
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2372
            feedback_fn("%s" % output)
2373
            lu_result = 0
2374

    
2375
      return lu_result
2376

    
2377

    
2378
class LUVerifyDisks(NoHooksLU):
2379
  """Verifies the cluster disks status.
2380

2381
  """
2382
  REQ_BGL = False
2383

    
2384
  def ExpandNames(self):
2385
    self.needed_locks = {
2386
      locking.LEVEL_NODE: locking.ALL_SET,
2387
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2388
    }
2389
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2390

    
2391
  def Exec(self, feedback_fn):
2392
    """Verify integrity of cluster disks.
2393

2394
    @rtype: tuple of three items
2395
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
2398

2399
    """
2400
    result = res_nodes, res_instances, res_missing = {}, [], {}
2401

    
2402
    vg_name = self.cfg.GetVGName()
2403
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2404
    instances = [self.cfg.GetInstanceInfo(name)
2405
                 for name in self.cfg.GetInstanceList()]
2406

    
2407
    nv_dict = {}
2408
    for inst in instances:
2409
      inst_lvs = {}
2410
      if (not inst.admin_up or
2411
          inst.disk_template not in constants.DTS_NET_MIRROR):
2412
        continue
2413
      inst.MapLVsByNode(inst_lvs)
2414
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2415
      for node, vol_list in inst_lvs.iteritems():
2416
        for vol in vol_list:
2417
          nv_dict[(node, vol)] = inst
2418

    
2419
    if not nv_dict:
2420
      return result
2421

    
2422
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2423

    
2424
    for node in nodes:
2425
      # node_volume
2426
      node_res = node_lvs[node]
2427
      if node_res.offline:
2428
        continue
2429
      msg = node_res.fail_msg
2430
      if msg:
2431
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2432
        res_nodes[node] = msg
2433
        continue
2434

    
2435
      lvs = node_res.payload
2436
      for lv_name, (_, _, lv_online) in lvs.items():
2437
        inst = nv_dict.pop((node, lv_name), None)
2438
        if (not lv_online and inst is not None
2439
            and inst.name not in res_instances):
2440
          res_instances.append(inst.name)
2441

    
2442
    # any leftover items in nv_dict are missing LVs, let's arrange the
2443
    # data better
2444
    for key, inst in nv_dict.iteritems():
2445
      if inst.name not in res_missing:
2446
        res_missing[inst.name] = []
2447
      res_missing[inst.name].append(key)
2448

    
2449
    return result
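
    # Illustrative sketch (comments only, hypothetical names): the returned
    # triple could look like
    #   ({"node3": "rpc error"},        # nodes that could not be queried
    #    ["instance1"],                 # instances needing activate-disks
    #    {"instance2": [("node1", "lv_name")]})  # missing (node, LV) pairs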
2450

    
2451

    
2452
class LURepairDiskSizes(NoHooksLU):
2453
  """Verifies the cluster disks sizes.
2454

2455
  """
2456
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2457
  REQ_BGL = False
2458

    
2459
  def ExpandNames(self):
2460
    if self.op.instances:
2461
      self.wanted_names = []
2462
      for name in self.op.instances:
2463
        full_name = _ExpandInstanceName(self.cfg, name)
2464
        self.wanted_names.append(full_name)
2465
      self.needed_locks = {
2466
        locking.LEVEL_NODE: [],
2467
        locking.LEVEL_INSTANCE: self.wanted_names,
2468
        }
2469
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2470
    else:
2471
      self.wanted_names = None
2472
      self.needed_locks = {
2473
        locking.LEVEL_NODE: locking.ALL_SET,
2474
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2475
        }
2476
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2477

    
2478
  def DeclareLocks(self, level):
2479
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2480
      self._LockInstancesNodes(primary_only=True)
2481

    
2482
  def CheckPrereq(self):
2483
    """Check prerequisites.
2484

2485
    This only checks the optional instance list against the existing names.
2486

2487
    """
2488
    if self.wanted_names is None:
2489
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2490

    
2491
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2492
                             in self.wanted_names]
2493

    
2494
  def _EnsureChildSizes(self, disk):
2495
    """Ensure children of the disk have the needed disk size.
2496

2497
    This is valid mainly for DRBD8 and fixes an issue where the
2498
    children have smaller disk size.
2499

2500
    @param disk: an L{ganeti.objects.Disk} object
2501

2502
    """
2503
    if disk.dev_type == constants.LD_DRBD8:
2504
      assert disk.children, "Empty children for DRBD8?"
2505
      fchild = disk.children[0]
2506
      mismatch = fchild.size < disk.size
2507
      if mismatch:
2508
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2509
                     fchild.size, disk.size)
2510
        fchild.size = disk.size
2511

    
2512
      # and we recurse on this child only, not on the metadev
2513
      return self._EnsureChildSizes(fchild) or mismatch
2514
    else:
2515
      return False
2516

    
2517
  def Exec(self, feedback_fn):
2518
    """Verify the size of cluster disks.
2519

2520
    """
2521
    # TODO: check child disks too
2522
    # TODO: check differences in size between primary/secondary nodes
2523
    per_node_disks = {}
2524
    for instance in self.wanted_instances:
2525
      pnode = instance.primary_node
2526
      if pnode not in per_node_disks:
2527
        per_node_disks[pnode] = []
2528
      for idx, disk in enumerate(instance.disks):
2529
        per_node_disks[pnode].append((instance, idx, disk))
2530

    
2531
    changed = []
2532
    for node, dskl in per_node_disks.items():
2533
      newl = [v[2].Copy() for v in dskl]
2534
      for dsk in newl:
2535
        self.cfg.SetDiskID(dsk, node)
2536
      result = self.rpc.call_blockdev_getsizes(node, newl)
2537
      if result.fail_msg:
2538
        self.LogWarning("Failure in blockdev_getsizes call to node"
2539
                        " %s, ignoring", node)
2540
        continue
2541
      if len(result.data) != len(dskl):
2542
        self.LogWarning("Invalid result from node %s, ignoring node results",
2543
                        node)
2544
        continue
2545
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2546
        if size is None:
2547
          self.LogWarning("Disk %d of instance %s did not return size"
2548
                          " information, ignoring", idx, instance.name)
2549
          continue
2550
        if not isinstance(size, (int, long)):
2551
          self.LogWarning("Disk %d of instance %s did not return valid"
2552
                          " size information, ignoring", idx, instance.name)
2553
          continue
2554
        size = size >> 20
2555
        if size != disk.size:
2556
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2557
                       " correcting: recorded %d, actual %d", idx,
2558
                       instance.name, disk.size, size)
2559
          disk.size = size
2560
          self.cfg.Update(instance, feedback_fn)
2561
          changed.append((instance.name, idx, size))
2562
        if self._EnsureChildSizes(disk):
2563
          self.cfg.Update(instance, feedback_fn)
2564
          changed.append((instance.name, idx, disk.size))
2565
    return changed
2566

    
2567

    
2568
class LURenameCluster(LogicalUnit):
2569
  """Rename the cluster.
2570

2571
  """
2572
  HPATH = "cluster-rename"
2573
  HTYPE = constants.HTYPE_CLUSTER
2574
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2575

    
2576
  def BuildHooksEnv(self):
2577
    """Build hooks env.
2578

2579
    """
2580
    env = {
2581
      "OP_TARGET": self.cfg.GetClusterName(),
2582
      "NEW_NAME": self.op.name,
2583
      }
2584
    mn = self.cfg.GetMasterNode()
2585
    all_nodes = self.cfg.GetNodeList()
2586
    return env, [mn], all_nodes
2587

    
2588
  def CheckPrereq(self):
2589
    """Verify that the passed name is a valid one.
2590

2591
    """
2592
    hostname = netutils.GetHostname(name=self.op.name,
2593
                                    family=self.cfg.GetPrimaryIPFamily())
2594

    
2595
    new_name = hostname.name
2596
    self.ip = new_ip = hostname.ip
2597
    old_name = self.cfg.GetClusterName()
2598
    old_ip = self.cfg.GetMasterIP()
2599
    if new_name == old_name and new_ip == old_ip:
2600
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2601
                                 " cluster has changed",
2602
                                 errors.ECODE_INVAL)
2603
    if new_ip != old_ip:
2604
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2605
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2606
                                   " reachable on the network" %
2607
                                   new_ip, errors.ECODE_NOTUNIQUE)
2608

    
2609
    self.op.name = new_name
2610

    
2611
  def Exec(self, feedback_fn):
2612
    """Rename the cluster.
2613

2614
    """
2615
    clustername = self.op.name
2616
    ip = self.ip
2617

    
2618
    # shutdown the master IP
2619
    master = self.cfg.GetMasterNode()
2620
    result = self.rpc.call_node_stop_master(master, False)
2621
    result.Raise("Could not disable the master role")
2622

    
2623
    try:
2624
      cluster = self.cfg.GetClusterInfo()
2625
      cluster.cluster_name = clustername
2626
      cluster.master_ip = ip
2627
      self.cfg.Update(cluster, feedback_fn)
2628

    
2629
      # update the known hosts file
2630
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2631
      node_list = self.cfg.GetOnlineNodeList()
2632
      try:
2633
        node_list.remove(master)
2634
      except ValueError:
2635
        pass
2636
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2637
    finally:
2638
      result = self.rpc.call_node_start_master(master, False, False)
2639
      msg = result.fail_msg
2640
      if msg:
2641
        self.LogWarning("Could not re-enable the master role on"
2642
                        " the master, please restart manually: %s", msg)
2643

    
2644
    return clustername
2645

    
2646

    
2647
class LUSetClusterParams(LogicalUnit):
2648
  """Change the parameters of the cluster.
2649

2650
  """
2651
  HPATH = "cluster-modify"
2652
  HTYPE = constants.HTYPE_CLUSTER
2653
  _OP_PARAMS = [
2654
    ("vg_name", None, ht.TMaybeString),
2655
    ("enabled_hypervisors", None,
2656
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2657
            ht.TNone)),
2658
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2659
                              ht.TNone)),
2660
    ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2661
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2662
                            ht.TNone)),
2663
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2664
                              ht.TNone)),
2665
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2666
    ("uid_pool", None, ht.NoType),
2667
    ("add_uids", None, ht.NoType),
2668
    ("remove_uids", None, ht.NoType),
2669
    ("maintain_node_health", None, ht.TMaybeBool),
2670
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
2671
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2672
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2673
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2674
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2675
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2676
    ("hidden_os", None, ht.TOr(ht.TListOf(\
2677
          ht.TAnd(ht.TList,
2678
                ht.TIsLength(2),
2679
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2680
          ht.TNone)),
2681
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2682
          ht.TAnd(ht.TList,
2683
                ht.TIsLength(2),
2684
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2685
          ht.TNone)),
2686
    ]
2687
  REQ_BGL = False
2688

    
2689
  def CheckArguments(self):
2690
    """Check parameters
2691

2692
    """
2693
    if self.op.uid_pool:
2694
      uidpool.CheckUidPool(self.op.uid_pool)
2695

    
2696
    if self.op.add_uids:
2697
      uidpool.CheckUidPool(self.op.add_uids)
2698

    
2699
    if self.op.remove_uids:
2700
      uidpool.CheckUidPool(self.op.remove_uids)
2701

    
2702
  def ExpandNames(self):
2703
    # FIXME: in the future maybe other cluster params won't require checking on
2704
    # all nodes to be modified.
2705
    self.needed_locks = {
2706
      locking.LEVEL_NODE: locking.ALL_SET,
2707
    }
2708
    self.share_locks[locking.LEVEL_NODE] = 1
2709

    
2710
  def BuildHooksEnv(self):
2711
    """Build hooks env.
2712

2713
    """
2714
    env = {
2715
      "OP_TARGET": self.cfg.GetClusterName(),
2716
      "NEW_VG_NAME": self.op.vg_name,
2717
      }
2718
    mn = self.cfg.GetMasterNode()
2719
    return env, [mn], [mn]
2720

    
2721
  def CheckPrereq(self):
2722
    """Check prerequisites.
2723

2724
    This checks whether the given params don't conflict and
2725
    if the given volume group is valid.
2726

2727
    """
2728
    if self.op.vg_name is not None and not self.op.vg_name:
2729
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2730
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2731
                                   " instances exist", errors.ECODE_INVAL)
2732

    
2733
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2734
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2735
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2736
                                   " drbd-based instances exist",
2737
                                   errors.ECODE_INVAL)
2738

    
2739
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2740

    
2741
    # if vg_name not None, checks given volume group on all nodes
2742
    if self.op.vg_name:
2743
      vglist = self.rpc.call_vg_list(node_list)
2744
      for node in node_list:
2745
        msg = vglist[node].fail_msg
2746
        if msg:
2747
          # ignoring down node
2748
          self.LogWarning("Error while gathering data on node %s"
2749
                          " (ignoring node): %s", node, msg)
2750
          continue
2751
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2752
                                              self.op.vg_name,
2753
                                              constants.MIN_VG_SIZE)
2754
        if vgstatus:
2755
          raise errors.OpPrereqError("Error on node '%s': %s" %
2756
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2757

    
2758
    if self.op.drbd_helper:
2759
      # checks given drbd helper on all nodes
2760
      helpers = self.rpc.call_drbd_helper(node_list)
2761
      for node in node_list:
2762
        ninfo = self.cfg.GetNodeInfo(node)
2763
        if ninfo.offline:
2764
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2765
          continue
2766
        msg = helpers[node].fail_msg
2767
        if msg:
2768
          raise errors.OpPrereqError("Error checking drbd helper on node"
2769
                                     " '%s': %s" % (node, msg),
2770
                                     errors.ECODE_ENVIRON)
2771
        node_helper = helpers[node].payload
2772
        if node_helper != self.op.drbd_helper:
2773
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2774
                                     (node, node_helper), errors.ECODE_ENVIRON)
2775

    
2776
    self.cluster = cluster = self.cfg.GetClusterInfo()
2777
    # validate params changes
2778
    if self.op.beparams:
2779
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2780
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2781

    
2782
    if self.op.ndparams:
2783
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2784
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2785

    
2786
    if self.op.nicparams:
2787
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2788
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2789
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2790
      nic_errors = []
2791

    
2792
      # check all instances for consistency
2793
      for instance in self.cfg.GetAllInstancesInfo().values():
2794
        for nic_idx, nic in enumerate(instance.nics):
2795
          params_copy = copy.deepcopy(nic.nicparams)
2796
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2797

    
2798
          # check parameter syntax
2799
          try:
2800
            objects.NIC.CheckParameterSyntax(params_filled)
2801
          except errors.ConfigurationError, err:
2802
            nic_errors.append("Instance %s, nic/%d: %s" %
2803
                              (instance.name, nic_idx, err))
2804

    
2805
          # if we're moving instances to routed, check that they have an ip
2806
          target_mode = params_filled[constants.NIC_MODE]
2807
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2808
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2809
                              (instance.name, nic_idx))
2810
      if nic_errors:
2811
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2812
                                   "\n".join(nic_errors))
2813

    
2814
    # hypervisor list/parameters
2815
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2816
    if self.op.hvparams:
2817
      for hv_name, hv_dict in self.op.hvparams.items():
2818
        if hv_name not in self.new_hvparams:
2819
          self.new_hvparams[hv_name] = hv_dict
2820
        else:
2821
          self.new_hvparams[hv_name].update(hv_dict)
2822

    
2823
    # os hypervisor parameters
2824
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2825
    if self.op.os_hvp:
2826
      for os_name, hvs in self.op.os_hvp.items():
2827
        if os_name not in self.new_os_hvp:
2828
          self.new_os_hvp[os_name] = hvs
2829
        else:
2830
          for hv_name, hv_dict in hvs.items():
2831
            if hv_name not in self.new_os_hvp[os_name]:
2832
              self.new_os_hvp[os_name][hv_name] = hv_dict
2833
            else:
2834
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2835

    
2836
    # os parameters
2837
    self.new_osp = objects.FillDict(cluster.osparams, {})
2838
    if self.op.osparams:
2839
      for os_name, osp in self.op.osparams.items():
2840
        if os_name not in self.new_osp:
2841
          self.new_osp[os_name] = {}
2842

    
2843
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2844
                                                  use_none=True)
2845

    
2846
        if not self.new_osp[os_name]:
2847
          # we removed all parameters
2848
          del self.new_osp[os_name]
2849
        else:
2850
          # check the parameter validity (remote check)
2851
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2852
                         os_name, self.new_osp[os_name])
2853

    
2854
    # changes to the hypervisor list
2855
    if self.op.enabled_hypervisors is not None:
2856
      self.hv_list = self.op.enabled_hypervisors
2857
      for hv in self.hv_list:
2858
        # if the hypervisor doesn't already exist in the cluster
2859
        # hvparams, we initialize it to empty, and then (in both
2860
        # cases) we make sure to fill the defaults, as we might not
2861
        # have a complete defaults list if the hypervisor wasn't
2862
        # enabled before
2863
        if hv not in new_hvp:
2864
          new_hvp[hv] = {}
2865
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2866
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2867
    else:
2868
      self.hv_list = cluster.enabled_hypervisors
2869

    
2870
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2871
      # either the enabled list has changed, or the parameters have, validate
2872
      for hv_name, hv_params in self.new_hvparams.items():
2873
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2874
            (self.op.enabled_hypervisors and
2875
             hv_name in self.op.enabled_hypervisors)):
2876
          # either this is a new hypervisor, or its parameters have changed
2877
          hv_class = hypervisor.GetHypervisor(hv_name)
2878
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2879
          hv_class.CheckParameterSyntax(hv_params)
2880
          _CheckHVParams(self, node_list, hv_name, hv_params)
2881

    
2882
    if self.op.os_hvp:
2883
      # no need to check any newly-enabled hypervisors, since the
2884
      # defaults have already been checked in the above code-block
2885
      for os_name, os_hvp in self.new_os_hvp.items():
2886
        for hv_name, hv_params in os_hvp.items():
2887
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2888
          # we need to fill in the new os_hvp on top of the actual hv_p
2889
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2890
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2891
          hv_class = hypervisor.GetHypervisor(hv_name)
2892
          hv_class.CheckParameterSyntax(new_osp)
2893
          _CheckHVParams(self, node_list, hv_name, new_osp)
2894

    
2895
    if self.op.default_iallocator:
2896
      alloc_script = utils.FindFile(self.op.default_iallocator,
2897
                                    constants.IALLOCATOR_SEARCH_PATH,
2898
                                    os.path.isfile)
2899
      if alloc_script is None:
2900
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2901
                                   " specified" % self.op.default_iallocator,
2902
                                   errors.ECODE_INVAL)
2903

    
2904
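  # Illustrative note, not part of the original code: the FillDict layering
  # used above merges explicit values over built-in defaults, e.g.
  #   objects.FillDict(constants.HVC_DEFAULTS[constants.HT_XEN_PVM],
  #                    {"kernel_path": "/boot/vmlinuz-custom"})
  # keeps every default except the overridden "kernel_path" (the path shown
  # is only an example value).
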
  def Exec(self, feedback_fn):
2905
    """Change the parameters of the cluster.
2906

2907
    """
2908
    if self.op.vg_name is not None:
2909
      new_volume = self.op.vg_name
2910
      if not new_volume:
2911
        new_volume = None
2912
      if new_volume != self.cfg.GetVGName():
2913
        self.cfg.SetVGName(new_volume)
2914
      else:
2915
        feedback_fn("Cluster LVM configuration already in desired"
2916
                    " state, not changing")
2917
    if self.op.drbd_helper is not None:
2918
      new_helper = self.op.drbd_helper
2919
      if not new_helper:
2920
        new_helper = None
2921
      if new_helper != self.cfg.GetDRBDHelper():
2922
        self.cfg.SetDRBDHelper(new_helper)
2923
      else:
2924
        feedback_fn("Cluster DRBD helper already in desired state,"
2925
                    " not changing")
2926
    if self.op.hvparams:
2927
      self.cluster.hvparams = self.new_hvparams
2928
    if self.op.os_hvp:
2929
      self.cluster.os_hvp = self.new_os_hvp
2930
    if self.op.enabled_hypervisors is not None:
2931
      self.cluster.hvparams = self.new_hvparams
2932
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2933
    if self.op.beparams:
2934
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2935
    if self.op.nicparams:
2936
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2937
    if self.op.osparams:
2938
      self.cluster.osparams = self.new_osp
2939
    if self.op.ndparams:
2940
      self.cluster.ndparams = self.new_ndparams
2941

    
2942
    if self.op.candidate_pool_size is not None:
2943
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2944
      # we need to update the pool size here, otherwise the save will fail
2945
      _AdjustCandidatePool(self, [])
2946

    
2947
    if self.op.maintain_node_health is not None:
2948
      self.cluster.maintain_node_health = self.op.maintain_node_health
2949

    
2950
    if self.op.prealloc_wipe_disks is not None:
2951
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2952

    
2953
    if self.op.add_uids is not None:
2954
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2955

    
2956
    if self.op.remove_uids is not None:
2957
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2958

    
2959
    if self.op.uid_pool is not None:
2960
      self.cluster.uid_pool = self.op.uid_pool
2961

    
2962
    if self.op.default_iallocator is not None:
2963
      self.cluster.default_iallocator = self.op.default_iallocator
2964

    
2965
    if self.op.reserved_lvs is not None:
2966
      self.cluster.reserved_lvs = self.op.reserved_lvs
2967

    
2968
    def helper_os(aname, mods, desc):
2969
      desc += " OS list"
2970
      lst = getattr(self.cluster, aname)
2971
      for key, val in mods:
2972
        if key == constants.DDM_ADD:
2973
          if val in lst:
2974
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2975
          else:
2976
            lst.append(val)
2977
        elif key == constants.DDM_REMOVE:
2978
          if val in lst:
2979
            lst.remove(val)
2980
          else:
2981
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2982
        else:
2983
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
2984

    
2985
    if self.op.hidden_os:
2986
      helper_os("hidden_os", self.op.hidden_os, "hidden")
2987

    
2988
    if self.op.blacklisted_os:
2989
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2990

    
2991
    self.cfg.Update(self.cluster, feedback_fn)
2992

    
2993

    
2994
def _UploadHelper(lu, nodes, fname):
2995
  """Helper for uploading a file and showing warnings.
2996

2997
  """
2998
  if os.path.exists(fname):
2999
    result = lu.rpc.call_upload_file(nodes, fname)
3000
    for to_node, to_result in result.items():
3001
      msg = to_result.fail_msg
3002
      if msg:
3003
        msg = ("Copy of file %s to node %s failed: %s" %
3004
               (fname, to_node, msg))
3005
        lu.proc.LogWarning(msg)
3006

    
3007

    
3008
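# Illustrative sketch only (not part of the original module): a hypothetical
# helper showing how _UploadHelper above can be used to push a single extra
# file to every online node; the choice of file is just an example.
def _ExampleUploadSingleFile(lu):
  # _UploadHelper skips files missing on the master and only logs a warning
  # per node on failure, so it is safe to call opportunistically
  _UploadHelper(lu, lu.cfg.GetOnlineNodeList(), constants.ETC_HOSTS)

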
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3009
  """Distribute additional files which are part of the cluster configuration.
3010

3011
  ConfigWriter takes care of distributing the config and ssconf files, but
3012
  there are more files which should be distributed to all nodes. This function
3013
  makes sure those are copied.
3014

3015
  @param lu: calling logical unit
3016
  @param additional_nodes: list of nodes not in the config to distribute to
3017
  @type additional_vm: boolean
3018
  @param additional_vm: whether the additional nodes are vm-capable or not
3019

3020
  """
3021
  # 1. Gather target nodes
3022
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3023
  dist_nodes = lu.cfg.GetOnlineNodeList()
3024
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3025
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3026
  if additional_nodes is not None:
3027
    dist_nodes.extend(additional_nodes)
3028
    if additional_vm:
3029
      vm_nodes.extend(additional_nodes)
3030
  if myself.name in dist_nodes:
3031
    dist_nodes.remove(myself.name)
3032
  if myself.name in vm_nodes:
3033
    vm_nodes.remove(myself.name)
3034

    
3035
  # 2. Gather files to distribute
3036
  dist_files = set([constants.ETC_HOSTS,
3037
                    constants.SSH_KNOWN_HOSTS_FILE,
3038
                    constants.RAPI_CERT_FILE,
3039
                    constants.RAPI_USERS_FILE,
3040
                    constants.CONFD_HMAC_KEY,
3041
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
3042
                   ])
3043

    
3044
  vm_files = set()
3045
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3046
  for hv_name in enabled_hypervisors:
3047
    hv_class = hypervisor.GetHypervisor(hv_name)
3048
    vm_files.update(hv_class.GetAncillaryFiles())
3049

    
3050
  # 3. Perform the files upload
3051
  for fname in dist_files:
3052
    _UploadHelper(lu, dist_nodes, fname)
3053
  for fname in vm_files:
3054
    _UploadHelper(lu, vm_nodes, fname)
3055

    
3056

    
3057
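# Illustrative sketch only (not part of the original module): gathering the
# ancillary files of one hypothetical hypervisor, mirroring step 2 of
# _RedistributeAncillaryFiles above for a cluster where only KVM is enabled.
def _ExampleKvmAncillaryFiles():
  # each enabled hypervisor contributes the config files it needs on VM nodes
  hv_class = hypervisor.GetHypervisor(constants.HT_KVM)
  return set(hv_class.GetAncillaryFiles())

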
class LURedistributeConfig(NoHooksLU):
3058
  """Force the redistribution of cluster configuration.
3059

3060
  This is a very simple LU.
3061

3062
  """
3063
  REQ_BGL = False
3064

    
3065
  def ExpandNames(self):
3066
    self.needed_locks = {
3067
      locking.LEVEL_NODE: locking.ALL_SET,
3068
    }
3069
    self.share_locks[locking.LEVEL_NODE] = 1
3070

    
3071
  def Exec(self, feedback_fn):
3072
    """Redistribute the configuration.
3073

3074
    """
3075
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3076
    _RedistributeAncillaryFiles(self)
3077

    
3078

    
3079
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3080
  """Sleep and poll for an instance's disk to sync.
3081

3082
  """
3083
  if not instance.disks or disks is not None and not disks:
3084
    return True
3085

    
3086
  disks = _ExpandCheckDisks(instance, disks)
3087

    
3088
  if not oneshot:
3089
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3090

    
3091
  node = instance.primary_node
3092

    
3093
  for dev in disks:
3094
    lu.cfg.SetDiskID(dev, node)
3095

    
3096
  # TODO: Convert to utils.Retry
3097

    
3098
  retries = 0
3099
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3100
  while True:
3101
    max_time = 0
3102
    done = True
3103
    cumul_degraded = False
3104
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3105
    msg = rstats.fail_msg
3106
    if msg:
3107
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3108
      retries += 1
3109
      if retries >= 10:
3110
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3111
                                 " aborting." % node)
3112
      time.sleep(6)
3113
      continue
3114
    rstats = rstats.payload
3115
    retries = 0
3116
    for i, mstat in enumerate(rstats):
3117
      if mstat is None:
3118
        lu.LogWarning("Can't compute data for node %s/%s",
3119
                           node, disks[i].iv_name)
3120
        continue
3121

    
3122
      cumul_degraded = (cumul_degraded or
3123
                        (mstat.is_degraded and mstat.sync_percent is None))
3124
      if mstat.sync_percent is not None:
3125
        done = False
3126
        if mstat.estimated_time is not None:
3127
          rem_time = ("%s remaining (estimated)" %
3128
                      utils.FormatSeconds(mstat.estimated_time))
3129
          max_time = mstat.estimated_time
3130
        else:
3131
          rem_time = "no time estimate"
3132
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3133
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3134

    
3135
    # if we're done but degraded, let's do a few small retries, to
3136
    # make sure we see a stable and not transient situation; therefore
3137
    # we force restart of the loop
3138
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3139
      logging.info("Degraded disks found, %d retries left", degr_retries)
3140
      degr_retries -= 1
3141
      time.sleep(1)
3142
      continue
3143

    
3144
    if done or oneshot:
3145
      break
3146

    
3147
    time.sleep(min(60, max_time))
3148

    
3149
  if done:
3150
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3151
  return not cumul_degraded
3152

    
3153

    
3154
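# Illustrative sketch only (not part of the original module): the bounded
# retry pattern _WaitForSync uses for transiently degraded disks, shown in
# isolation; check_degraded_fn is a hypothetical callable returning True
# while the disks still look degraded.
def _ExampleDegradedRetries(check_degraded_fn, retries=10):
  # poll once per second, giving a transient degraded state time to clear
  while retries > 0 and check_degraded_fn():
    retries -= 1
    time.sleep(1)
  # True means the disks still looked degraded when the retries ran out
  return retries == 0 and check_degraded_fn()

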
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3155
  """Check that mirrors are not degraded.
3156

3157
  The ldisk parameter, if True, will change the test from the
3158
  is_degraded attribute (which represents overall non-ok status for
3159
  the device(s)) to the ldisk (representing the local storage status).
3160

3161
  """
3162
  lu.cfg.SetDiskID(dev, node)
3163

    
3164
  result = True
3165

    
3166
  if on_primary or dev.AssembleOnSecondary():
3167
    rstats = lu.rpc.call_blockdev_find(node, dev)
3168
    msg = rstats.fail_msg
3169
    if msg:
3170
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3171
      result = False
3172
    elif not rstats.payload:
3173
      lu.LogWarning("Can't find disk on node %s", node)
3174
      result = False
3175
    else:
3176
      if ldisk:
3177
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3178
      else:
3179
        result = result and not rstats.payload.is_degraded
3180

    
3181
  if dev.children:
3182
    for child in dev.children:
3183
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3184

    
3185
  return result
3186

    
3187

    
3188
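# Illustrative sketch only (not part of the original module): combining
# _CheckDiskConsistency above across the two nodes of a mirrored disk; the
# lu, dev, pnode and snode arguments are assumptions of this example.
def _ExampleMirroredDiskConsistent(lu, dev, pnode, snode):
  # ldisk=True narrows the check to the local storage status on each node
  return (_CheckDiskConsistency(lu, dev, pnode, True, ldisk=True) and
          _CheckDiskConsistency(lu, dev, snode, False, ldisk=True))

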
class LUDiagnoseOS(NoHooksLU):
3189
  """Logical unit for OS diagnose/query.
3190

3191
  """
3192
  _OP_PARAMS = [
3193
    _POutputFields,
3194
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3195
    ]
3196
  REQ_BGL = False
3197
  _HID = "hidden"
3198
  _BLK = "blacklisted"
3199
  _VLD = "valid"
3200
  _FIELDS_STATIC = utils.FieldSet()
3201
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3202
                                   "parameters", "api_versions", _HID, _BLK)
3203

    
3204
  def CheckArguments(self):
3205
    if self.op.names:
3206
      raise errors.OpPrereqError("Selective OS query not supported",
3207
                                 errors.ECODE_INVAL)
3208

    
3209
    _CheckOutputFields(static=self._FIELDS_STATIC,
3210
                       dynamic=self._FIELDS_DYNAMIC,
3211
                       selected=self.op.output_fields)
3212

    
3213
  def ExpandNames(self):
3214
    # Lock all nodes, in shared mode
3215
    # Temporary removal of locks, should be reverted later
3216
    # TODO: reintroduce locks when they are lighter-weight
3217
    self.needed_locks = {}
3218
    #self.share_locks[locking.LEVEL_NODE] = 1
3219
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3220

    
3221
  @staticmethod
3222
  def _DiagnoseByOS(rlist):
3223
    """Remaps a per-node return list into an a per-os per-node dictionary
3224

3225
    @param rlist: a map with node names as keys and OS objects as values
3226

3227
    @rtype: dict
3228
    @return: a dictionary with osnames as keys and as value another
3229
        map, with nodes as keys and tuples of (path, status, diagnose,
3230
        variants, parameters, api_versions) as values, eg::
3231

3232
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3233
                                     (/srv/..., False, "invalid api")],
3234
                           "node2": [(/srv/..., True, "", [], [])]}
3235
          }
3236

3237
    """
3238
    all_os = {}
3239
    # we build here the list of nodes that didn't fail the RPC (at RPC
3240
    # level), so that nodes with a non-responding node daemon don't
3241
    # make all OSes invalid
3242
    good_nodes = [node_name for node_name in rlist
3243
                  if not rlist[node_name].fail_msg]
3244
    for node_name, nr in rlist.items():
3245
      if nr.fail_msg or not nr.payload:
3246
        continue
3247
      for (name, path, status, diagnose, variants,
3248
           params, api_versions) in nr.payload:
3249
        if name not in all_os:
3250
          # build a list of nodes for this os containing empty lists
3251
          # for each node in node_list
3252
          all_os[name] = {}
3253
          for nname in good_nodes:
3254
            all_os[name][nname] = []
3255
        # convert params from [name, help] to (name, help)
3256
        params = [tuple(v) for v in params]
3257
        all_os[name][node_name].append((path, status, diagnose,
3258
                                        variants, params, api_versions))
3259
    return all_os
3260

    
3261
  def Exec(self, feedback_fn):
3262
    """Compute the list of OSes.
3263

3264
    """
3265
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3266
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3267
    pol = self._DiagnoseByOS(node_data)
3268
    output = []
3269
    cluster = self.cfg.GetClusterInfo()
3270

    
3271
    for os_name in utils.NiceSort(pol.keys()):
3272
      os_data = pol[os_name]
3273
      row = []
3274
      valid = True
3275
      (variants, params, api_versions) = null_state = (set(), set(), set())
3276
      for idx, osl in enumerate(os_data.values()):
3277
        valid = bool(valid and osl and osl[0][1])
3278
        if not valid:
3279
          (variants, params, api_versions) = null_state
3280
          break
3281
        node_variants, node_params, node_api = osl[0][3:6]
3282
        if idx == 0: # first entry
3283
          variants = set(node_variants)
3284
          params = set(node_params)
3285
          api_versions = set(node_api)
3286
        else: # keep consistency
3287
          variants.intersection_update(node_variants)
3288
          params.intersection_update(node_params)
3289
          api_versions.intersection_update(node_api)
3290

    
3291
      is_hid = os_name in cluster.hidden_os
3292
      is_blk = os_name in cluster.blacklisted_os
3293
      if ((self._HID not in self.op.output_fields and is_hid) or
3294
          (self._BLK not in self.op.output_fields and is_blk) or
3295
          (self._VLD not in self.op.output_fields and not valid)):
3296
        continue
3297

    
3298
      for field in self.op.output_fields:
3299
        if field == "name":
3300
          val = os_name
3301
        elif field == self._VLD:
3302
          val = valid
3303
        elif field == "node_status":
3304
          # this is just a copy of the dict
3305
          val = {}
3306
          for node_name, nos_list in os_data.items():
3307
            val[node_name] = nos_list
3308
        elif field == "variants":
3309
          val = utils.NiceSort(list(variants))
3310
        elif field == "parameters":
3311
          val = list(params)
3312
        elif field == "api_versions":
3313
          val = list(api_versions)
3314
        elif field == self._HID:
3315
          val = is_hid
3316
        elif field == self._BLK:
3317
          val = is_blk
3318
        else:
3319
          raise errors.ParameterError(field)
3320
        row.append(val)
3321
      output.append(row)
3322

    
3323
    return output
3324

    
3325

    
3326
class LURemoveNode(LogicalUnit):
3327
  """Logical unit for removing a node.
3328

3329
  """
3330
  HPATH = "node-remove"
3331
  HTYPE = constants.HTYPE_NODE
3332
  _OP_PARAMS = [
3333
    _PNodeName,
3334
    ]
3335

    
3336
  def BuildHooksEnv(self):
3337
    """Build hooks env.
3338

3339
    This doesn't run on the target node in the pre phase as a failed
3340
    node would then be impossible to remove.
3341

3342
    """
3343
    env = {
3344
      "OP_TARGET": self.op.node_name,
3345
      "NODE_NAME": self.op.node_name,
3346
      }
3347
    all_nodes = self.cfg.GetNodeList()
3348
    try:
3349
      all_nodes.remove(self.op.node_name)
3350
    except ValueError:
3351
      logging.warning("Node %s which is about to be removed not found"
3352
                      " in the all nodes list", self.op.node_name)
3353
    return env, all_nodes, all_nodes
3354

    
3355
  def CheckPrereq(self):
3356
    """Check prerequisites.
3357

3358
    This checks:
3359
     - the node exists in the configuration
3360
     - it does not have primary or secondary instances
3361
     - it's not the master
3362

3363
    Any errors are signaled by raising errors.OpPrereqError.
3364

3365
    """
3366
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3367
    node = self.cfg.GetNodeInfo(self.op.node_name)
3368
    assert node is not None
3369

    
3370
    instance_list = self.cfg.GetInstanceList()
3371

    
3372
    masternode = self.cfg.GetMasterNode()
3373
    if node.name == masternode:
3374
      raise errors.OpPrereqError("Node is the master node,"
3375
                                 " you need to failover first.",
3376
                                 errors.ECODE_INVAL)
3377

    
3378
    for instance_name in instance_list:
3379
      instance = self.cfg.GetInstanceInfo(instance_name)
3380
      if node.name in instance.all_nodes:
3381
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3382
                                   " please remove first." % instance_name,
3383
                                   errors.ECODE_INVAL)
3384
    self.op.node_name = node.name
3385
    self.node = node
3386

    
3387
  def Exec(self, feedback_fn):
3388
    """Removes the node from the cluster.
3389

3390
    """
3391
    node = self.node
3392
    logging.info("Stopping the node daemon and removing configs from node %s",
3393
                 node.name)
3394

    
3395
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3396

    
3397
    # Promote nodes to master candidate as needed
3398
    _AdjustCandidatePool(self, exceptions=[node.name])
3399
    self.context.RemoveNode(node.name)
3400

    
3401
    # Run post hooks on the node before it's removed
3402
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3403
    try:
3404
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3405
    except:
3406
      # pylint: disable-msg=W0702
3407
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3408

    
3409
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3410
    msg = result.fail_msg
3411
    if msg:
3412
      self.LogWarning("Errors encountered on the remote node while leaving"
3413
                      " the cluster: %s", msg)
3414

    
3415
    # Remove node from our /etc/hosts
3416
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3417
      master_node = self.cfg.GetMasterNode()
3418
      result = self.rpc.call_etc_hosts_modify(master_node,
3419
                                              constants.ETC_HOSTS_REMOVE,
3420
                                              node.name, None)
3421
      result.Raise("Can't update hosts file with new host data")
3422
      _RedistributeAncillaryFiles(self)
3423

    
3424

    
3425
class _NodeQuery(_QueryBase):
3426
  FIELDS = query.NODE_FIELDS
3427

    
3428
  def ExpandNames(self, lu):
3429
    lu.needed_locks = {}
3430
    lu.share_locks[locking.LEVEL_NODE] = 1
3431

    
3432
    if self.names:
3433
      self.wanted = _GetWantedNodes(lu, self.names)
3434
    else:
3435
      self.wanted = locking.ALL_SET
3436

    
3437
    self.do_locking = (self.use_locking and
3438
                       query.NQ_LIVE in self.requested_data)
3439

    
3440
    if self.do_locking:
3441
      # if we don't request only static fields, we need to lock the nodes
3442
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3443

    
3444
  def DeclareLocks(self, _):
3445
    pass
3446

    
3447
  def _GetQueryData(self, lu):
3448
    """Computes the list of nodes and their attributes.
3449

3450
    """
3451
    all_info = lu.cfg.GetAllNodesInfo()
3452

    
3453
    if self.do_locking:
3454
      nodenames = lu.acquired_locks[locking.LEVEL_NODE]
3455
    elif self.wanted != locking.ALL_SET:
3456
      nodenames = self.wanted
3457
      missing = set(nodenames).difference(all_info.keys())
3458
      if missing:
3459
        raise errors.OpExecError("Some nodes were removed before retrieving"
3460
                                 " their data: %s" % missing)
3461
    else:
3462
      nodenames = all_info.keys()
3463

    
3464
    nodenames = utils.NiceSort(nodenames)
3465

    
3466
    # Gather data as requested
3467
    if query.NQ_LIVE in self.requested_data:
3468
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3469
                                        lu.cfg.GetHypervisorType())
3470
      live_data = dict((name, nresult.payload)
3471
                       for (name, nresult) in node_data.items()
3472
                       if not nresult.fail_msg and nresult.payload)
3473
    else:
3474
      live_data = None
3475

    
3476
    if query.NQ_INST in self.requested_data:
3477
      node_to_primary = dict([(name, set()) for name in nodenames])
3478
      node_to_secondary = dict([(name, set()) for name in nodenames])
3479

    
3480
      inst_data = lu.cfg.GetAllInstancesInfo()
3481

    
3482
      for inst in inst_data.values():
3483
        if inst.primary_node in node_to_primary:
3484
          node_to_primary[inst.primary_node].add(inst.name)
3485
        for secnode in inst.secondary_nodes:
3486
          if secnode in node_to_secondary:
3487
            node_to_secondary[secnode].add(inst.name)
3488
    else:
3489
      node_to_primary = None
3490
      node_to_secondary = None
3491

    
3492
    if query.NQ_GROUP in self.requested_data:
3493
      groups = lu.cfg.GetAllNodeGroupsInfo()
3494
    else:
3495
      groups = {}
3496

    
3497
    return query.NodeQueryData([all_info[name] for name in nodenames],
3498
                               live_data, lu.cfg.GetMasterNode(),
3499
                               node_to_primary, node_to_secondary, groups)
3500

    
3501

    
3502
class LUQueryNodes(NoHooksLU):
3503
  """Logical unit for querying nodes.
3504

3505
  """
3506
  # pylint: disable-msg=W0142
3507
  _OP_PARAMS = [
3508
    _POutputFields,
3509
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3510
    ("use_locking", False, ht.TBool),
3511
    ]
3512
  REQ_BGL = False
3513

    
3514
  def CheckArguments(self):
3515
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3516
                         self.op.use_locking)
3517

    
3518
  def ExpandNames(self):
3519
    self.nq.ExpandNames(self)
3520

    
3521
  def Exec(self, feedback_fn):
3522
    return self.nq.OldStyleQuery(self)
3523

    
3524

    
3525
class LUQueryNodeVolumes(NoHooksLU):
3526
  """Logical unit for getting volumes on node(s).
3527

3528
  """
3529
  _OP_PARAMS = [
3530
    _POutputFields,
3531
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3532
    ]
3533
  REQ_BGL = False
3534
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3535
  _FIELDS_STATIC = utils.FieldSet("node")
3536

    
3537
  def CheckArguments(self):
3538
    _CheckOutputFields(static=self._FIELDS_STATIC,
3539
                       dynamic=self._FIELDS_DYNAMIC,
3540
                       selected=self.op.output_fields)
3541

    
3542
  def ExpandNames(self):
3543
    self.needed_locks = {}
3544
    self.share_locks[locking.LEVEL_NODE] = 1
3545
    if not self.op.nodes:
3546
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3547
    else:
3548
      self.needed_locks[locking.LEVEL_NODE] = \
3549
        _GetWantedNodes(self, self.op.nodes)
3550

    
3551
  def Exec(self, feedback_fn):
3552
    """Computes the list of nodes and their attributes.
3553

3554
    """
3555
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3556
    volumes = self.rpc.call_node_volumes(nodenames)
3557

    
3558
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3559
             in self.cfg.GetInstanceList()]
3560

    
3561
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3562

    
3563
    output = []
3564
    for node in nodenames:
3565
      nresult = volumes[node]
3566
      if nresult.offline:
3567
        continue
3568
      msg = nresult.fail_msg
3569
      if msg:
3570
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3571
        continue
3572

    
3573
      node_vols = nresult.payload[:]
3574
      node_vols.sort(key=lambda vol: vol['dev'])
3575

    
3576
      for vol in node_vols:
3577
        node_output = []
3578
        for field in self.op.output_fields:
3579
          if field == "node":
3580
            val = node
3581
          elif field == "phys":
3582
            val = vol['dev']
3583
          elif field == "vg":
3584
            val = vol['vg']
3585
          elif field == "name":
3586
            val = vol['name']
3587
          elif field == "size":
3588
            val = int(float(vol['size']))
3589
          elif field == "instance":
3590
            for inst in ilist:
3591
              if node not in lv_by_node[inst]:
3592
                continue
3593
              if vol['name'] in lv_by_node[inst][node]:
3594
                val = inst.name
3595
                break
3596
            else:
3597
              val = '-'
3598
          else:
3599
            raise errors.ParameterError(field)
3600
          node_output.append(str(val))
3601

    
3602
        output.append(node_output)
3603

    
3604
    return output
3605

    
3606

    
3607
class LUQueryNodeStorage(NoHooksLU):
3608
  """Logical unit for getting information on storage units on node(s).
3609

3610
  """
3611
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3612
  _OP_PARAMS = [
3613
    _POutputFields,
3614
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3615
    ("storage_type", ht.NoDefault, _CheckStorageType),
3616
    ("name", None, ht.TMaybeString),
3617
    ]
3618
  REQ_BGL = False
3619

    
3620
  def CheckArguments(self):
3621
    _CheckOutputFields(static=self._FIELDS_STATIC,
3622
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3623
                       selected=self.op.output_fields)
3624

    
3625
  def ExpandNames(self):
3626
    self.needed_locks = {}
3627
    self.share_locks[locking.LEVEL_NODE] = 1
3628

    
3629
    if self.op.nodes:
3630
      self.needed_locks[locking.LEVEL_NODE] = \
3631
        _GetWantedNodes(self, self.op.nodes)
3632
    else:
3633
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3634

    
3635
  def Exec(self, feedback_fn):
3636
    """Computes the list of nodes and their attributes.
3637

3638
    """
3639
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3640

    
3641
    # Always get name to sort by
3642
    if constants.SF_NAME in self.op.output_fields:
3643
      fields = self.op.output_fields[:]
3644
    else:
3645
      fields = [constants.SF_NAME] + self.op.output_fields
3646

    
3647
    # Never ask for node or type as it's only known to the LU
3648
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3649
      while extra in fields:
3650
        fields.remove(extra)
3651

    
3652
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3653
    name_idx = field_idx[constants.SF_NAME]
3654

    
3655
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3656
    data = self.rpc.call_storage_list(self.nodes,
3657
                                      self.op.storage_type, st_args,
3658
                                      self.op.name, fields)
3659

    
3660
    result = []
3661

    
3662
    for node in utils.NiceSort(self.nodes):
3663
      nresult = data[node]
3664
      if nresult.offline:
3665
        continue
3666

    
3667
      msg = nresult.fail_msg
3668
      if msg:
3669
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3670
        continue
3671

    
3672
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3673

    
3674
      for name in utils.NiceSort(rows.keys()):
3675
        row = rows[name]
3676

    
3677
        out = []
3678

    
3679
        for field in self.op.output_fields:
3680
          if field == constants.SF_NODE:
3681
            val = node
3682
          elif field == constants.SF_TYPE:
3683
            val = self.op.storage_type
3684
          elif field in field_idx:
3685
            val = row[field_idx[field]]
3686
          else:
3687
            raise errors.ParameterError(field)
3688

    
3689
          out.append(val)
3690

    
3691
        result.append(out)
3692

    
3693
    return result
3694

    
3695

    
3696
def _InstanceQuery(*args): # pylint: disable-msg=W0613
3697
  """Dummy until instance queries have been converted to query2.
3698

3699
  """
3700
  raise NotImplementedError
3701

    
3702

    
3703
#: Query type implementations
3704
_QUERY_IMPL = {
3705
  constants.QR_INSTANCE: _InstanceQuery,
3706
  constants.QR_NODE: _NodeQuery,
3707
  }
3708

    
3709

    
3710
def _GetQueryImplementation(name):
3711
  """Returns the implemtnation for a query type.
3712

3713
  @param name: Query type, must be one of L{constants.QR_OP_QUERY}
3714

3715
  """
3716
  try:
3717
    return _QUERY_IMPL[name]
3718
  except KeyError:
3719
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
3720
                               errors.ECODE_INVAL)
3721

    
3722

    
3723
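# Illustrative sketch only (not part of the original module): resolving and
# instantiating a query implementation the way LUQuery.CheckArguments below
# does, hard-coded here for the node resource and a single field.
def _ExampleNodeNameQuery():
  impl_cls = _GetQueryImplementation(constants.QR_NODE)
  # no name filter, only the "name" field, no live-data locking
  return impl_cls(None, ["name"], False)

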
class LUQuery(NoHooksLU):
3724
  """Query for resources/items of a certain kind.
3725

3726
  """
3727
  # pylint: disable-msg=W0142
3728
  _OP_PARAMS = [
3729
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3730
    ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3731
    ("filter", None, ht.TOr(ht.TNone,
3732
                            ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
3733
    ]
3734
  REQ_BGL = False
3735

    
3736
  def CheckArguments(self):
3737
    qcls = _GetQueryImplementation(self.op.what)
3738
    names = qlang.ReadSimpleFilter("name", self.op.filter)
3739

    
3740
    self.impl = qcls(names, self.op.fields, False)
3741

    
3742
  def ExpandNames(self):
3743
    self.impl.ExpandNames(self)
3744

    
3745
  def DeclareLocks(self, level):
3746
    self.impl.DeclareLocks(self, level)
3747

    
3748
  def Exec(self, feedback_fn):
3749
    return self.impl.NewStyleQuery(self)
3750

    
3751

    
3752
class LUQueryFields(NoHooksLU):
3753
  """Query for resources/items of a certain kind.
3754

3755
  """
3756
  # pylint: disable-msg=W0142
3757
  _OP_PARAMS = [
3758
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3759
    ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
3760
    ]
3761
  REQ_BGL = False
3762

    
3763
  def CheckArguments(self):
3764
    self.qcls = _GetQueryImplementation(self.op.what)
3765

    
3766
  def ExpandNames(self):
3767
    self.needed_locks = {}
3768

    
3769
  def Exec(self, feedback_fn):
3770
    return self.qcls.FieldsQuery(self.op.fields)
3771

    
3772

    
3773
class LUModifyNodeStorage(NoHooksLU):
3774
  """Logical unit for modifying a storage volume on a node.
3775

3776
  """
3777
  _OP_PARAMS = [
3778
    _PNodeName,
3779
    ("storage_type", ht.NoDefault, _CheckStorageType),
3780
    ("name", ht.NoDefault, ht.TNonEmptyString),
3781
    ("changes", ht.NoDefault, ht.TDict),
3782
    ]
3783
  REQ_BGL = False
3784

    
3785
  def CheckArguments(self):
3786
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3787

    
3788
    storage_type = self.op.storage_type
3789

    
3790
    try:
3791
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3792
    except KeyError:
3793
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3794
                                 " modified" % storage_type,
3795
                                 errors.ECODE_INVAL)
3796

    
3797
    diff = set(self.op.changes.keys()) - modifiable
3798
    if diff:
3799
      raise errors.OpPrereqError("The following fields can not be modified for"
3800
                                 " storage units of type '%s': %r" %
3801
                                 (storage_type, list(diff)),
3802
                                 errors.ECODE_INVAL)
3803

    
3804
  def ExpandNames(self):
3805
    self.needed_locks = {
3806
      locking.LEVEL_NODE: self.op.node_name,
3807
      }
3808

    
3809
  def Exec(self, feedback_fn):
3810
    """Computes the list of nodes and their attributes.
3811

3812
    """
3813
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3814
    result = self.rpc.call_storage_modify(self.op.node_name,
3815
                                          self.op.storage_type, st_args,
3816
                                          self.op.name, self.op.changes)
3817
    result.Raise("Failed to modify storage unit '%s' on %s" %
3818
                 (self.op.name, self.op.node_name))
3819

    
3820

    
3821
class LUAddNode(LogicalUnit):
3822
  """Logical unit for adding node to the cluster.
3823

3824
  """
3825
  HPATH = "node-add"
3826
  HTYPE = constants.HTYPE_NODE
3827
  _OP_PARAMS = [
3828
    _PNodeName,
3829
    ("primary_ip", None, ht.NoType),
3830
    ("secondary_ip", None, ht.TMaybeString),
3831
    ("readd", False, ht.TBool),
3832
    ("group", None, ht.TMaybeString),
3833
    ("master_capable", None, ht.TMaybeBool),
3834
    ("vm_capable", None, ht.TMaybeBool),
3835
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
3836
    ]
3837
  _NFLAGS = ["master_capable", "vm_capable"]
3838

    
3839
  def CheckArguments(self):
3840
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3841
    # validate/normalize the node name
3842
    self.hostname = netutils.GetHostname(name=self.op.node_name,
3843
                                         family=self.primary_ip_family)
3844
    self.op.node_name = self.hostname.name
3845
    if self.op.readd and self.op.group:
3846
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
3847
                                 " being readded", errors.ECODE_INVAL)
3848

    
3849
  def BuildHooksEnv(self):
3850
    """Build hooks env.
3851

3852
    This will run on all nodes before, and on all nodes + the new node after.
3853

3854
    """
3855
    env = {
3856
      "OP_TARGET": self.op.node_name,
3857
      "NODE_NAME": self.op.node_name,
3858
      "NODE_PIP": self.op.primary_ip,
3859
      "NODE_SIP": self.op.secondary_ip,
3860
      "MASTER_CAPABLE": str(self.op.master_capable),
3861
      "VM_CAPABLE": str(self.op.vm_capable),
3862
      }
3863
    nodes_0 = self.cfg.GetNodeList()
3864
    nodes_1 = nodes_0 + [self.op.node_name, ]
3865
    return env, nodes_0, nodes_1
3866

    
3867
  def CheckPrereq(self):
3868
    """Check prerequisites.
3869

3870
    This checks:
3871
     - the new node is not already in the config
3872
     - it is resolvable
3873
     - its parameters (single/dual homed) matches the cluster
3874

3875
    Any errors are signaled by raising errors.OpPrereqError.
3876

3877
    """
3878
    cfg = self.cfg
3879
    hostname = self.hostname
3880
    node = hostname.name
3881
    primary_ip = self.op.primary_ip = hostname.ip
3882
    if self.op.secondary_ip is None:
3883
      if self.primary_ip_family == netutils.IP6Address.family:
3884
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3885
                                   " IPv4 address must be given as secondary",
3886
                                   errors.ECODE_INVAL)
3887
      self.op.secondary_ip = primary_ip
3888

    
3889
    secondary_ip = self.op.secondary_ip
3890
    if not netutils.IP4Address.IsValid(secondary_ip):
3891
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3892
                                 " address" % secondary_ip, errors.ECODE_INVAL)
3893

    
3894
    node_list = cfg.GetNodeList()
3895
    if not self.op.readd and node in node_list:
3896
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3897
                                 node, errors.ECODE_EXISTS)
3898
    elif self.op.readd and node not in node_list:
3899
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3900
                                 errors.ECODE_NOENT)
3901

    
3902
    self.changed_primary_ip = False
3903

    
3904
    for existing_node_name in node_list:
3905
      existing_node = cfg.GetNodeInfo(existing_node_name)
3906

    
3907
      if self.op.readd and node == existing_node_name:
3908
        if existing_node.secondary_ip != secondary_ip:
3909
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3910
                                     " address configuration as before",
3911
                                     errors.ECODE_INVAL)
3912
        if existing_node.primary_ip != primary_ip:
3913
          self.changed_primary_ip = True
3914

    
3915
        continue
3916

    
3917
      if (existing_node.primary_ip == primary_ip or
3918
          existing_node.secondary_ip == primary_ip or
3919
          existing_node.primary_ip == secondary_ip or
3920
          existing_node.secondary_ip == secondary_ip):
3921
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3922
                                   " existing node %s" % existing_node.name,
3923
                                   errors.ECODE_NOTUNIQUE)
3924

    
3925
    # After this 'if' block, None is no longer a valid value for the
3926
    # _capable op attributes
3927
    if self.op.readd:
3928
      old_node = self.cfg.GetNodeInfo(node)
3929
      assert old_node is not None, "Can't retrieve locked node %s" % node
3930
      for attr in self._NFLAGS:
3931
        if getattr(self.op, attr) is None:
3932
          setattr(self.op, attr, getattr(old_node, attr))
3933
    else:
3934
      for attr in self._NFLAGS:
3935
        if getattr(self.op, attr) is None:
3936
          setattr(self.op, attr, True)
3937

    
3938
    if self.op.readd and not self.op.vm_capable:
3939
      pri, sec = cfg.GetNodeInstances(node)
3940
      if pri or sec:
3941
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
3942
                                   " flag set to false, but it already holds"
3943
                                   " instances" % node,
3944
                                   errors.ECODE_STATE)
3945

    
3946
    # check that the type of the node (single versus dual homed) is the
3947
    # same as for the master
3948
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3949
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3950
    newbie_singlehomed = secondary_ip == primary_ip
3951
    if master_singlehomed != newbie_singlehomed:
3952
      if master_singlehomed:
3953
        raise errors.OpPrereqError("The master has no secondary ip but the"
3954
                                   " new node has one",
3955
                                   errors.ECODE_INVAL)
3956
      else:
3957
        raise errors.OpPrereqError("The master has a secondary ip but the"
3958
                                   " new node doesn't have one",
3959
                                   errors.ECODE_INVAL)
3960

    
3961
    # checks reachability
3962
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3963
      raise errors.OpPrereqError("Node not reachable by ping",
3964
                                 errors.ECODE_ENVIRON)
3965

    
3966
    if not newbie_singlehomed:
3967
      # check reachability from my secondary ip to newbie's secondary ip
3968
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3969
                           source=myself.secondary_ip):
3970
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3971
                                   " based ping to node daemon port",
3972
                                   errors.ECODE_ENVIRON)
3973

    
3974
    if self.op.readd:
3975
      exceptions = [node]
3976
    else:
3977
      exceptions = []
3978

    
3979
    if self.op.master_capable:
3980
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3981
    else:
3982
      self.master_candidate = False
3983

    
3984
    if self.op.readd:
3985
      self.new_node = old_node
3986
    else:
3987
      node_group = cfg.LookupNodeGroup(self.op.group)
3988
      self.new_node = objects.Node(name=node,
3989
                                   primary_ip=primary_ip,
3990
                                   secondary_ip=secondary_ip,
3991
                                   master_candidate=self.master_candidate,
3992
                                   offline=False, drained=False,
3993
                                   group=node_group)
3994

    
3995
    if self.op.ndparams:
3996
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3997

    
3998
  def Exec(self, feedback_fn):
3999
    """Adds the new node to the cluster.
4000

4001
    """
4002
    new_node = self.new_node
4003
    node = new_node.name
4004

    
4005
    # for re-adds, reset the offline/drained/master-candidate flags;
4006
    # we need to reset here, otherwise offline would prevent RPC calls
4007
    # later in the procedure; this also means that if the re-add
4008
    # fails, we are left with a non-offlined, broken node
4009
    if self.op.readd:
4010
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4011
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4012
      # if we demote the node, we do cleanup later in the procedure
4013
      new_node.master_candidate = self.master_candidate
4014
      if self.changed_primary_ip:
4015
        new_node.primary_ip = self.op.primary_ip
4016

    
4017
    # copy the master/vm_capable flags
4018
    for attr in self._NFLAGS:
4019
      setattr(new_node, attr, getattr(self.op, attr))
4020

    
4021
    # notify the user about any possible mc promotion
4022
    if new_node.master_candidate:
4023
      self.LogInfo("Node will be a master candidate")
4024

    
4025
    if self.op.ndparams:
4026
      new_node.ndparams = self.op.ndparams
4027

    
4028
    # check connectivity
4029
    result = self.rpc.call_version([node])[node]
4030
    result.Raise("Can't get version information from node %s" % node)
4031
    if constants.PROTOCOL_VERSION == result.payload:
4032
      logging.info("Communication to node %s fine, sw version %s match",
4033
                   node, result.payload)
4034
    else:
4035
      raise errors.OpExecError("Version mismatch master version %s,"
4036
                               " node version %s" %
4037
                               (constants.PROTOCOL_VERSION, result.payload))
4038

    
4039
    # Add node to our /etc/hosts, and add key to known_hosts
4040
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4041
      master_node = self.cfg.GetMasterNode()
4042
      result = self.rpc.call_etc_hosts_modify(master_node,
4043
                                              constants.ETC_HOSTS_ADD,
4044
                                              self.hostname.name,
4045
                                              self.hostname.ip)
4046
      result.Raise("Can't update hosts file with new host data")
4047

    
4048
    if new_node.secondary_ip != new_node.primary_ip:
4049
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4050
                               False)
4051

    
4052
    node_verify_list = [self.cfg.GetMasterNode()]
4053
    node_verify_param = {
4054
      constants.NV_NODELIST: [node],
4055
      # TODO: do a node-net-test as well?
4056
    }
4057

    
4058
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4059
                                       self.cfg.GetClusterName())
4060
    for verifier in node_verify_list:
4061
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4062
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4063
      if nl_payload:
4064
        for failed in nl_payload:
4065
          feedback_fn("ssh/hostname verification failed"
4066
                      " (checking from %s): %s" %
4067
                      (verifier, nl_payload[failed]))
4068
        raise errors.OpExecError("ssh/hostname verification failed.")
4069

    
4070
    if self.op.readd:
4071
      _RedistributeAncillaryFiles(self)
4072
      self.context.ReaddNode(new_node)
4073
      # make sure we redistribute the config
4074
      self.cfg.Update(new_node, feedback_fn)
4075
      # and make sure the new node will not have old files around
4076
      if not new_node.master_candidate:
4077
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4078
        msg = result.fail_msg
4079
        if msg:
4080
          self.LogWarning("Node failed to demote itself from master"
4081
                          " candidate status: %s" % msg)
4082
    else:
4083
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4084
                                  additional_vm=self.op.vm_capable)
4085
      self.context.AddNode(new_node, self.proc.GetECId())
4086

    
4087

    
4088
class LUSetNodeParams(LogicalUnit):
4089
  """Modifies the parameters of a node.
4090

4091
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4092
      to the node role (as _ROLE_*)
4093
  @cvar _R2F: a dictionary from node role to tuples of flags
4094
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4095

4096
  """
4097
  HPATH = "node-modify"
4098
  HTYPE = constants.HTYPE_NODE
4099
  _OP_PARAMS = [
4100
    _PNodeName,
4101
    ("master_candidate", None, ht.TMaybeBool),
4102
    ("offline", None, ht.TMaybeBool),
4103
    ("drained", None, ht.TMaybeBool),
4104
    ("auto_promote", False, ht.TBool),
4105
    ("master_capable", None, ht.TMaybeBool),
4106
    ("vm_capable", None, ht.TMaybeBool),
4107
    ("secondary_ip", None, ht.TMaybeString),
4108
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4109
    _PForce,
4110
    ]
4111
  REQ_BGL = False
4112
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4113
  _F2R = {
4114
    (True, False, False): _ROLE_CANDIDATE,
4115
    (False, True, False): _ROLE_DRAINED,
4116
    (False, False, True): _ROLE_OFFLINE,
4117
    (False, False, False): _ROLE_REGULAR,
4118
    }
4119
  _R2F = dict((v, k) for k, v in _F2R.items())
4120
  _FLAGS = ["master_candidate", "drained", "offline"]
4121

    
4122
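  # Illustrative note, not part of the original code: with the tables above a
  # master candidate corresponds to
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  # while _R2F maps a role back to its (mc, drained, offline) flag tuple.
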
  def CheckArguments(self):
4123
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4124
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4125
                self.op.master_capable, self.op.vm_capable,
4126
                self.op.secondary_ip, self.op.ndparams]
4127
    if all_mods.count(None) == len(all_mods):
4128
      raise errors.OpPrereqError("Please pass at least one modification",
4129
                                 errors.ECODE_INVAL)
4130
    if all_mods.count(True) > 1:
4131
      raise errors.OpPrereqError("Can't set the node into more than one"
4132
                                 " state at the same time",
4133
                                 errors.ECODE_INVAL)
4134

    
4135
    # Boolean value that tells us whether we might be demoting from MC
4136
    self.might_demote = (self.op.master_candidate == False or
4137
                         self.op.offline == True or
4138
                         self.op.drained == True or
4139
                         self.op.master_capable == False)
4140

    
4141
    if self.op.secondary_ip:
4142
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4143
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4144
                                   " address" % self.op.secondary_ip,
4145
                                   errors.ECODE_INVAL)
4146

    
4147
    self.lock_all = self.op.auto_promote and self.might_demote
4148
    self.lock_instances = self.op.secondary_ip is not None
4149

    
4150
  def ExpandNames(self):
4151
    if self.lock_all:
4152
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4153
    else:
4154
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4155

    
4156
    if self.lock_instances:
4157
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4158

    
4159
  def DeclareLocks(self, level):
4160
    # If we have locked all instances, before waiting to lock nodes, release
4161
    # all the ones living on nodes unrelated to the current operation.
4162
    if level == locking.LEVEL_NODE and self.lock_instances:
4163
      instances_release = []
4164
      instances_keep = []
4165
      self.affected_instances = []
4166
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4167
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4168
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4169
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4170
          if i_mirrored and self.op.node_name in instance.all_nodes:
4171
            instances_keep.append(instance_name)
4172
            self.affected_instances.append(instance)
4173
          else:
4174
            instances_release.append(instance_name)
4175
        if instances_release:
4176
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4177
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4178

    
4179
  def BuildHooksEnv(self):
4180
    """Build hooks env.
4181

4182
    This runs on the master node.
4183

4184
    """
4185
    env = {
4186
      "OP_TARGET": self.op.node_name,
4187
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4188
      "OFFLINE": str(self.op.offline),
4189
      "DRAINED": str(self.op.drained),
4190
      "MASTER_CAPABLE": str(self.op.master_capable),
4191
      "VM_CAPABLE": str(self.op.vm_capable),
4192
      }
4193
    nl = [self.cfg.GetMasterNode(),
4194
          self.op.node_name]
4195
    return env, nl, nl
4196

    
4197
  def CheckPrereq(self):
4198
    """Check prerequisites.
4199

4200
    This only checks the instance list against the existing names.
4201

4202
    """
4203
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4204

    
4205
    if (self.op.master_candidate is not None or
4206
        self.op.drained is not None or
4207
        self.op.offline is not None):
4208
      # we can't change the master's node flags
4209
      if self.op.node_name == self.cfg.GetMasterNode():
4210
        raise errors.OpPrereqError("The master role can be changed"
4211
                                   " only via master-failover",
4212
                                   errors.ECODE_INVAL)
4213

    
4214
    if self.op.master_candidate and not node.master_capable:
4215
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4216
                                 " it a master candidate" % node.name,
4217
                                 errors.ECODE_STATE)
4218

    
4219
    if self.op.vm_capable == False:
4220
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4221
      if ipri or isec:
4222
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4223
                                   " the vm_capable flag" % node.name,
4224
                                   errors.ECODE_STATE)
4225

    
4226
    if node.master_candidate and self.might_demote and not self.lock_all:
4227
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
4228
      # check if after removing the current node, we're missing master
4229
      # candidates
4230
      (mc_remaining, mc_should, _) = \
4231
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4232
      if mc_remaining < mc_should:
4233
        raise errors.OpPrereqError("Not enough master candidates, please"
4234
                                   " pass auto_promote to allow promotion",
4235
                                   errors.ECODE_STATE)
4236

    
4237
    self.old_flags = old_flags = (node.master_candidate,
4238
                                  node.drained, node.offline)
4239
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4240
    self.old_role = old_role = self._F2R[old_flags]
4241

    
4242
    # Check for ineffective changes
4243
    for attr in self._FLAGS:
4244
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4245
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4246
        setattr(self.op, attr, None)
4247

    
4248
    # Past this point, any flag change to False means a transition
4249
    # away from the respective state, as only real changes are kept
4250

    
4251
    # If we're being deofflined/drained, we'll MC ourself if needed
4252
    if (self.op.drained == False or self.op.offline == False or
4253
        (self.op.master_capable and not node.master_capable)):
4254
      if _DecideSelfPromotion(self):
4255
        self.op.master_candidate = True
4256
        self.LogInfo("Auto-promoting node to master candidate")
4257

    
4258
    # If we're no longer master capable, we'll demote ourselves from MC
4259
    if self.op.master_capable == False and node.master_candidate:
4260
      self.LogInfo("Demoting from master candidate")
4261
      self.op.master_candidate = False
4262

    
4263
    # Compute new role
4264
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4265
    if self.op.master_candidate:
4266
      new_role = self._ROLE_CANDIDATE
4267
    elif self.op.drained:
4268
      new_role = self._ROLE_DRAINED
4269
    elif self.op.offline:
4270
      new_role = self._ROLE_OFFLINE
4271
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4272
      # False is still in new flags, which means we're un-setting (the
4273
      # only) True flag
4274
      new_role = self._ROLE_REGULAR
4275
    else: # no new flags, nothing, keep old role
4276
      new_role = old_role
4277

    
4278
    self.new_role = new_role
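
    # Added, illustrative sketch only (not in the original source): the role
    # computation above relies on the class-level _F2R/_R2F mappings defined
    # earlier in this class.  Conceptually _F2R maps a
    # (master_candidate, drained, offline) flag tuple to a role, e.g.
    #   (True, False, False)  -> _ROLE_CANDIDATE
    #   (False, True, False)  -> _ROLE_DRAINED
    #   (False, False, True)  -> _ROLE_OFFLINE
    #   (False, False, False) -> _ROLE_REGULAR
    # and _R2F is the inverse that Exec() uses to turn the new role back into
    # node flags.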

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params
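
    # Illustrative example (added): if cluster.os_hvp were
    #   {"debian-image": {"kvm": {...}, "xen-pvm": {...}}}
    # and only "kvm" were enabled, os_hvp would end up as
    #   {"debian-image": {"kvm": {...}}}
    # i.e. per-OS settings for disabled hypervisors are filtered out.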

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
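
# Usage sketch added for clarity (not part of the original source): callers
# unpack the result of _AssembleInstanceDisks as LUActivateInstanceDisks.Exec
# does above, e.g.
#
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#
# where each disks_info entry is a (primary_node, iv_name, device_path) tuple
# as described in the docstring.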


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
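    # Added note: "force" doubles as the ignore_secondaries flag for the
    # assembly above; callers that do not act on a direct user request (e.g.
    # reinstall or rename) pass force=None, which suppresses the --force hint
    # below.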
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise any shutdown error makes the function return False.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
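
# Behaviour summary added for clarity (not part of the original source): with
# ignore_primary=True a failed shutdown on the primary node is only logged,
# while a failure on any other node still makes the function return False;
# with the default ignore_primary=False any failure returns False.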


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
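
# Illustrative call (added), mirroring how LUStartupInstance.CheckPrereq uses
# this helper:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)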


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  if req_sizes is not None:
    for vg, req_size in req_sizes.iteritems():
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
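
# Example added for clarity (illustrative values only): req_sizes maps each
# volume group name to the space needed on it, e.g. {"xenvg": 10240} asks for
# 10 GiB of free space in the "xenvg" VG on every node in nodenames, while
# passing None skips the check entirely.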


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    _PIgnoreOfflineNodes,
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, ht.TBool),
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node
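
    # Added comment: soft and hard reboots are delegated to the node in a
    # single call_instance_reboot RPC, while a full reboot is emulated below
    # as an instance shutdown followed by a disk cycle and a fresh start.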

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PIgnoreOfflineNodes,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
    ("ip_check", False, ht.TBool),
    ("name_check", True, ht.TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams", "custom_hvparams",
                                    "custom_beparams", "custom_nicparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
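  # Added note: the regular expressions above make indexed and aggregate
  # fields such as "disk.sizes", "disk.size/0", "nic.macs" or "nic.mac/1"
  # selectable, next to the per-parameter "hv/..." and "be/..." fields built
  # from the constants lists.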
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")


  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "custom_nicparams":
          val = [nic.nicparams for nic in instance.nics]
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "custom_hvparams":
          val = instance.hvparams # not filled!
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "custom_beparams":
          val = instance.beparams
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
5737
                  val = i_nicp[nic_idx][constants.NIC_LINK]
5738
                elif st_groups[1] == "bridge":
5739
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5740
                  if nic_mode == constants.NIC_MODE_BRIDGED:
5741
                    val = i_nicp[nic_idx][constants.NIC_LINK]
5742
                  else:
5743
                    val = None
5744
                else:
5745
                  assert False, "Unhandled NIC parameter"
5746
          else:
5747
            assert False, ("Declared but unhandled variable parameter '%s'" %
5748
                           field)
5749
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
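    # Example (illustrative, hypothetical instance): with
    # output_fields = ["name", "status", "oper_ram"], an instance
    # "inst1.example.com" that is marked up and reported running with
    # 512 MiB by its hypervisor contributes one row, e.g.:
    #   output = [["inst1.example.com", "running", 512]]
    # The same instance stopped on a healthy node would instead yield
    # ["inst1.example.com", "ERROR_down", "-"].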
5755

    
5756

    
5757
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

    
5779
  def BuildHooksEnv(self):
5780
    """Build hooks env.
5781

5782
    This runs on master, primary and secondary nodes of the instance.
5783

5784
    """
5785
    instance = self.instance
5786
    source_node = instance.primary_node
5787
    target_node = instance.secondary_nodes[0]
5788
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post
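    # Example (illustrative, hypothetical nodes): for an instance whose
    # primary is node1.example.com and whose DRBD secondary is
    # node2.example.com, the failover hooks environment contains, besides
    # the generic keys added by _BuildInstanceHookEnvByObject:
    #   IGNORE_CONSISTENCY=<self.op.ignore_consistency>
    #   SHUTDOWN_TIMEOUT=<self.op.shutdown_timeout>
    #   OLD_PRIMARY=node1.example.com    NEW_PRIMARY=node2.example.com
    #   OLD_SECONDARY=node2.example.com  NEW_SECONDARY=node1.example.com
    # and the post-hooks node list additionally includes the old primary.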
5801

    
5802
  def CheckPrereq(self):
5803
    """Check prerequisites.
5804

5805
    This checks that the instance is in the cluster.
5806

5807
    """
5808
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5809
    assert self.instance is not None, \
5810
      "Cannot retrieve locked instance %s" % self.op.instance_name
5811

    
5812
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5813
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5814
      raise errors.OpPrereqError("Instance's disk layout is not"
5815
                                 " network mirrored, cannot failover.",
5816
                                 errors.ECODE_STATE)
5817

    
5818
    secondary_nodes = instance.secondary_nodes
5819
    if not secondary_nodes:
5820
      raise errors.ProgrammerError("no secondary node but using "
5821
                                   "a mirrored disk template")
5822

    
5823
    target_node = secondary_nodes[0]
5824
    _CheckNodeOnline(self, target_node)
5825
    _CheckNodeNotDrained(self, target_node)
5826
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    
5838
  def Exec(self, feedback_fn):
5839
    """Failover an instance.
5840

5841
    The failover is done by shutting it down on its present node and
5842
    starting it on the secondary.
5843

5844
    """
5845
    instance = self.instance
5846
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5847

    
5848
    source_node = instance.primary_node
5849
    target_node = instance.secondary_nodes[0]
5850

    
5851
    if instance.admin_up:
5852
      feedback_fn("* checking disk consistency between source and target")
5853
      for dev in instance.disks:
5854
        # for drbd, these are drbd over lvm
5855
        if not _CheckDiskConsistency(self, dev, target_node, False):
5856
          if not self.op.ignore_consistency:
5857
            raise errors.OpExecError("Disk %s is degraded on target node,"
5858
                                     " aborting failover." % dev.iv_name)
5859
    else:
5860
      feedback_fn("* not checking disk consistency as instance is not running")
5861

    
5862
    feedback_fn("* shutting down instance on source node")
5863
    logging.info("Shutting down instance %s on node %s",
5864
                 instance.name, source_node)
5865

    
5866
    result = self.rpc.call_instance_shutdown(source_node, instance,
5867
                                             self.op.shutdown_timeout)
5868
    msg = result.fail_msg
5869
    if msg:
5870
      if self.op.ignore_consistency or primary_node.offline:
5871
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5872
                             " Proceeding anyway. Please make sure node"
5873
                             " %s is down. Error details: %s",
5874
                             instance.name, source_node, source_node, msg)
5875
      else:
5876
        raise errors.OpExecError("Could not shutdown instance %s on"
5877
                                 " node %s: %s" %
5878
                                 (instance.name, source_node, msg))
5879

    
5880
    feedback_fn("* deactivating the instance's disks on source node")
5881
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5882
      raise errors.OpExecError("Can't shut down the instance's disks.")
5883

    
5884
    instance.primary_node = target_node
5885
    # distribute new instance config to the other nodes
5886
    self.cfg.Update(instance, feedback_fn)
5887

    
5888
    # Only start the instance if it's marked as up
5889
    if instance.admin_up:
5890
      feedback_fn("* activating the instance's disks on target node")
5891
      logging.info("Starting instance %s on node %s",
5892
                   instance.name, target_node)
5893

    
5894
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5895
                                           ignore_secondaries=True)
5896
      if not disks_ok:
5897
        _ShutdownInstanceDisks(self, instance)
5898
        raise errors.OpExecError("Can't activate the instance's disks")
5899

    
5900
      feedback_fn("* starting the instance on the target node")
5901
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5902
      msg = result.fail_msg
5903
      if msg:
5904
        _ShutdownInstanceDisks(self, instance)
5905
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5906
                                 (instance.name, target_node, msg))
5907

    
5908

    
5909
class LUMigrateInstance(LogicalUnit):
5910
  """Migrate an instance.
5911

5912
  This is migration without shutting down, compared to the failover,
5913
  which is done with shutdown.
5914

5915
  """
5916
  HPATH = "instance-migrate"
5917
  HTYPE = constants.HTYPE_INSTANCE
5918
  _OP_PARAMS = [
5919
    _PInstanceName,
5920
    _PMigrationMode,
5921
    _PMigrationLive,
5922
    ("cleanup", False, ht.TBool),
5923
    ]
5924

    
5925
  REQ_BGL = False
5926

    
5927
  def ExpandNames(self):
5928
    self._ExpandAndLockInstance()
5929

    
5930
    self.needed_locks[locking.LEVEL_NODE] = []
5931
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5932

    
5933
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5934
                                       self.op.cleanup)
5935
    self.tasklets = [self._migrater]
5936

    
5937
  def DeclareLocks(self, level):
5938
    if level == locking.LEVEL_NODE:
5939
      self._LockInstancesNodes()
5940

    
5941
  def BuildHooksEnv(self):
5942
    """Build hooks env.
5943

5944
    This runs on master, primary and secondary nodes of the instance.
5945

5946
    """
5947
    instance = self._migrater.instance
5948
    source_node = instance.primary_node
5949
    target_node = instance.secondary_nodes[0]
5950
    env = _BuildInstanceHookEnvByObject(self, instance)
5951
    env["MIGRATE_LIVE"] = self._migrater.live
5952
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5953
    env.update({
5954
        "OLD_PRIMARY": source_node,
5955
        "OLD_SECONDARY": target_node,
5956
        "NEW_PRIMARY": target_node,
5957
        "NEW_SECONDARY": source_node,
5958
        })
5959
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5960
    nl_post = list(nl)
5961
    nl_post.append(source_node)
5962
    return env, nl, nl_post
5963

    
5964

    
5965
class LUMoveInstance(LogicalUnit):
5966
  """Move an instance by data-copying.
5967

5968
  """
5969
  HPATH = "instance-move"
5970
  HTYPE = constants.HTYPE_INSTANCE
5971
  _OP_PARAMS = [
5972
    _PInstanceName,
5973
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5974
    _PShutdownTimeout,
5975
    ]
5976
  REQ_BGL = False
5977

    
5978
  def ExpandNames(self):
5979
    self._ExpandAndLockInstance()
5980
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5981
    self.op.target_node = target_node
5982
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5983
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5984

    
5985
  def DeclareLocks(self, level):
5986
    if level == locking.LEVEL_NODE:
5987
      self._LockInstancesNodes(primary_only=True)
5988

    
5989
  def BuildHooksEnv(self):
5990
    """Build hooks env.
5991

5992
    This runs on master, primary and secondary nodes of the instance.
5993

5994
    """
5995
    env = {
5996
      "TARGET_NODE": self.op.target_node,
5997
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5998
      }
5999
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6000
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6001
                                       self.op.target_node]
6002
    return env, nl, nl
6003

    
6004
  def CheckPrereq(self):
6005
    """Check prerequisites.
6006

6007
    This checks that the instance is in the cluster.
6008

6009
    """
6010
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6011
    assert self.instance is not None, \
6012
      "Cannot retrieve locked instance %s" % self.op.instance_name
6013

    
6014
    node = self.cfg.GetNodeInfo(self.op.target_node)
6015
    assert node is not None, \
6016
      "Cannot retrieve locked node %s" % self.op.target_node
6017

    
6018
    self.target_node = target_node = node.name
6019

    
6020
    if target_node == instance.primary_node:
6021
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6022
                                 (instance.name, target_node),
6023
                                 errors.ECODE_STATE)
6024

    
6025
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6026

    
6027
    for idx, dsk in enumerate(instance.disks):
6028
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6029
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6030
                                   " cannot copy" % idx, errors.ECODE_STATE)
6031

    
6032
    _CheckNodeOnline(self, target_node)
6033
    _CheckNodeNotDrained(self, target_node)
6034
    _CheckNodeVmCapable(self, target_node)
6035

    
6036
    if instance.admin_up:
6037
      # check memory requirements on the secondary node
6038
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6039
                           instance.name, bep[constants.BE_MEMORY],
6040
                           instance.hypervisor)
6041
    else:
6042
      self.LogInfo("Not checking memory on the secondary node as"
6043
                   " instance will not be started")
6044

    
6045
    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    
6048
  def Exec(self, feedback_fn):
6049
    """Move an instance.
6050

6051
    The move is done by shutting it down on its present node, copying
6052
    the data over (slow) and starting it on the new node.
6053

6054
    """
6055
    instance = self.instance
6056

    
6057
    source_node = instance.primary_node
6058
    target_node = self.target_node
6059

    
6060
    self.LogInfo("Shutting down instance %s on source node %s",
6061
                 instance.name, source_node)
6062

    
6063
    result = self.rpc.call_instance_shutdown(source_node, instance,
6064
                                             self.op.shutdown_timeout)
6065
    msg = result.fail_msg
6066
    if msg:
6067
      # The instance move opcode does not declare an "ignore_consistency"
      # parameter, so read it defensively to avoid an AttributeError here.
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6069
                             " Proceeding anyway. Please make sure node"
6070
                             " %s is down. Error details: %s",
6071
                             instance.name, source_node, source_node, msg)
6072
      else:
6073
        raise errors.OpExecError("Could not shutdown instance %s on"
6074
                                 " node %s: %s" %
6075
                                 (instance.name, source_node, msg))
6076

    
6077
    # create the target disks
6078
    try:
6079
      _CreateDisks(self, instance, target_node=target_node)
6080
    except errors.OpExecError:
6081
      self.LogWarning("Device creation failed, reverting...")
6082
      try:
6083
        _RemoveDisks(self, instance, target_node=target_node)
6084
      finally:
6085
        self.cfg.ReleaseDRBDMinors(instance.name)
6086
        raise
6087

    
6088
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6089

    
6090
    errs = []
6091
    # activate, get path, copy the data over
6092
    for idx, disk in enumerate(instance.disks):
6093
      self.LogInfo("Copying data for disk %d", idx)
6094
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6095
                                               instance.name, True)
6096
      if result.fail_msg:
6097
        self.LogWarning("Can't assemble newly created disk %d: %s",
6098
                        idx, result.fail_msg)
6099
        errs.append(result.fail_msg)
6100
        break
6101
      dev_path = result.payload
6102
      result = self.rpc.call_blockdev_export(source_node, disk,
6103
                                             target_node, dev_path,
6104
                                             cluster_name)
6105
      if result.fail_msg:
6106
        self.LogWarning("Can't copy data over for disk %d: %s",
6107
                        idx, result.fail_msg)
6108
        errs.append(result.fail_msg)
6109
        break
6110

    
6111
    if errs:
6112
      self.LogWarning("Some disks failed to copy, aborting")
6113
      try:
6114
        _RemoveDisks(self, instance, target_node=target_node)
6115
      finally:
6116
        self.cfg.ReleaseDRBDMinors(instance.name)
6117
        raise errors.OpExecError("Errors during disk copy: %s" %
6118
                                 (",".join(errs),))
6119

    
6120
    instance.primary_node = target_node
6121
    self.cfg.Update(instance, feedback_fn)
6122

    
6123
    self.LogInfo("Removing the disks on the original node")
6124
    _RemoveDisks(self, instance, target_node=source_node)
6125

    
6126
    # Only start the instance if it's marked as up
6127
    if instance.admin_up:
6128
      self.LogInfo("Starting instance %s on node %s",
6129
                   instance.name, target_node)
6130

    
6131
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6132
                                           ignore_secondaries=True)
6133
      if not disks_ok:
6134
        _ShutdownInstanceDisks(self, instance)
6135
        raise errors.OpExecError("Can't activate the instance's disks")
6136

    
6137
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6138
      msg = result.fail_msg
6139
      if msg:
6140
        _ShutdownInstanceDisks(self, instance)
6141
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6142
                                 (instance.name, target_node, msg))
6143

    
6144

    
6145
class LUMigrateNode(LogicalUnit):
6146
  """Migrate all instances from a node.
6147

6148
  """
6149
  HPATH = "node-migrate"
6150
  HTYPE = constants.HTYPE_NODE
6151
  _OP_PARAMS = [
6152
    _PNodeName,
6153
    _PMigrationMode,
6154
    _PMigrationLive,
6155
    ]
6156
  REQ_BGL = False
6157

    
6158
  def ExpandNames(self):
6159
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6160

    
6161
    self.needed_locks = {
6162
      locking.LEVEL_NODE: [self.op.node_name],
6163
      }
6164

    
6165
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6166

    
6167
    # Create tasklets for migrating instances for all instances on this node
6168
    names = []
6169
    tasklets = []
6170

    
6171
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6172
      logging.debug("Migrating instance %s", inst.name)
6173
      names.append(inst.name)
6174

    
6175
      tasklets.append(TLMigrateInstance(self, inst.name, False))
6176

    
6177
    self.tasklets = tasklets
6178

    
6179
    # Declare instance locks
6180
    self.needed_locks[locking.LEVEL_INSTANCE] = names
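    # Example (illustrative, hypothetical instances): if the node is primary
    # for "web1" and "db1", the loop above builds one tasklet per instance,
    # roughly:
    #   self.tasklets == [TLMigrateInstance(self, "web1", False),
    #                     TLMigrateInstance(self, "db1", False)]
    # and locks exactly those instance names, so the node-level migrate
    # opcode is executed as a series of per-instance migrations.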
6181

    
6182
  def DeclareLocks(self, level):
6183
    if level == locking.LEVEL_NODE:
6184
      self._LockInstancesNodes()
6185

    
6186
  def BuildHooksEnv(self):
6187
    """Build hooks env.
6188

6189
    This runs on the master, the primary and all the secondaries.
6190

6191
    """
6192
    env = {
6193
      "NODE_NAME": self.op.node_name,
6194
      }
6195

    
6196
    nl = [self.cfg.GetMasterNode()]
6197

    
6198
    return (env, nl, nl)
6199

    
6200

    
6201
class TLMigrateInstance(Tasklet):
6202
  """Tasklet class for instance migration.
6203

6204
  @type live: boolean
6205
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run

6208
  """
6209
  def __init__(self, lu, instance_name, cleanup):
6210
    """Initializes this class.
6211

6212
    """
6213
    Tasklet.__init__(self, lu)
6214

    
6215
    # Parameters
6216
    self.instance_name = instance_name
6217
    self.cleanup = cleanup
6218
    self.live = False # will be overridden later
6219

    
6220
  def CheckPrereq(self):
6221
    """Check prerequisites.
6222

6223
    This checks that the instance is in the cluster.
6224

6225
    """
6226
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6227
    instance = self.cfg.GetInstanceInfo(instance_name)
6228
    assert instance is not None
6229

    
6230
    if instance.disk_template != constants.DT_DRBD8:
6231
      raise errors.OpPrereqError("Instance's disk layout is not"
6232
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6233

    
6234
    secondary_nodes = instance.secondary_nodes
6235
    if not secondary_nodes:
6236
      raise errors.ConfigurationError("No secondary node but using"
6237
                                      " drbd8 disk template")
6238

    
6239
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6240

    
6241
    target_node = secondary_nodes[0]
6242
    # check memory requirements on the secondary node
6243
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6244
                         instance.name, i_be[constants.BE_MEMORY],
6245
                         instance.hypervisor)
6246

    
6247
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    
6250
    if not self.cleanup:
6251
      _CheckNodeNotDrained(self.lu, target_node)
6252
      result = self.rpc.call_instance_migratable(instance.primary_node,
6253
                                                 instance)
6254
      result.Raise("Can't migrate, please use failover",
6255
                   prereq=True, ecode=errors.ECODE_STATE)
6256

    
6257
    self.instance = instance
6258

    
6259
    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters is accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
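    # Summary (illustrative) of how the obsolete "live" flag and the newer
    # "mode" parameter are reconciled above, assuming a hypervisor whose
    # default migration mode is "live":
    #   live=True,  mode=None     -> live migration
    #   live=False, mode=None     -> non-live migration
    #   live=None,  mode="live"   -> live migration
    #   live=None,  mode=None     -> hypervisor default (here: live)
    #   live set and mode set     -> OpPrereqError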

    
6278
  def _WaitUntilSync(self):
6279
    """Poll with custom rpc for disk sync.
6280

6281
    This uses our own step-based rpc call.
6282

6283
    """
6284
    self.feedback_fn("* wait until resync is done")
6285
    all_done = False
6286
    while not all_done:
6287
      all_done = True
6288
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6289
                                            self.nodes_ip,
6290
                                            self.instance.disks)
6291
      min_percent = 100
6292
      for node, nres in result.items():
6293
        nres.Raise("Cannot resync disks on node %s" % node)
6294
        node_done, node_percent = nres.payload
6295
        all_done = all_done and node_done
6296
        if node_percent is not None:
6297
          min_percent = min(min_percent, node_percent)
6298
      if not all_done:
6299
        if min_percent < 100:
6300
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6301
        time.sleep(2)
6302

    
6303
  def _EnsureSecondary(self, node):
6304
    """Demote a node to secondary.
6305

6306
    """
6307
    self.feedback_fn("* switching node %s to secondary mode" % node)
6308

    
6309
    for dev in self.instance.disks:
6310
      self.cfg.SetDiskID(dev, node)
6311

    
6312
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6313
                                          self.instance.disks)
6314
    result.Raise("Cannot change disk to secondary on node %s" % node)
6315

    
6316
  def _GoStandalone(self):
6317
    """Disconnect from the network.
6318

6319
    """
6320
    self.feedback_fn("* changing into standalone mode")
6321
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6322
                                               self.instance.disks)
6323
    for node, nres in result.items():
6324
      nres.Raise("Cannot disconnect disks node %s" % node)
6325

    
6326
  def _GoReconnect(self, multimaster):
6327
    """Reconnect to the network.
6328

6329
    """
6330
    if multimaster:
6331
      msg = "dual-master"
6332
    else:
6333
      msg = "single-master"
6334
    self.feedback_fn("* changing disks into %s mode" % msg)
6335
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6336
                                           self.instance.disks,
6337
                                           self.instance.name, multimaster)
6338
    for node, nres in result.items():
6339
      nres.Raise("Cannot change disks config on node %s" % node)
6340

    
6341
  def _ExecCleanup(self):
6342
    """Try to cleanup after a failed migration.
6343

6344
    The cleanup is done by:
6345
      - check that the instance is running only on one node
6346
        (and update the config if needed)
6347
      - change disks on its secondary node to secondary
6348
      - wait until disks are fully synchronized
6349
      - disconnect from the network
6350
      - change disks into single-master mode
6351
      - wait again until disks are fully synchronized
6352

6353
    """
6354
    instance = self.instance
6355
    target_node = self.target_node
6356
    source_node = self.source_node
6357

    
6358
    # check running on only one node
6359
    self.feedback_fn("* checking where the instance actually runs"
6360
                     " (if this hangs, the hypervisor might be in"
6361
                     " a bad state)")
6362
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6363
    for node, result in ins_l.items():
6364
      result.Raise("Can't contact node %s" % node)
6365

    
6366
    runningon_source = instance.name in ins_l[source_node].payload
6367
    runningon_target = instance.name in ins_l[target_node].payload
6368

    
6369
    if runningon_source and runningon_target:
6370
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6371
                               " or the hypervisor is confused. You will have"
6372
                               " to ensure manually that it runs only on one"
6373
                               " and restart this operation.")
6374

    
6375
    if not (runningon_source or runningon_target):
6376
      raise errors.OpExecError("Instance does not seem to be running at all."
6377
                               " In this case, it's safer to repair by"
6378
                               " running 'gnt-instance stop' to ensure disk"
6379
                               " shutdown, and then restarting it.")
6380

    
6381
    if runningon_target:
6382
      # the migration has actually succeeded, we need to update the config
6383
      self.feedback_fn("* instance running on secondary node (%s),"
6384
                       " updating config" % target_node)
6385
      instance.primary_node = target_node
6386
      self.cfg.Update(instance, self.feedback_fn)
6387
      demoted_node = source_node
6388
    else:
6389
      self.feedback_fn("* instance confirmed to be running on its"
6390
                       " primary node (%s)" % source_node)
6391
      demoted_node = target_node
6392

    
6393
    self._EnsureSecondary(demoted_node)
6394
    try:
6395
      self._WaitUntilSync()
6396
    except errors.OpExecError:
6397
      # we ignore here errors, since if the device is standalone, it
6398
      # won't be able to sync
6399
      pass
6400
    self._GoStandalone()
6401
    self._GoReconnect(False)
6402
    self._WaitUntilSync()
6403

    
6404
    self.feedback_fn("* done")
6405

    
6406
  def _RevertDiskStatus(self):
6407
    """Try to revert the disk status after a failed migration.
6408

6409
    """
6410
    target_node = self.target_node
6411
    try:
6412
      self._EnsureSecondary(target_node)
6413
      self._GoStandalone()
6414
      self._GoReconnect(False)
6415
      self._WaitUntilSync()
6416
    except errors.OpExecError, err:
6417
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6418
                         " drives: error '%s'\n"
6419
                         "Please look and recover the instance status" %
6420
                         str(err))
6421

    
6422
  def _AbortMigration(self):
6423
    """Call the hypervisor code to abort a started migration.
6424

6425
    """
6426
    instance = self.instance
6427
    target_node = self.target_node
6428
    migration_info = self.migration_info
6429

    
6430
    abort_result = self.rpc.call_finalize_migration(target_node,
6431
                                                    instance,
6432
                                                    migration_info,
6433
                                                    False)
6434
    abort_msg = abort_result.fail_msg
6435
    if abort_msg:
6436
      logging.error("Aborting migration failed on target node %s: %s",
6437
                    target_node, abort_msg)
6438
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    
6441
  def _ExecMigration(self):
6442
    """Migrate an instance.
6443

6444
    The migrate is done by:
6445
      - change the disks into dual-master mode
6446
      - wait until disks are fully synchronized again
6447
      - migrate the instance
6448
      - change disks on the new secondary node (the old primary) to secondary
6449
      - wait until disks are fully synchronized
6450
      - change disks into single-master mode
6451

6452
    """
6453
    instance = self.instance
6454
    target_node = self.target_node
6455
    source_node = self.source_node
6456

    
6457
    self.feedback_fn("* checking disk consistency between source and target")
6458
    for dev in instance.disks:
6459
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6460
        raise errors.OpExecError("Disk %s is degraded or not fully"
6461
                                 " synchronized on target node,"
6462
                                 " aborting migrate." % dev.iv_name)
6463

    
6464
    # First get the migration information from the remote node
6465
    result = self.rpc.call_migration_info(source_node, instance)
6466
    msg = result.fail_msg
6467
    if msg:
6468
      log_err = ("Failed fetching source migration information from %s: %s" %
6469
                 (source_node, msg))
6470
      logging.error(log_err)
6471
      raise errors.OpExecError(log_err)
6472

    
6473
    self.migration_info = migration_info = result.payload
6474

    
6475
    # Then switch the disks to master/master mode
6476
    self._EnsureSecondary(target_node)
6477
    self._GoStandalone()
6478
    self._GoReconnect(True)
6479
    self._WaitUntilSync()
6480

    
6481
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6482
    result = self.rpc.call_accept_instance(target_node,
6483
                                           instance,
6484
                                           migration_info,
6485
                                           self.nodes_ip[target_node])
6486

    
6487
    msg = result.fail_msg
6488
    if msg:
6489
      logging.error("Instance pre-migration failed, trying to revert"
6490
                    " disk status: %s", msg)
6491
      self.feedback_fn("Pre-migration failed, aborting")
6492
      self._AbortMigration()
6493
      self._RevertDiskStatus()
6494
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6495
                               (instance.name, msg))
6496

    
6497
    self.feedback_fn("* migrating instance to %s" % target_node)
6498
    time.sleep(10)
6499
    result = self.rpc.call_instance_migrate(source_node, instance,
6500
                                            self.nodes_ip[target_node],
6501
                                            self.live)
6502
    msg = result.fail_msg
6503
    if msg:
6504
      logging.error("Instance migration failed, trying to revert"
6505
                    " disk status: %s", msg)
6506
      self.feedback_fn("Migration failed, aborting")
6507
      self._AbortMigration()
6508
      self._RevertDiskStatus()
6509
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6510
                               (instance.name, msg))
6511
    time.sleep(10)
6512

    
6513
    instance.primary_node = target_node
6514
    # distribute new instance config to the other nodes
6515
    self.cfg.Update(instance, self.feedback_fn)
6516

    
6517
    result = self.rpc.call_finalize_migration(target_node,
6518
                                              instance,
6519
                                              migration_info,
6520
                                              True)
6521
    msg = result.fail_msg
6522
    if msg:
6523
      logging.error("Instance migration succeeded, but finalization failed:"
6524
                    " %s", msg)
6525
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6526
                               msg)
6527

    
6528
    self._EnsureSecondary(source_node)
6529
    self._WaitUntilSync()
6530
    self._GoStandalone()
6531
    self._GoReconnect(False)
6532
    self._WaitUntilSync()
6533

    
6534
    self.feedback_fn("* done")
6535

    
6536
  def Exec(self, feedback_fn):
6537
    """Perform the migration.
6538

6539
    """
6540
    feedback_fn("Migrating instance %s" % self.instance.name)
6541

    
6542
    self.feedback_fn = feedback_fn
6543

    
6544
    self.source_node = self.instance.primary_node
6545
    self.target_node = self.instance.secondary_nodes[0]
6546
    self.all_nodes = [self.source_node, self.target_node]
6547
    self.nodes_ip = {
6548
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6549
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6550
      }
6551

    
6552
    if self.cleanup:
6553
      return self._ExecCleanup()
6554
    else:
6555
      return self._ExecMigration()
6556

    
6557

    
6558
def _CreateBlockDev(lu, node, instance, device, force_create,
6559
                    info, force_open):
6560
  """Create a tree of block devices on a given node.
6561

6562
  If this device type has to be created on secondaries, create it and
6563
  all its children.
6564

6565
  If not, just recurse to children keeping the same 'force' value.
6566

6567
  @param lu: the lu on whose behalf we execute
6568
  @param node: the node on which to create the device
6569
  @type instance: L{objects.Instance}
6570
  @param instance: the instance which owns the device
6571
  @type device: L{objects.Disk}
6572
  @param device: the device to create
6573
  @type force_create: boolean
6574
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
6578
      (this will be represented as a LVM tag)
6579
  @type force_open: boolean
6580
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

6585
  """
6586
  if device.CreateOnSecondary():
6587
    force_create = True
6588

    
6589
  if device.children:
6590
    for child in device.children:
6591
      _CreateBlockDev(lu, node, instance, child, force_create,
6592
                      info, force_open)
6593

    
6594
  if not force_create:
6595
    return
6596

    
6597
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6598

    
6599

    
6600
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6601
  """Create a single block device on a given node.
6602

6603
  This will not recurse over children of the device, so they must be
6604
  created in advance.
6605

6606
  @param lu: the lu on whose behalf we execute
6607
  @param node: the node on which to create the device
6608
  @type instance: L{objects.Instance}
6609
  @param instance: the instance which owns the device
6610
  @type device: L{objects.Disk}
6611
  @param device: the device to create
6612
  @param info: the extra 'metadata' we should attach to the device
6613
      (this will be represented as a LVM tag)
6614
  @type force_open: boolean
6615
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

6620
  """
6621
  lu.cfg.SetDiskID(device, node)
6622
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6623
                                       instance.name, force_open, info)
6624
  result.Raise("Can't create block device %s on"
6625
               " node %s for instance %s" % (device, node, instance.name))
6626
  if device.physical_id is None:
6627
    device.physical_id = result.payload
6628

    
6629

    
6630
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
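# Example (illustrative, hypothetical UUIDs): each name is a freshly
# generated unique ID with the requested suffix appended, so
# _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns something like:
#   ["0b5f4c9e-1d2a-4f6b-9c3d-aa11bb22cc33.disk0",
#    "7e8d6f10-2b3c-4d5e-8f90-dd44ee55ff66.disk1"]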

    
6642

    
6643
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
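# Example (illustrative): the returned device is a DRBD8 disk whose children
# are the data LV and the fixed 128 MiB metadata LV, i.e. roughly:
#   Disk(dev_type=LD_DRBD8, size=size, iv_name=iv_name,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=size, logical_id=(vgname, names[0])),
#                  Disk(LD_LV, size=128,  logical_id=(vgname, names[1]))])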

    
6662

    
6663
def _GenerateDiskTemplate(lu, template_name,
6664
                          instance_name, primary_node,
6665
                          secondary_nodes, disk_info,
6666
                          file_storage_dir, file_driver,
6667
                          base_index, feedback_fn):
6668
  """Generate the entire disk layout for a given template type.
6669

6670
  """
6671
  #TODO: compute space requirements
6672

    
6673
  vgname = lu.cfg.GetVGName()
6674
  disk_count = len(disk_info)
6675
  disks = []
6676
  if template_name == constants.DT_DISKLESS:
6677
    pass
6678
  elif template_name == constants.DT_PLAIN:
6679
    if len(secondary_nodes) != 0:
6680
      raise errors.ProgrammerError("Wrong template configuration")
6681

    
6682
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6683
                                      for i in range(disk_count)])
6684
    for idx, disk in enumerate(disk_info):
6685
      disk_index = idx + base_index
6686
      vg = disk.get("vg", vgname)
6687
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6688
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6689
                              logical_id=(vg, names[idx]),
6690
                              iv_name="disk/%d" % disk_index,
6691
                              mode=disk["mode"])
6692
      disks.append(disk_dev)
6693
  elif template_name == constants.DT_DRBD8:
6694
    if len(secondary_nodes) != 1:
6695
      raise errors.ProgrammerError("Wrong template configuration")
6696
    remote_node = secondary_nodes[0]
6697
    minors = lu.cfg.AllocateDRBDMinor(
6698
      [primary_node, remote_node] * len(disk_info), instance_name)
6699

    
6700
    names = []
6701
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6702
                                               for i in range(disk_count)]):
6703
      names.append(lv_prefix + "_data")
6704
      names.append(lv_prefix + "_meta")
6705
    for idx, disk in enumerate(disk_info):
6706
      disk_index = idx + base_index
6707
      vg = disk.get("vg", vgname)
6708
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6709
                                      disk["size"], vg, names[idx*2:idx*2+2],
6710
                                      "disk/%d" % disk_index,
6711
                                      minors[idx*2], minors[idx*2+1])
6712
      disk_dev.mode = disk["mode"]
6713
      disks.append(disk_dev)
6714
  elif template_name == constants.DT_FILE:
6715
    if len(secondary_nodes) != 0:
6716
      raise errors.ProgrammerError("Wrong template configuration")
6717

    
6718
    _RequireFileStorage()
6719

    
6720
    for idx, disk in enumerate(disk_info):
6721
      disk_index = idx + base_index
6722
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6723
                              iv_name="disk/%d" % disk_index,
6724
                              logical_id=(file_driver,
6725
                                          "%s/disk%d" % (file_storage_dir,
6726
                                                         disk_index)),
6727
                              mode=disk["mode"])
6728
      disks.append(disk_dev)
6729
  else:
6730
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6731
  return disks
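# Example (illustrative, hypothetical values): for template_name=DT_PLAIN,
# base_index=0 and disk_info=[{"size": 1024, "mode": "rw"}] the result is a
# single LV-backed disk, roughly:
#   [objects.Disk(dev_type=constants.LD_LV, size=1024,
#                 logical_id=("<vgname>", "<uuid>.disk0"),
#                 iv_name="disk/0", mode="rw")]
# For DT_DRBD8 each entry is instead the tree built by _GenerateDRBD8Branch,
# and for DT_FILE the logical_id is (file_driver, "<file_storage_dir>/disk0").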
6732

    
6733

    
6734
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name

    
6740

    
6741
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
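# Worked example: if 512 MiB out of 2048 MiB were written in 60 seconds,
# the average is 60/512 seconds per MiB, so the remaining 1536 MiB are
# estimated at 1536 * 60/512 = 180 seconds:
#   _CalcEta(60, 512, 2048) == 180.0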

    
6753

    
6754
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
  for idx, device in enumerate(instance.disks):
    lu.LogInfo("* Wiping disk %d", idx)
    logging.info("Wiping disk %d for instance %s", idx, instance.name)

    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                          constants.MIN_WIPE_CHUNK_PERCENT)

    offset = 0
    size = device.size
    last_output = 0
    start_time = time.time()

    while offset < size:
      wipe_size = min(wipe_chunk_size, size - offset)
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))
      now = time.time()
      offset += wipe_size
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
        last_output = now
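    # Worked example (illustrative; assumes MIN_WIPE_CHUNK_PERCENT = 10 and
    # MAX_WIPE_CHUNK = 1024 MiB): a 4096 MiB disk is wiped in
    # min(1024, 4096 / 100.0 * 10) = 409.6 MiB chunks, while a very large
    # disk is capped at 1024 MiB per call_blockdev_wipe request; progress
    # and the _CalcEta()-based estimate are logged at most once per minute.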
6791

    
6792

    
6793
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6794
  """Create all disks for an instance.
6795

6796
  This abstracts away some work from AddInstance.
6797

6798
  @type lu: L{LogicalUnit}
6799
  @param lu: the logical unit on whose behalf we execute
6800
  @type instance: L{objects.Instance}
6801
  @param instance: the instance whose disks we should create
6802
  @type to_skip: list
6803
  @param to_skip: list of indices to skip
6804
  @type target_node: string
6805
  @param target_node: if passed, overrides the target node for creation
6806
  @rtype: boolean
6807
  @return: the success of the creation
6808

6809
  """
6810
  info = _GetInstanceInfoText(instance)
6811
  if target_node is None:
6812
    pnode = instance.primary_node
6813
    all_nodes = instance.all_nodes
6814
  else:
6815
    pnode = target_node
6816
    all_nodes = [pnode]
6817

    
6818
  if instance.disk_template == constants.DT_FILE:
6819
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6820
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6821

    
6822
    result.Raise("Failed to create directory '%s' on"
6823
                 " node %s" % (file_storage_dir, pnode))
6824

    
6825
  # Note: this needs to be kept in sync with adding of disks in
6826
  # LUSetInstanceParams
6827
  for idx, device in enumerate(instance.disks):
6828
    if to_skip and idx in to_skip:
6829
      continue
6830
    logging.info("Creating volume %s for instance %s",
6831
                 device.iv_name, instance.name)
6832
    #HARDCODE
6833
    for node in all_nodes:
6834
      f_create = node == pnode
6835
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6836

    
6837

    
6838
def _RemoveDisks(lu, instance, target_node=None):
6839
  """Remove all disks for an instance.
6840

6841
  This abstracts away some work from `AddInstance()` and
6842
  `RemoveInstance()`. Note that in case some of the devices couldn't
6843
  be removed, the removal will continue with the other ones (compare
6844
  with `_CreateDisks()`).
6845

6846
  @type lu: L{LogicalUnit}
6847
  @param lu: the logical unit on whose behalf we execute
6848
  @type instance: L{objects.Instance}
6849
  @param instance: the instance whose disks we should remove
6850
  @type target_node: string
6851
  @param target_node: used to override the node on which to remove the disks
6852
  @rtype: boolean
6853
  @return: the success of the removal
6854

6855
  """
6856
  logging.info("Removing block devices for instance %s", instance.name)
6857

    
6858
  all_result = True
6859
  for device in instance.disks:
6860
    if target_node:
6861
      edata = [(target_node, device)]
6862
    else:
6863
      edata = device.ComputeNodeTree(instance.primary_node)
6864
    for node, disk in edata:
6865
      lu.cfg.SetDiskID(disk, node)
6866
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6867
      if msg:
6868
        lu.LogWarning("Could not remove block device %s on node %s,"
6869
                      " continuing anyway: %s", device.iv_name, node, msg)
6870
        all_result = False
6871

    
6872
  if instance.disk_template == constants.DT_FILE:
6873
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6874
    if target_node:
6875
      tgt = target_node
6876
    else:
6877
      tgt = instance.primary_node
6878
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6879
    if result.fail_msg:
6880
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6881
                    file_storage_dir, instance.primary_node, result.fail_msg)
6882
      all_result = False
6883

    
6884
  return all_result
6885

    
6886

    
6887
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
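# Worked example: for the drbd template and
# disks = [{"vg": "xenvg", "size": 1024}, {"vg": "fastvg", "size": 2048}],
# each disk is charged 128 MiB of DRBD metadata in its own volume group:
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#     == {"xenvg": 1152, "fastvg": 2176}
# Disks sharing a volume group accumulate into a single entry.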
6915

    
6916
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
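# Worked example: for disks = [{"size": 1024}, {"size": 2048}]:
#   _ComputeDiskSize(constants.DT_PLAIN, disks) == 3072
#   _ComputeDiskSize(constants.DT_DRBD8, disks) == 3328  (128 MiB of DRBD
#                                                         metadata per disk)
# while the diskless and file templates need no volume group space (None).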

    
6935

    
6936
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6937
  """Hypervisor parameter validation.
6938

6939
  This function abstract the hypervisor parameter validation to be
6940
  used in both instance create and instance modify.
6941

6942
  @type lu: L{LogicalUnit}
6943
  @param lu: the logical unit for which we check
6944
  @type nodenames: list
6945
  @param nodenames: the list of nodes on which we should check
6946
  @type hvname: string
6947
  @param hvname: the name of the hypervisor we should use
6948
  @type hvparams: dict
6949
  @param hvparams: the parameters which we need to check
6950
  @raise errors.OpPrereqError: if the parameters are not valid
6951

6952
  """
6953
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6954
                                                  hvname,
6955
                                                  hvparams)
6956
  for node in nodenames:
6957
    info = hvinfo[node]
6958
    if info.offline:
6959
      continue
6960
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6961

    
6962

    
6963
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6964
  """OS parameters validation.
6965

6966
  @type lu: L{LogicalUnit}
6967
  @param lu: the logical unit for which we check
6968
  @type required: boolean
6969
  @param required: whether the validation should fail if the OS is not
6970
      found
6971
  @type nodenames: list
6972
  @param nodenames: the list of nodes on which we should check
6973
  @type osname: string
6974
  @param osname: the name of the hypervisor we should use
6975
  @type osparams: dict
6976
  @param osparams: the parameters which we need to check
6977
  @raise errors.OpPrereqError: if the parameters are not valid
6978

6979
  """
6980
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6981
                                   [constants.OS_VALIDATE_PARAMETERS],
6982
                                   osparams)
6983
  for node, nres in result.items():
6984
    # we don't check for offline cases since this should be run only
6985
    # against the master node and/or an instance's nodes
6986
    nres.Raise("OS Parameters validation failed on node %s" % node)
6987
    if not nres.payload:
6988
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6989
                 osname, node)
6990

    
6991

    
6992
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
    ("start", True, ht.TBool),
    ("wait_for_sync", True, ht.TBool),
    ("ip_check", True, ht.TBool),
    ("name_check", True, ht.TBool),
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ("osparams", ht.EmptyDict, ht.TDict),
    ("no_install", None, ht.TMaybeBool),
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
    ("source_x509_ca", None, ht.TMaybeString),
    ("source_instance_name", None, ht.TMaybeString),
    ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
     ht.TPositiveInt),
    ("src_node", None, ht.TMaybeString),
    ("src_path", None, ht.TMaybeString),
    ("pnode", None, ht.TMaybeString),
    ("snode", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("hypervisor", None, ht.TMaybeString),
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
    ("identify_defaults", False, ht.TBool),
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
    ("file_storage_dir", None, ht.TMaybeString),
    ]
  REQ_BGL = False

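  # A minimal sketch of the per-disk and per-NIC dicts accepted above (values
  # are hypothetical; only the key names come from the checks below):
  #   disks=[{"size": 10240, "mode": "rw"}]
  #   nics=[{"mode": "bridged", "link": "xen-br0"}]
  # Disk dicts may also carry "vg" or "adopt"; NIC dicts may carry "ip" and
  # "mac", or the legacy "bridge" key instead of "link".
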
  def CheckArguments(self):
7032
    """Check arguments.
7033

7034
    """
7035
    # do not require name_check to ease forward/backward compatibility
7036
    # for tools
7037
    if self.op.no_install and self.op.start:
7038
      self.LogInfo("No-installation mode selected, disabling startup")
7039
      self.op.start = False
7040
    # validate/normalize the instance name
7041
    self.op.instance_name = \
7042
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7043

    
7044
    if self.op.ip_check and not self.op.name_check:
7045
      # TODO: make the ip check more flexible and not depend on the name check
7046
      raise errors.OpPrereqError("Cannot do ip check without a name check",
7047
                                 errors.ECODE_INVAL)
7048

    
7049
    # check nics' parameter names
7050
    for nic in self.op.nics:
7051
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7052

    
7053
    # check disks. parameter names and consistent adopt/no-adopt strategy
7054
    has_adopt = has_no_adopt = False
7055
    for disk in self.op.disks:
7056
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7057
      if "adopt" in disk:
7058
        has_adopt = True
7059
      else:
7060
        has_no_adopt = True
7061
    if has_adopt and has_no_adopt:
7062
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7063
                                 errors.ECODE_INVAL)
7064
    if has_adopt:
7065
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7066
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7067
                                   " '%s' disk template" %
7068
                                   self.op.disk_template,
7069
                                   errors.ECODE_INVAL)
7070
      if self.op.iallocator is not None:
7071
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7072
                                   " iallocator script", errors.ECODE_INVAL)
7073
      if self.op.mode == constants.INSTANCE_IMPORT:
7074
        raise errors.OpPrereqError("Disk adoption not allowed for"
7075
                                   " instance import", errors.ECODE_INVAL)
7076

    
7077
    self.adopt_disks = has_adopt
7078

    
7079
    # instance name verification
7080
    if self.op.name_check:
7081
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7082
      self.op.instance_name = self.hostname1.name
7083
      # used in CheckPrereq for ip ping check
7084
      self.check_ip = self.hostname1.ip
7085
    else:
7086
      self.check_ip = None
7087

    
7088
    # file storage checks
7089
    if (self.op.file_driver and
7090
        not self.op.file_driver in constants.FILE_DRIVER):
7091
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7092
                                 self.op.file_driver, errors.ECODE_INVAL)
7093

    
7094
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7095
      raise errors.OpPrereqError("File storage directory path not absolute",
7096
                                 errors.ECODE_INVAL)
7097

    
7098
    ### Node/iallocator related checks
7099
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7100

    
7101
    if self.op.pnode is not None:
7102
      if self.op.disk_template in constants.DTS_NET_MIRROR:
7103
        if self.op.snode is None:
7104
          raise errors.OpPrereqError("The networked disk templates need"
7105
                                     " a mirror node", errors.ECODE_INVAL)
7106
      elif self.op.snode:
7107
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7108
                        " template")
7109
        self.op.snode = None
7110

    
7111
    self._cds = _GetClusterDomainSecret()
7112

    
7113
    if self.op.mode == constants.INSTANCE_IMPORT:
7114
      # On import force_variant must be True, because if we forced it at
7115
      # initial install, our only chance when importing it back is that it
7116
      # works again!
7117
      self.op.force_variant = True
7118

    
7119
      if self.op.no_install:
7120
        self.LogInfo("No-installation mode has no effect during import")
7121

    
7122
    elif self.op.mode == constants.INSTANCE_CREATE:
7123
      if self.op.os_type is None:
7124
        raise errors.OpPrereqError("No guest OS specified",
7125
                                   errors.ECODE_INVAL)
7126
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7127
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7128
                                   " installation" % self.op.os_type,
7129
                                   errors.ECODE_STATE)
7130
      if self.op.disk_template is None:
7131
        raise errors.OpPrereqError("No disk template specified",
7132
                                   errors.ECODE_INVAL)
7133

    
7134
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7135
      # Check handshake to ensure both clusters have the same domain secret
7136
      src_handshake = self.op.source_handshake
7137
      if not src_handshake:
7138
        raise errors.OpPrereqError("Missing source handshake",
7139
                                   errors.ECODE_INVAL)
7140

    
7141
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7142
                                                           src_handshake)
7143
      if errmsg:
7144
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7145
                                   errors.ECODE_INVAL)
7146

    
7147
      # Load and check source CA
7148
      self.source_x509_ca_pem = self.op.source_x509_ca
7149
      if not self.source_x509_ca_pem:
7150
        raise errors.OpPrereqError("Missing source X509 CA",
7151
                                   errors.ECODE_INVAL)
7152

    
7153
      try:
7154
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7155
                                                    self._cds)
7156
      except OpenSSL.crypto.Error, err:
7157
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7158
                                   (err, ), errors.ECODE_INVAL)
7159

    
7160
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7161
      if errcode is not None:
7162
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7163
                                   errors.ECODE_INVAL)
7164

    
7165
      self.source_x509_ca = cert
7166

    
7167
      src_instance_name = self.op.source_instance_name
7168
      if not src_instance_name:
7169
        raise errors.OpPrereqError("Missing source instance name",
7170
                                   errors.ECODE_INVAL)
7171

    
7172
      self.source_instance_name = \
7173
          netutils.GetHostname(name=src_instance_name).name
7174

    
7175
    else:
7176
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7177
                                 self.op.mode, errors.ECODE_INVAL)
7178

    
7179
  def ExpandNames(self):
7180
    """ExpandNames for CreateInstance.
7181

7182
    Figure out the right locks for instance creation.
7183

7184
    """
7185
    self.needed_locks = {}
7186

    
7187
    instance_name = self.op.instance_name
7188
    # this is just a preventive check, but someone might still add this
7189
    # instance in the meantime, and creation will fail at lock-add time
7190
    if instance_name in self.cfg.GetInstanceList():
7191
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7192
                                 instance_name, errors.ECODE_EXISTS)
7193

    
7194
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7195

    
7196
    if self.op.iallocator:
7197
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7198
    else:
7199
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7200
      nodelist = [self.op.pnode]
7201
      if self.op.snode is not None:
7202
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7203
        nodelist.append(self.op.snode)
7204
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7205

    
7206
    # in case of import lock the source node too
7207
    if self.op.mode == constants.INSTANCE_IMPORT:
7208
      src_node = self.op.src_node
7209
      src_path = self.op.src_path
7210

    
7211
      if src_path is None:
7212
        self.op.src_path = src_path = self.op.instance_name
7213

    
7214
      if src_node is None:
7215
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7216
        self.op.src_node = None
7217
        if os.path.isabs(src_path):
7218
          raise errors.OpPrereqError("Importing an instance from an absolute"
7219
                                     " path requires a source node option.",
7220
                                     errors.ECODE_INVAL)
7221
      else:
7222
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7223
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7224
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7225
        if not os.path.isabs(src_path):
7226
          self.op.src_path = src_path = \
7227
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7228

    
7229
  def _RunAllocator(self):
7230
    """Run the allocator based on input opcode.
7231

7232
    """
7233
    nics = [n.ToDict() for n in self.nics]
7234
    ial = IAllocator(self.cfg, self.rpc,
7235
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7236
                     name=self.op.instance_name,
7237
                     disk_template=self.op.disk_template,
7238
                     tags=[],
7239
                     os=self.op.os_type,
7240
                     vcpus=self.be_full[constants.BE_VCPUS],
7241
                     mem_size=self.be_full[constants.BE_MEMORY],
7242
                     disks=self.disks,
7243
                     nics=nics,
7244
                     hypervisor=self.op.hypervisor,
7245
                     )
7246

    
7247
    ial.Run(self.op.iallocator)
7248

    
7249
    if not ial.success:
7250
      raise errors.OpPrereqError("Can't compute nodes using"
7251
                                 " iallocator '%s': %s" %
7252
                                 (self.op.iallocator, ial.info),
7253
                                 errors.ECODE_NORES)
7254
    if len(ial.result) != ial.required_nodes:
7255
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7256
                                 " of nodes (%s), required %s" %
7257
                                 (self.op.iallocator, len(ial.result),
7258
                                  ial.required_nodes), errors.ECODE_FAULT)
7259
    self.op.pnode = ial.result[0]
7260
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7261
                 self.op.instance_name, self.op.iallocator,
7262
                 utils.CommaJoin(ial.result))
7263
    if ial.required_nodes == 2:
7264
      self.op.snode = ial.result[1]
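  # Sketch of the request built above (descriptive only): the allocator is run
  # in IALLOCATOR_MODE_ALLOC with the instance name, disk template, vcpus,
  # memory, disks, nics and hypervisor, and must return exactly
  # ial.required_nodes node names (two when a mirrored template needs a
  # secondary node).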
  def BuildHooksEnv(self):
7267
    """Build hooks env.
7268

7269
    This runs on master, primary and secondary nodes of the instance.
7270

7271
    """
7272
    env = {
7273
      "ADD_MODE": self.op.mode,
7274
      }
7275
    if self.op.mode == constants.INSTANCE_IMPORT:
7276
      env["SRC_NODE"] = self.op.src_node
7277
      env["SRC_PATH"] = self.op.src_path
7278
      env["SRC_IMAGES"] = self.src_images
7279

    
7280
    env.update(_BuildInstanceHookEnv(
7281
      name=self.op.instance_name,
7282
      primary_node=self.op.pnode,
7283
      secondary_nodes=self.secondaries,
7284
      status=self.op.start,
7285
      os_type=self.op.os_type,
7286
      memory=self.be_full[constants.BE_MEMORY],
7287
      vcpus=self.be_full[constants.BE_VCPUS],
7288
      nics=_NICListToTuple(self, self.nics),
7289
      disk_template=self.op.disk_template,
7290
      disks=[(d["size"], d["mode"]) for d in self.disks],
7291
      bep=self.be_full,
7292
      hvp=self.hv_full,
7293
      hypervisor_name=self.op.hypervisor,
7294
    ))
7295

    
7296
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7297
          self.secondaries)
7298
    return env, nl, nl
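  # For reference (descriptive only): the env dict assembled above adds
  # ADD_MODE and, for imports, SRC_NODE/SRC_PATH/SRC_IMAGES on top of the
  # standard variables from _BuildInstanceHookEnv; the hook node list is the
  # master plus the instance's primary and secondary nodes.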
  def _ReadExportInfo(self):
7301
    """Reads the export information from disk.
7302

7303
    It will override the opcode source node and path with the actual
7304
    information, if these two were not specified before.
7305

7306
    @return: the export information
7307

7308
    """
7309
    assert self.op.mode == constants.INSTANCE_IMPORT
7310

    
7311
    src_node = self.op.src_node
7312
    src_path = self.op.src_path
7313

    
7314
    if src_node is None:
7315
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7316
      exp_list = self.rpc.call_export_list(locked_nodes)
7317
      found = False
7318
      for node in exp_list:
7319
        if exp_list[node].fail_msg:
7320
          continue
7321
        if src_path in exp_list[node].payload:
7322
          found = True
7323
          self.op.src_node = src_node = node
7324
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7325
                                                       src_path)
7326
          break
7327
      if not found:
7328
        raise errors.OpPrereqError("No export found for relative path %s" %
7329
                                    src_path, errors.ECODE_INVAL)
7330

    
7331
    _CheckNodeOnline(self, src_node)
7332
    result = self.rpc.call_export_info(src_node, src_path)
7333
    result.Raise("No export or invalid export found in dir %s" % src_path)
7334

    
7335
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7336
    if not export_info.has_section(constants.INISECT_EXP):
7337
      raise errors.ProgrammerError("Corrupted export config",
7338
                                   errors.ECODE_ENVIRON)
7339

    
7340
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7341
    if (int(ei_version) != constants.EXPORT_VERSION):
7342
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7343
                                 (ei_version, constants.EXPORT_VERSION),
7344
                                 errors.ECODE_ENVIRON)
7345
    return export_info
7346

    
7347
  def _ReadExportParams(self, einfo):
7348
    """Use export parameters as defaults.
7349

7350
    In case the opcode doesn't specify (as in override) some instance
7351
    parameters, then try to use them from the export information, if
7352
    that declares them.
7353

7354
    """
7355
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7356

    
7357
    if self.op.disk_template is None:
7358
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7359
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7360
                                          "disk_template")
7361
      else:
7362
        raise errors.OpPrereqError("No disk template specified and the export"
7363
                                   " is missing the disk_template information",
7364
                                   errors.ECODE_INVAL)
7365

    
7366
    if not self.op.disks:
7367
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7368
        disks = []
7369
        # TODO: import the disk iv_name too
7370
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7371
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7372
          disks.append({"size": disk_sz})
7373
        self.op.disks = disks
7374
      else:
7375
        raise errors.OpPrereqError("No disk info specified and the export"
7376
                                   " is missing the disk information",
7377
                                   errors.ECODE_INVAL)
7378

    
7379
    if (not self.op.nics and
7380
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7381
      nics = []
7382
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7383
        ndict = {}
7384
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7385
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7386
          ndict[name] = v
7387
        nics.append(ndict)
7388
      self.op.nics = nics
7389

    
7390
    if (self.op.hypervisor is None and
7391
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7392
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7393
    if einfo.has_section(constants.INISECT_HYP):
7394
      # use the export parameters but do not override the ones
7395
      # specified by the user
7396
      for name, value in einfo.items(constants.INISECT_HYP):
7397
        if name not in self.op.hvparams:
7398
          self.op.hvparams[name] = value
7399

    
7400
    if einfo.has_section(constants.INISECT_BEP):
7401
      # use the parameters, without overriding
7402
      for name, value in einfo.items(constants.INISECT_BEP):
7403
        if name not in self.op.beparams:
7404
          self.op.beparams[name] = value
7405
    else:
7406
      # try to read the parameters old style, from the main section
7407
      for name in constants.BES_PARAMETERS:
7408
        if (name not in self.op.beparams and
7409
            einfo.has_option(constants.INISECT_INS, name)):
7410
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7411

    
7412
    if einfo.has_section(constants.INISECT_OSP):
7413
      # use the parameters, without overriding
7414
      for name, value in einfo.items(constants.INISECT_OSP):
7415
        if name not in self.op.osparams:
7416
          self.op.osparams[name] = value
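  # Rough shape of the export data consumed above (option names from the code,
  # concrete values would be hypothetical): the INISECT_INS section provides
  # "disk_template", "disk_count"/"disk<N>_size", "nic_count"/"nic<N>_mac" and
  # "hypervisor", while the INISECT_HYP/INISECT_BEP/INISECT_OSP sections hold
  # hvparams, beparams and osparams that only fill in values the opcode did
  # not already specify.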
  def _RevertToDefaults(self, cluster):
7419
    """Revert the instance parameters to the default values.
7420

7421
    """
7422
    # hvparams
7423
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7424
    for name in self.op.hvparams.keys():
7425
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7426
        del self.op.hvparams[name]
7427
    # beparams
7428
    be_defs = cluster.SimpleFillBE({})
7429
    for name in self.op.beparams.keys():
7430
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7431
        del self.op.beparams[name]
7432
    # nic params
7433
    nic_defs = cluster.SimpleFillNIC({})
7434
    for nic in self.op.nics:
7435
      for name in constants.NICS_PARAMETERS:
7436
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7437
          del nic[name]
7438
    # osparams
7439
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7440
    for name in self.op.osparams.keys():
7441
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7442
        del self.op.osparams[name]
7443

    
7444
  def CheckPrereq(self):
7445
    """Check prerequisites.
7446

7447
    """
7448
    if self.op.mode == constants.INSTANCE_IMPORT:
7449
      export_info = self._ReadExportInfo()
7450
      self._ReadExportParams(export_info)
7451

    
7452
    _CheckDiskTemplate(self.op.disk_template)
7453

    
7454
    if (not self.cfg.GetVGName() and
7455
        self.op.disk_template not in constants.DTS_NOT_LVM):
7456
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7457
                                 " instances", errors.ECODE_STATE)
7458

    
7459
    if self.op.hypervisor is None:
7460
      self.op.hypervisor = self.cfg.GetHypervisorType()
7461

    
7462
    cluster = self.cfg.GetClusterInfo()
7463
    enabled_hvs = cluster.enabled_hypervisors
7464
    if self.op.hypervisor not in enabled_hvs:
7465
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7466
                                 " cluster (%s)" % (self.op.hypervisor,
7467
                                  ",".join(enabled_hvs)),
7468
                                 errors.ECODE_STATE)
7469

    
7470
    # check hypervisor parameter syntax (locally)
7471
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7472
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7473
                                      self.op.hvparams)
7474
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7475
    hv_type.CheckParameterSyntax(filled_hvp)
7476
    self.hv_full = filled_hvp
7477
    # check that we don't specify global parameters on an instance
7478
    _CheckGlobalHvParams(self.op.hvparams)
7479

    
7480
    # fill and remember the beparams dict
7481
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7482
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7483

    
7484
    # build os parameters
7485
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7486

    
7487
    # now that hvp/bep are in final format, let's reset to defaults,
7488
    # if told to do so
7489
    if self.op.identify_defaults:
7490
      self._RevertToDefaults(cluster)
7491

    
7492
    # NIC buildup
7493
    self.nics = []
7494
    for idx, nic in enumerate(self.op.nics):
7495
      nic_mode_req = nic.get("mode", None)
7496
      nic_mode = nic_mode_req
7497
      if nic_mode is None:
7498
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7499

    
7500
      # in routed mode, for the first nic, the default ip is 'auto'
7501
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7502
        default_ip_mode = constants.VALUE_AUTO
7503
      else:
7504
        default_ip_mode = constants.VALUE_NONE
7505

    
7506
      # ip validity checks
7507
      ip = nic.get("ip", default_ip_mode)
7508
      if ip is None or ip.lower() == constants.VALUE_NONE:
7509
        nic_ip = None
7510
      elif ip.lower() == constants.VALUE_AUTO:
7511
        if not self.op.name_check:
7512
          raise errors.OpPrereqError("IP address set to auto but name checks"
7513
                                     " have been skipped",
7514
                                     errors.ECODE_INVAL)
7515
        nic_ip = self.hostname1.ip
7516
      else:
7517
        if not netutils.IPAddress.IsValid(ip):
7518
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7519
                                     errors.ECODE_INVAL)
7520
        nic_ip = ip
7521

    
7522
      # TODO: check the ip address for uniqueness
7523
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7524
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7525
                                   errors.ECODE_INVAL)
7526

    
7527
      # MAC address verification
7528
      mac = nic.get("mac", constants.VALUE_AUTO)
7529
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7530
        mac = utils.NormalizeAndValidateMac(mac)
7531

    
7532
        try:
7533
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7534
        except errors.ReservationError:
7535
          raise errors.OpPrereqError("MAC address %s already in use"
7536
                                     " in cluster" % mac,
7537
                                     errors.ECODE_NOTUNIQUE)
7538

    
7539
      # bridge verification
7540
      bridge = nic.get("bridge", None)
7541
      link = nic.get("link", None)
7542
      if bridge and link:
7543
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7544
                                   " at the same time", errors.ECODE_INVAL)
7545
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7546
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7547
                                   errors.ECODE_INVAL)
7548
      elif bridge:
7549
        link = bridge
7550

    
7551
      nicparams = {}
7552
      if nic_mode_req:
7553
        nicparams[constants.NIC_MODE] = nic_mode_req
7554
      if link:
7555
        nicparams[constants.NIC_LINK] = link
7556

    
7557
      check_params = cluster.SimpleFillNIC(nicparams)
7558
      objects.NIC.CheckParameterSyntax(check_params)
7559
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7560

    
7561
    # disk checks/pre-build
7562
    self.disks = []
7563
    for disk in self.op.disks:
7564
      mode = disk.get("mode", constants.DISK_RDWR)
7565
      if mode not in constants.DISK_ACCESS_SET:
7566
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7567
                                   mode, errors.ECODE_INVAL)
7568
      size = disk.get("size", None)
7569
      if size is None:
7570
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7571
      try:
7572
        size = int(size)
7573
      except (TypeError, ValueError):
7574
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7575
                                   errors.ECODE_INVAL)
7576
      vg = disk.get("vg", self.cfg.GetVGName())
7577
      new_disk = {"size": size, "mode": mode, "vg": vg}
7578
      if "adopt" in disk:
7579
        new_disk["adopt"] = disk["adopt"]
7580
      self.disks.append(new_disk)
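    # At this point every entry of self.disks has been normalized to a dict
    # like {"size": 10240, "mode": "rw", "vg": "xenvg"} (values illustrative),
    # plus an optional "adopt" key when existing LVs are being adopted.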
    if self.op.mode == constants.INSTANCE_IMPORT:
7583

    
7584
      # Check that the new instance doesn't have less disks than the export
7585
      instance_disks = len(self.disks)
7586
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7587
      if instance_disks < export_disks:
7588
        raise errors.OpPrereqError("Not enough disks to import."
7589
                                   " (instance: %d, export: %d)" %
7590
                                   (instance_disks, export_disks),
7591
                                   errors.ECODE_INVAL)
7592

    
7593
      disk_images = []
7594
      for idx in range(export_disks):
7595
        option = 'disk%d_dump' % idx
7596
        if export_info.has_option(constants.INISECT_INS, option):
7597
          # FIXME: are the old os-es, disk sizes, etc. useful?
7598
          export_name = export_info.get(constants.INISECT_INS, option)
7599
          image = utils.PathJoin(self.op.src_path, export_name)
7600
          disk_images.append(image)
7601
        else:
7602
          disk_images.append(False)
7603

    
7604
      self.src_images = disk_images
7605

    
7606
      old_name = export_info.get(constants.INISECT_INS, 'name')
7607
      try:
7608
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7609
      except (TypeError, ValueError), err:
7610
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7611
                                   " an integer: %s" % str(err),
7612
                                   errors.ECODE_STATE)
7613
      if self.op.instance_name == old_name:
7614
        for idx, nic in enumerate(self.nics):
7615
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7616
            nic_mac_ini = 'nic%d_mac' % idx
7617
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7618

    
7619
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7620

    
7621
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7622
    if self.op.ip_check:
7623
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7624
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7625
                                   (self.check_ip, self.op.instance_name),
7626
                                   errors.ECODE_NOTUNIQUE)
7627

    
7628
    #### mac address generation
7629
    # By generating here the mac address both the allocator and the hooks get
7630
    # the real final mac address rather than the 'auto' or 'generate' value.
7631
    # There is a race condition between the generation and the instance object
7632
    # creation, which means that we know the mac is valid now, but we're not
7633
    # sure it will be when we actually add the instance. If things go bad
7634
    # adding the instance will abort because of a duplicate mac, and the
7635
    # creation job will fail.
7636
    for nic in self.nics:
7637
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7638
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7639

    
7640
    #### allocator run
7641

    
7642
    if self.op.iallocator is not None:
7643
      self._RunAllocator()
7644

    
7645
    #### node related checks
7646

    
7647
    # check primary node
7648
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7649
    assert self.pnode is not None, \
7650
      "Cannot retrieve locked node %s" % self.op.pnode
7651
    if pnode.offline:
7652
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7653
                                 pnode.name, errors.ECODE_STATE)
7654
    if pnode.drained:
7655
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7656
                                 pnode.name, errors.ECODE_STATE)
7657
    if not pnode.vm_capable:
7658
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7659
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7660

    
7661
    self.secondaries = []
7662

    
7663
    # mirror node verification
7664
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7665
      if self.op.snode == pnode.name:
7666
        raise errors.OpPrereqError("The secondary node cannot be the"
7667
                                   " primary node.", errors.ECODE_INVAL)
7668
      _CheckNodeOnline(self, self.op.snode)
7669
      _CheckNodeNotDrained(self, self.op.snode)
7670
      _CheckNodeVmCapable(self, self.op.snode)
7671
      self.secondaries.append(self.op.snode)
7672

    
7673
    nodenames = [pnode.name] + self.secondaries
7674

    
7675
    if not self.adopt_disks:
7676
      # Check lv size requirements, if not adopting
7677
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7678
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7679

    
7680
    else: # instead, we must check the adoption data
7681
      all_lvs = set([i["adopt"] for i in self.disks])
7682
      if len(all_lvs) != len(self.disks):
7683
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7684
                                   errors.ECODE_INVAL)
7685
      for lv_name in all_lvs:
7686
        try:
7687
          # FIXME: VG must be provided here. Else all LVs with the
7688
          # same name will be locked on all VGs.
7689
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7690
        except errors.ReservationError:
7691
          raise errors.OpPrereqError("LV named %s used by another instance" %
7692
                                     lv_name, errors.ECODE_NOTUNIQUE)
7693

    
7694
      node_lvs = self.rpc.call_lv_list([pnode.name],
7695
                                       self.cfg.GetVGName())[pnode.name]
7696
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7697
      node_lvs = node_lvs.payload
7698
      delta = all_lvs.difference(node_lvs.keys())
7699
      if delta:
7700
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7701
                                   utils.CommaJoin(delta),
7702
                                   errors.ECODE_INVAL)
7703
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7704
      if online_lvs:
7705
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7706
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7707
                                   errors.ECODE_STATE)
7708
      # update the size of disk based on what is found
7709
      for dsk in self.disks:
7710
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7711

    
7712
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7713

    
7714
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7715
    # check OS parameters (remotely)
7716
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7717

    
7718
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7719

    
7720
    # memory check on primary node
7721
    if self.op.start:
7722
      _CheckNodeFreeMemory(self, self.pnode.name,
7723
                           "creating instance %s" % self.op.instance_name,
7724
                           self.be_full[constants.BE_MEMORY],
7725
                           self.op.hypervisor)
7726

    
7727
    self.dry_run_result = list(nodenames)
7728

    
7729
  def Exec(self, feedback_fn):
7730
    """Create and add the instance to the cluster.
7731

7732
    """
7733
    instance = self.op.instance_name
7734
    pnode_name = self.pnode.name
7735

    
7736
    ht_kind = self.op.hypervisor
7737
    if ht_kind in constants.HTS_REQ_PORT:
7738
      network_port = self.cfg.AllocatePort()
7739
    else:
7740
      network_port = None
7741

    
7742
    if constants.ENABLE_FILE_STORAGE:
7743
      # this is needed because os.path.join does not accept None arguments
7744
      if self.op.file_storage_dir is None:
7745
        string_file_storage_dir = ""
7746
      else:
7747
        string_file_storage_dir = self.op.file_storage_dir
7748

    
7749
      # build the full file storage dir path
7750
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7751
                                        string_file_storage_dir, instance)
7752
    else:
7753
      file_storage_dir = ""
7754

    
7755
    disks = _GenerateDiskTemplate(self,
7756
                                  self.op.disk_template,
7757
                                  instance, pnode_name,
7758
                                  self.secondaries,
7759
                                  self.disks,
7760
                                  file_storage_dir,
7761
                                  self.op.file_driver,
7762
                                  0,
7763
                                  feedback_fn)
7764

    
7765
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7766
                            primary_node=pnode_name,
7767
                            nics=self.nics, disks=disks,
7768
                            disk_template=self.op.disk_template,
7769
                            admin_up=False,
7770
                            network_port=network_port,
7771
                            beparams=self.op.beparams,
7772
                            hvparams=self.op.hvparams,
7773
                            hypervisor=self.op.hypervisor,
7774
                            osparams=self.op.osparams,
7775
                            )
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
7789
    else:
7790
      feedback_fn("* creating instance disks...")
7791
      try:
7792
        _CreateDisks(self, iobj)
7793
      except errors.OpExecError:
7794
        self.LogWarning("Device creation failed, reverting...")
7795
        try:
7796
          _RemoveDisks(self, iobj)
7797
        finally:
7798
          self.cfg.ReleaseDRBDMinors(instance)
7799
          raise
7800

    
7801
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7802
        feedback_fn("* wiping instance disks...")
7803
        try:
7804
          _WipeDisks(self, iobj)
7805
        except errors.OpExecError:
7806
          self.LogWarning("Device wiping failed, reverting...")
7807
          try:
7808
            _RemoveDisks(self, iobj)
7809
          finally:
7810
            self.cfg.ReleaseDRBDMinors(instance)
7811
            raise
7812

    
7813
    feedback_fn("adding instance %s to cluster config" % instance)
7814

    
7815
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7816

    
7817
    # Declare that we don't want to remove the instance lock anymore, as we've
7818
    # added the instance to the config
7819
    del self.remove_locks[locking.LEVEL_INSTANCE]
7820
    # Unlock all the nodes
7821
    if self.op.mode == constants.INSTANCE_IMPORT:
7822
      nodes_keep = [self.op.src_node]
7823
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7824
                       if node != self.op.src_node]
7825
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7826
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7827
    else:
7828
      self.context.glm.release(locking.LEVEL_NODE)
7829
      del self.acquired_locks[locking.LEVEL_NODE]
7830

    
7831
    if self.op.wait_for_sync:
7832
      disk_abort = not _WaitForSync(self, iobj)
7833
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7834
      # make sure the disks are not degraded (still sync-ing is ok)
7835
      time.sleep(15)
7836
      feedback_fn("* checking mirrors status")
7837
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7838
    else:
7839
      disk_abort = False
7840

    
7841
    if disk_abort:
7842
      _RemoveDisks(self, iobj)
7843
      self.cfg.RemoveInstance(iobj.name)
7844
      # Make sure the instance lock gets removed
7845
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7846
      raise errors.OpExecError("There are some degraded disks for"
7847
                               " this instance")
7848

    
7849
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7850
      if self.op.mode == constants.INSTANCE_CREATE:
7851
        if not self.op.no_install:
7852
          feedback_fn("* running the instance OS create scripts...")
7853
          # FIXME: pass debug option from opcode to backend
7854
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7855
                                                 self.op.debug_level)
7856
          result.Raise("Could not add os for instance %s"
7857
                       " on node %s" % (instance, pnode_name))
7858

    
7859
      elif self.op.mode == constants.INSTANCE_IMPORT:
7860
        feedback_fn("* running the instance OS import scripts...")
7861

    
7862
        transfers = []
7863

    
7864
        for idx, image in enumerate(self.src_images):
7865
          if not image:
7866
            continue
7867

    
7868
          # FIXME: pass debug option from opcode to backend
7869
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7870
                                             constants.IEIO_FILE, (image, ),
7871
                                             constants.IEIO_SCRIPT,
7872
                                             (iobj.disks[idx], idx),
7873
                                             None)
7874
          transfers.append(dt)
7875

    
7876
        import_result = \
7877
          masterd.instance.TransferInstanceData(self, feedback_fn,
7878
                                                self.op.src_node, pnode_name,
7879
                                                self.pnode.secondary_ip,
7880
                                                iobj, transfers)
7881
        if not compat.all(import_result):
7882
          self.LogWarning("Some disks for instance %s on node %s were not"
7883
                          " imported successfully" % (instance, pnode_name))
7884

    
7885
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7886
        feedback_fn("* preparing remote import...")
7887
        # The source cluster will stop the instance before attempting to make a
7888
        # connection. In some cases stopping an instance can take a long time,
7889
        # hence the shutdown timeout is added to the connection timeout.
7890
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7891
                           self.op.source_shutdown_timeout)
7892
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7893

    
7894
        assert iobj.primary_node == self.pnode.name
7895
        disk_results = \
7896
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7897
                                        self.source_x509_ca,
7898
                                        self._cds, timeouts)
7899
        if not compat.all(disk_results):
7900
          # TODO: Should the instance still be started, even if some disks
7901
          # failed to import (valid for local imports, too)?
7902
          self.LogWarning("Some disks for instance %s on node %s were not"
7903
                          " imported successfully" % (instance, pnode_name))
7904

    
7905
        # Run rename script on newly imported instance
7906
        assert iobj.name == instance
7907
        feedback_fn("Running rename script for %s" % instance)
7908
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7909
                                                   self.source_instance_name,
7910
                                                   self.op.debug_level)
7911
        if result.fail_msg:
7912
          self.LogWarning("Failed to run rename script for %s on node"
7913
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7914

    
7915
      else:
7916
        # also checked in the prereq part
7917
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7918
                                     % self.op.mode)
7919

    
7920
    if self.op.start:
7921
      iobj.admin_up = True
7922
      self.cfg.Update(iobj, feedback_fn)
7923
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7924
      feedback_fn("* starting instance...")
7925
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7926
      result.Raise("Could not start instance")
7927

    
7928
    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_PARAMS = [
    _PInstanceName
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


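# Usage note (illustrative): clients such as "gnt-instance console" execute
# the command line returned by LUConnectConsole.Exec on the master node; it is
# an ssh invocation to the instance's primary node wrapping the
# hypervisor-specific console command from GetShellCommandForConsole.
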
class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("early_release", False, ht.TBool),
    ]
  REQ_BGL = False

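  # Illustrative parameter combinations (node names hypothetical): replacing
  # disks in place uses mode=REPLACE_DISK_PRI or REPLACE_DISK_SEC with an
  # optional "disks" index list, while mode=REPLACE_DISK_CHG replaces the
  # secondary node itself and needs exactly one of remote_node (e.g.
  # "node3.example.com") or iallocator.
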
  def CheckArguments(self):
8008
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8009
                                  self.op.iallocator)
8010

    
8011
  def ExpandNames(self):
8012
    self._ExpandAndLockInstance()
8013

    
8014
    if self.op.iallocator is not None:
8015
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8016

    
8017
    elif self.op.remote_node is not None:
8018
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8019
      self.op.remote_node = remote_node
8020

    
8021
      # Warning: do not remove the locking of the new secondary here
8022
      # unless DRBD8.AddChildren is changed to work in parallel;
8023
      # currently it doesn't since parallel invocations of
8024
      # FindUnusedMinor will conflict
8025
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8026
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8027

    
8028
    else:
8029
      self.needed_locks[locking.LEVEL_NODE] = []
8030
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8031

    
8032
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8033
                                   self.op.iallocator, self.op.remote_node,
8034
                                   self.op.disks, False, self.op.early_release)
8035

    
8036
    self.tasklets = [self.replacer]
8037

    
8038
  def DeclareLocks(self, level):
8039
    # If we're not already locking all nodes in the set we have to declare the
8040
    # instance's primary/secondary nodes.
8041
    if (level == locking.LEVEL_NODE and
8042
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8043
      self._LockInstancesNodes()
8044

    
8045
  def BuildHooksEnv(self):
8046
    """Build hooks env.
8047

8048
    This runs on the master, the primary and all the secondaries.
8049

8050
    """
8051
    instance = self.replacer.instance
8052
    env = {
8053
      "MODE": self.op.mode,
8054
      "NEW_SECONDARY": self.op.remote_node,
8055
      "OLD_SECONDARY": instance.secondary_nodes[0],
8056
      }
8057
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8058
    nl = [
8059
      self.cfg.GetMasterNode(),
8060
      instance.primary_node,
8061
      ]
8062
    if self.op.remote_node is not None:
8063
      nl.append(self.op.remote_node)
8064
    return env, nl, nl
8065

    
8066

    
8067
class TLReplaceDisks(Tasklet):
8068
  """Replaces disks for an instance.
8069

8070
  Note: Locking is not within the scope of this class.
8071

8072
  """
8073
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8074
               disks, delay_iallocator, early_release):
8075
    """Initializes this class.
8076

8077
    """
8078
    Tasklet.__init__(self, lu)
8079

    
8080
    # Parameters
8081
    self.instance_name = instance_name
8082
    self.mode = mode
8083
    self.iallocator_name = iallocator_name
8084
    self.remote_node = remote_node
8085
    self.disks = disks
8086
    self.delay_iallocator = delay_iallocator
8087
    self.early_release = early_release
8088

    
8089
    # Runtime data
8090
    self.instance = None
8091
    self.new_node = None
8092
    self.target_node = None
8093
    self.other_node = None
8094
    self.remote_node_info = None
8095
    self.node_secondary_ip = None
8096

    
8097
  @staticmethod
8098
  def CheckArguments(mode, remote_node, iallocator):
8099
    """Helper function for users of this class.
8100

8101
    """
8102
    # check for valid parameter combination
8103
    if mode == constants.REPLACE_DISK_CHG:
8104
      if remote_node is None and iallocator is None:
8105
        raise errors.OpPrereqError("When changing the secondary either an"
8106
                                   " iallocator script must be used or the"
8107
                                   " new node given", errors.ECODE_INVAL)
8108

    
8109
      if remote_node is not None and iallocator is not None:
8110
        raise errors.OpPrereqError("Give either the iallocator or the new"
8111
                                   " secondary, not both", errors.ECODE_INVAL)
8112

    
8113
    elif remote_node is not None or iallocator is not None:
8114
      # Not replacing the secondary
8115
      raise errors.OpPrereqError("The iallocator and new node options can"
8116
                                 " only be used when changing the"
8117
                                 " secondary node", errors.ECODE_INVAL)
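  # In short (descriptive only): REPLACE_DISK_CHG must be given exactly one of
  # remote_node/iallocator, and every other mode must be given neither, since
  # only a secondary change needs a new node.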
  @staticmethod
8120
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8121
    """Compute a new secondary node using an IAllocator.
8122

8123
    """
8124
    ial = IAllocator(lu.cfg, lu.rpc,
8125
                     mode=constants.IALLOCATOR_MODE_RELOC,
8126
                     name=instance_name,
8127
                     relocate_from=relocate_from)
8128

    
8129
    ial.Run(iallocator_name)
8130

    
8131
    if not ial.success:
8132
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8133
                                 " %s" % (iallocator_name, ial.info),
8134
                                 errors.ECODE_NORES)
8135

    
8136
    if len(ial.result) != ial.required_nodes:
8137
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8138
                                 " of nodes (%s), required %s" %
8139
                                 (iallocator_name,
8140
                                  len(ial.result), ial.required_nodes),
8141
                                 errors.ECODE_FAULT)
8142

    
8143
    remote_node_name = ial.result[0]
8144

    
8145
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8146
               instance_name, remote_node_name)
8147

    
8148
    return remote_node_name
8149

    
8150
  def _FindFaultyDisks(self, node_name):
8151
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8152
                                    node_name, True)
8153

    
8154
  def CheckPrereq(self):
8155
    """Check prerequisites.
8156

8157
    This checks that the instance is in the cluster.
8158

8159
    """
8160
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8161
    assert instance is not None, \
8162
      "Cannot retrieve locked instance %s" % self.instance_name
8163

    
8164
    if instance.disk_template != constants.DT_DRBD8:
8165
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8166
                                 " instances", errors.ECODE_INVAL)
8167

    
8168
    if len(instance.secondary_nodes) != 1:
8169
      raise errors.OpPrereqError("The instance has a strange layout,"
8170
                                 " expected one secondary but found %d" %
8171
                                 len(instance.secondary_nodes),
8172
                                 errors.ECODE_FAULT)
8173

    
8174
    if not self.delay_iallocator:
8175
      self._CheckPrereq2()
8176

    
8177
  def _CheckPrereq2(self):
8178
    """Check prerequisites, second part.
8179

8180
    This function should always be part of CheckPrereq. It was separated and is
8181
    now called from Exec because during node evacuation iallocator was only
8182
    called with an unmodified cluster model, not taking planned changes into
8183
    account.
8184

8185
    """
8186
    instance = self.instance
8187
    secondary_node = instance.secondary_nodes[0]
8188

    
8189
    if self.iallocator_name is None:
8190
      remote_node = self.remote_node
8191
    else:
8192
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8193
                                       instance.name, instance.secondary_nodes)
8194

    
8195
    if remote_node is not None:
8196
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8197
      assert self.remote_node_info is not None, \
8198
        "Cannot retrieve locked node %s" % remote_node
8199
    else:
8200
      self.remote_node_info = None
8201

    
8202
    if remote_node == self.instance.primary_node:
8203
      raise errors.OpPrereqError("The specified node is the primary node of"
8204
                                 " the instance.", errors.ECODE_INVAL)
8205

    
8206
    if remote_node == secondary_node:
8207
      raise errors.OpPrereqError("The specified node is already the"
8208
                                 " secondary node of the instance.",
8209
                                 errors.ECODE_INVAL)
8210

    
8211
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8212
                                    constants.REPLACE_DISK_CHG):
8213
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8214
                                 errors.ECODE_INVAL)
8215

    
8216
    if self.mode == constants.REPLACE_DISK_AUTO:
8217
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8218
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8219

    
8220
      if faulty_primary and faulty_secondary:
8221
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8222
                                   " one node and can not be repaired"
8223
                                   " automatically" % self.instance_name,
8224
                                   errors.ECODE_STATE)
8225

    
8226
      if faulty_primary:
8227
        self.disks = faulty_primary
8228
        self.target_node = instance.primary_node
8229
        self.other_node = secondary_node
8230
        check_nodes = [self.target_node, self.other_node]
8231
      elif faulty_secondary:
8232
        self.disks = faulty_secondary
8233
        self.target_node = secondary_node
8234
        self.other_node = instance.primary_node
8235
        check_nodes = [self.target_node, self.other_node]
8236
      else:
8237
        self.disks = []
8238
        check_nodes = []
8239

    
8240
    else:
8241
      # Non-automatic modes
8242
      if self.mode == constants.REPLACE_DISK_PRI:
8243
        self.target_node = instance.primary_node
8244
        self.other_node = secondary_node
8245
        check_nodes = [self.target_node, self.other_node]
8246

    
8247
      elif self.mode == constants.REPLACE_DISK_SEC:
8248
        self.target_node = secondary_node
8249
        self.other_node = instance.primary_node
8250
        check_nodes = [self.target_node, self.other_node]
8251

    
8252
      elif self.mode == constants.REPLACE_DISK_CHG:
8253
        self.new_node = remote_node
8254
        self.other_node = instance.primary_node
8255
        self.target_node = secondary_node
8256
        check_nodes = [self.new_node, self.other_node]
8257

    
8258
        _CheckNodeNotDrained(self.lu, remote_node)
8259
        _CheckNodeVmCapable(self.lu, remote_node)
8260

    
8261
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8262
        assert old_node_info is not None
8263
        if old_node_info.offline and not self.early_release:
8264
          # doesn't make sense to delay the release
8265
          self.early_release = True
8266
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8267
                          " early-release mode", secondary_node)
8268

    
8269
      else:
8270
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8271
                                     self.mode)
8272

    
8273
      # If not specified all disks should be replaced
8274
      if not self.disks:
8275
        self.disks = range(len(self.instance.disks))
8276

    
8277
    for node in check_nodes:
8278
      _CheckNodeOnline(self.lu, node)
8279

    
8280
    # Check whether disks are valid
8281
    for disk_idx in self.disks:
8282
      instance.FindDisk(disk_idx)
8283

    
8284
    # Get secondary node IP addresses
8285
    node_2nd_ip = {}
8286

    
8287
    for node_name in [self.target_node, self.other_node, self.new_node]:
8288
      if node_name is not None:
8289
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8290

    
8291
    self.node_secondary_ip = node_2nd_ip
8292

    
8293
  def Exec(self, feedback_fn):
8294
    """Execute disk replacement.
8295

8296
    This dispatches the disk replacement to the appropriate handler.
8297

8298
    """
8299
    if self.delay_iallocator:
8300
      self._CheckPrereq2()
8301

    
8302
    if not self.disks:
8303
      feedback_fn("No disks need replacement")
8304
      return
8305

    
8306
    feedback_fn("Replacing disk(s) %s for %s" %
8307
                (utils.CommaJoin(self.disks), self.instance.name))
8308

    
8309
    activate_disks = (not self.instance.admin_up)
8310

    
8311
    # Activate the instance disks if we're replacing them on a down instance
8312
    if activate_disks:
8313
      _StartInstanceDisks(self.lu, self.instance, True)
8314

    
8315
    try:
8316
      # Should we replace the secondary node?
8317
      if self.new_node is not None:
8318
        fn = self._ExecDrbd8Secondary
8319
      else:
8320
        fn = self._ExecDrbd8DiskOnly
8321

    
8322
      return fn(feedback_fn)
8323

    
8324
    finally:
8325
      # Deactivate the instance disks if we're replacing them on a
8326
      # down instance
8327
      if activate_disks:
8328
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8329

    
8330
  def _CheckVolumeGroup(self, nodes):
8331
    self.lu.LogInfo("Checking volume groups")
8332

    
8333
    vgname = self.cfg.GetVGName()
8334

    
8335
    # Make sure volume group exists on all involved nodes
8336
    results = self.rpc.call_vg_list(nodes)
8337
    if not results:
8338
      raise errors.OpExecError("Can't list volume groups on the nodes")
8339

    
8340
    for node in nodes:
8341
      res = results[node]
8342
      res.Raise("Error checking node %s" % node)
8343
      if vgname not in res.payload:
8344
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8345
                                 (vgname, node))
8346

    
8347
  def _CheckDisksExistence(self, nodes):
8348
    # Check disk existence
8349
    for idx, dev in enumerate(self.instance.disks):
8350
      if idx not in self.disks:
8351
        continue
8352

    
8353
      for node in nodes:
8354
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8355
        self.cfg.SetDiskID(dev, node)
8356

    
8357
        result = self.rpc.call_blockdev_find(node, dev)
8358

    
8359
        msg = result.fail_msg
8360
        if msg or not result.payload:
8361
          if not msg:
8362
            msg = "disk not found"
8363
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8364
                                   (idx, node, msg))
8365

    
8366
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8367
    for idx, dev in enumerate(self.instance.disks):
8368
      if idx not in self.disks:
8369
        continue
8370

    
8371
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8372
                      (idx, node_name))
8373

    
8374
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8375
                                   ldisk=ldisk):
8376
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8377
                                 " replace disks for instance %s" %
8378
                                 (node_name, self.instance.name))
8379

    
8380
  def _CreateNewStorage(self, node_name):
8381
    vgname = self.cfg.GetVGName()
8382
    iv_names = {}
8383

    
8384
    for idx, dev in enumerate(self.instance.disks):
8385
      if idx not in self.disks:
8386
        continue
8387

    
8388
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8389

    
8390
      self.cfg.SetDiskID(dev, node_name)
8391

    
8392
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8393
      names = _GenerateUniqueNames(self.lu, lv_names)
8394

    
8395
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8396
                             logical_id=(vgname, names[0]))
8397
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8398
                             logical_id=(vgname, names[1]))
8399

    
8400
      new_lvs = [lv_data, lv_meta]
8401
      old_lvs = dev.children
8402
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8403

    
8404
      # we pass force_create=True to force the LVM creation
8405
      for new_lv in new_lvs:
8406
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8407
                        _GetInstanceInfoText(self.instance), False)
8408

    
8409
    return iv_names
8410

    
8411
  def _CheckDevices(self, node_name, iv_names):
8412
    for name, (dev, _, _) in iv_names.iteritems():
8413
      self.cfg.SetDiskID(dev, node_name)
8414

    
8415
      result = self.rpc.call_blockdev_find(node_name, dev)
8416

    
8417
      msg = result.fail_msg
8418
      if msg or not result.payload:
8419
        if not msg:
8420
          msg = "disk not found"
8421
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8422
                                 (name, msg))
8423

    
8424
      if result.payload.is_degraded:
8425
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8426

    
8427
  def _RemoveOldStorage(self, node_name, iv_names):
8428
    for name, (_, old_lvs, _) in iv_names.iteritems():
8429
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8430

    
8431
      for lv in old_lvs:
8432
        self.cfg.SetDiskID(lv, node_name)
8433

    
8434
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8435
        if msg:
8436
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8437
                             hint="remove unused LVs manually")
8438

    
8439
  def _ReleaseNodeLock(self, node_name):
    """Releases the lock(s) for the given node name(s)."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8442

    
8443
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8444
    """Replace a disk on the primary or secondary for DRBD 8.
8445

8446
    The algorithm for replace is quite complicated:
8447

8448
      1. for each disk to be replaced:
8449

8450
        1. create new LVs on the target node with unique names
8451
        1. detach old LVs from the drbd device
8452
        1. rename old LVs to name_replaced.<time_t>
8453
        1. rename new LVs to old LVs
8454
        1. attach the new LVs (with the old names now) to the drbd device
8455

8456
      1. wait for sync across all devices
8457

8458
      1. for each modified disk:
8459

8460
        1. remove old LVs (which have the name name_replaces.<time_t>)
8461

8462
    Failures are not very well handled.
8463

8464
    """
8465
    steps_total = 6
8466

    
8467
    # Step: check device activation
8468
    self.lu.LogStep(1, steps_total, "Check device existence")
8469
    self._CheckDisksExistence([self.other_node, self.target_node])
8470
    self._CheckVolumeGroup([self.target_node, self.other_node])
8471

    
8472
    # Step: check other node consistency
8473
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8474
    self._CheckDisksConsistency(self.other_node,
8475
                                self.other_node == self.instance.primary_node,
8476
                                False)
8477

    
8478
    # Step: create new storage
8479
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8480
    iv_names = self._CreateNewStorage(self.target_node)
8481

    
8482
    # Step: for each lv, detach+rename*2+attach
8483
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8484
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8485
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8486

    
8487
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8488
                                                     old_lvs)
8489
      result.Raise("Can't detach drbd from local storage on node"
8490
                   " %s for device %s" % (self.target_node, dev.iv_name))
8491
      #dev.children = []
8492
      #cfg.Update(instance)
8493

    
8494
      # ok, we created the new LVs, so now we know we have the needed
8495
      # storage; as such, we proceed on the target node to rename
8496
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8497
      # using the assumption that logical_id == physical_id (which in
8498
      # turn is the unique_id on that node)
8499

    
8500
      # FIXME(iustin): use a better name for the replaced LVs
8501
      temp_suffix = int(time.time())
8502
      ren_fn = lambda d, suff: (d.physical_id[0],
8503
                                d.physical_id[1] + "_replaced-%s" % suff)
8504

    
8505
      # Build the rename list based on what LVs exist on the node
8506
      rename_old_to_new = []
8507
      for to_ren in old_lvs:
8508
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8509
        if not result.fail_msg and result.payload:
8510
          # device exists
8511
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8512

    
8513
      self.lu.LogInfo("Renaming the old LVs on the target node")
8514
      result = self.rpc.call_blockdev_rename(self.target_node,
8515
                                             rename_old_to_new)
8516
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8517

    
8518
      # Now we rename the new LVs to the old LVs
8519
      self.lu.LogInfo("Renaming the new LVs on the target node")
8520
      rename_new_to_old = [(new, old.physical_id)
8521
                           for old, new in zip(old_lvs, new_lvs)]
8522
      result = self.rpc.call_blockdev_rename(self.target_node,
8523
                                             rename_new_to_old)
8524
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8525

    
8526
      for old, new in zip(old_lvs, new_lvs):
8527
        new.logical_id = old.logical_id
8528
        self.cfg.SetDiskID(new, self.target_node)
8529

    
8530
      for disk in old_lvs:
8531
        disk.logical_id = ren_fn(disk, temp_suffix)
8532
        self.cfg.SetDiskID(disk, self.target_node)
8533

    
8534
      # Now that the new lvs have the old name, we can add them to the device
8535
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8536
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8537
                                                  new_lvs)
8538
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8548

    
8549
      dev.children = new_lvs
8550

    
8551
      self.cfg.Update(self.instance, feedback_fn)
8552

    
8553
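    # With early_release the old LVs are removed and both node locks are
    # dropped before waiting for the resync; otherwise the removal is
    # deferred until the devices are back in sync.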
    cstep = 5
8554
    if self.early_release:
8555
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8556
      cstep += 1
8557
      self._RemoveOldStorage(self.target_node, iv_names)
8558
      # WARNING: we release both node locks here, do not do other RPCs
8559
      # than WaitForSync to the primary node
8560
      self._ReleaseNodeLock([self.target_node, self.other_node])
8561

    
8562
    # Wait for sync
8563
    # This can fail as the old devices are degraded and _WaitForSync
8564
    # does a combined result over all disks, so we don't check its return value
8565
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8566
    cstep += 1
8567
    _WaitForSync(self.lu, self.instance)
8568

    
8569
    # Check all devices manually
8570
    self._CheckDevices(self.instance.primary_node, iv_names)
8571

    
8572
    # Step: remove old storage
8573
    if not self.early_release:
8574
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8575
      cstep += 1
8576
      self._RemoveOldStorage(self.target_node, iv_names)
8577

    
8578
  def _ExecDrbd8Secondary(self, feedback_fn):
8579
    """Replace the secondary node for DRBD 8.
8580

8581
    The algorithm for replace is quite complicated:
8582
      - for all disks of the instance:
8583
        - create new LVs on the new node with same names
8584
        - shutdown the drbd device on the old secondary
8585
        - disconnect the drbd network on the primary
8586
        - create the drbd device on the new secondary
8587
        - network attach the drbd on the primary, using an artifice:
8588
          the drbd code for Attach() will connect to the network if it
8589
          finds a device which is connected to the good local disks but
8590
          not network enabled
8591
      - wait for sync across all devices
8592
      - remove all disks from the old secondary
8593

8594
    Failures are not very well handled.
8595

8596
    """
8597
    steps_total = 6
8598

    
8599
    # Step: check device activation
8600
    self.lu.LogStep(1, steps_total, "Check device existence")
8601
    self._CheckDisksExistence([self.instance.primary_node])
8602
    self._CheckVolumeGroup([self.instance.primary_node])
8603

    
8604
    # Step: check other node consistency
8605
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8606
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8607

    
8608
    # Step: create new storage
8609
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8610
    for idx, dev in enumerate(self.instance.disks):
8611
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8612
                      (self.new_node, idx))
8613
      # we pass force_create=True to force LVM creation
8614
      for new_lv in dev.children:
8615
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8616
                        _GetInstanceInfoText(self.instance), False)
8617

    
8618
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
8621
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8622
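    # Allocate one new DRBD minor on the new node for every disk of the
    # instance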
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8623
                                         for dev in self.instance.disks],
8624
                                        self.instance.name)
8625
    logging.debug("Allocated minors %r", minors)
8626

    
8627
    iv_names = {}
8628
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8629
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8630
                      (self.new_node, idx))
8631
      # create new devices on new_node; note that we create two IDs:
8632
      # one without port, so the drbd will be activated without
8633
      # networking information on the new node at this stage, and one
8634
      # with network, for the latter activation in step 4
8635
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8636
      if self.instance.primary_node == o_node1:
8637
        p_minor = o_minor1
8638
      else:
8639
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8640
        p_minor = o_minor2
8641

    
8642
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8643
                      p_minor, new_minor, o_secret)
8644
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8645
                    p_minor, new_minor, o_secret)
8646

    
8647
      iv_names[idx] = (dev, dev.children, new_net_id)
8648
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8649
                    new_net_id)
8650
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8651
                              logical_id=new_alone_id,
8652
                              children=dev.children,
8653
                              size=dev.size)
8654
      try:
8655
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8656
                              _GetInstanceInfoText(self.instance), False)
8657
      except errors.GenericError:
8658
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8659
        raise
8660

    
8661
    # We have new devices, shutdown the drbd on the old secondary
8662
    for idx, dev in enumerate(self.instance.disks):
8663
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8664
      self.cfg.SetDiskID(dev, self.target_node)
8665
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8666
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))
8671

    
8672
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8673
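    # This is a multi-node RPC; only the primary node is targeted, so its
    # result is picked out of the returned dict directly.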
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8674
                                               self.node_secondary_ip,
8675
                                               self.instance.disks)\
8676
                                              [self.instance.primary_node]
8677

    
8678
    msg = result.fail_msg
8679
    if msg:
8680
      # detaches didn't succeed (unlikely)
8681
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8682
      raise errors.OpExecError("Can't detach the disks from the network on"
8683
                               " old node: %s" % (msg,))
8684

    
8685
    # if we managed to detach at least one, we update all the disks of
8686
    # the instance to point to the new secondary
8687
    self.lu.LogInfo("Updating instance configuration")
8688
    for dev, _, new_logical_id in iv_names.itervalues():
8689
      dev.logical_id = new_logical_id
8690
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8691

    
8692
    self.cfg.Update(self.instance, feedback_fn)
8693

    
8694
    # and now perform the drbd attach
8695
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8696
                    " (standalone => connected)")
8697
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8698
                                            self.new_node],
8699
                                           self.node_secondary_ip,
8700
                                           self.instance.disks,
8701
                                           self.instance.name,
8702
                                           False)
8703
    for to_node, to_result in result.items():
8704
      msg = to_result.fail_msg
8705
      if msg:
8706
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8707
                           to_node, msg,
8708
                           hint=("please do a gnt-instance info to see the"
8709
                                 " status of disks"))
8710
    cstep = 5
8711
    if self.early_release:
8712
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8713
      cstep += 1
8714
      self._RemoveOldStorage(self.target_node, iv_names)
8715
      # WARNING: we release all node locks here, do not do other RPCs
8716
      # than WaitForSync to the primary node
8717
      self._ReleaseNodeLock([self.instance.primary_node,
8718
                             self.target_node,
8719
                             self.new_node])
8720

    
8721
    # Wait for sync
8722
    # This can fail as the old devices are degraded and _WaitForSync
8723
    # does a combined result over all disks, so we don't check its return value
8724
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8725
    cstep += 1
8726
    _WaitForSync(self.lu, self.instance)
8727

    
8728
    # Check all devices manually
8729
    self._CheckDevices(self.instance.primary_node, iv_names)
8730

    
8731
    # Step: remove old storage
8732
    if not self.early_release:
8733
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8734
      self._RemoveOldStorage(self.target_node, iv_names)
8735

    
8736

    
8737
class LURepairNodeStorage(NoHooksLU):
8738
  """Repairs the volume group on a node.
8739

8740
  """
8741
  _OP_PARAMS = [
8742
    _PNodeName,
8743
    ("storage_type", ht.NoDefault, _CheckStorageType),
8744
    ("name", ht.NoDefault, ht.TNonEmptyString),
8745
    ("ignore_consistency", False, ht.TBool),
8746
    ]
8747
  REQ_BGL = False
8748

    
8749
  def CheckArguments(self):
8750
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8751

    
8752
    storage_type = self.op.storage_type
8753

    
8754
    if (constants.SO_FIX_CONSISTENCY not in
8755
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8756
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8757
                                 " repaired" % storage_type,
8758
                                 errors.ECODE_INVAL)
8759

    
8760
  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }
8764

    
8765
  def _CheckFaultyDisks(self, instance, node_name):
8766
    """Ensure faulty disks abort the opcode or at least warn."""
8767
    try:
8768
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8769
                                  node_name, True):
8770
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8771
                                   " node '%s'" % (instance.name, node_name),
8772
                                   errors.ECODE_STATE)
8773
    except errors.OpPrereqError, err:
8774
      if self.op.ignore_consistency:
8775
        self.proc.LogWarning(str(err.args[0]))
8776
      else:
8777
        raise
8778

    
8779
  def CheckPrereq(self):
8780
    """Check prerequisites.
8781

8782
    """
8783
    # Check whether any instance on this node has faulty disks
8784
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8785
      if not inst.admin_up:
8786
        continue
8787
      check_nodes = set(inst.all_nodes)
8788
      check_nodes.discard(self.op.node_name)
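      # The repair may be destructive on this node, so the instance's data
      # must be intact on its other nodes first.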
8789
      for inst_node_name in check_nodes:
8790
        self._CheckFaultyDisks(inst, inst_node_name)
8791

    
8792
  def Exec(self, feedback_fn):
8793
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8794
                (self.op.name, self.op.node_name))
8795

    
8796
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8797
    result = self.rpc.call_storage_execute(self.op.node_name,
8798
                                           self.op.storage_type, st_args,
8799
                                           self.op.name,
8800
                                           constants.SO_FIX_CONSISTENCY)
8801
    result.Raise("Failed to repair storage unit '%s' on %s" %
8802
                 (self.op.name, self.op.node_name))
8803

    
8804

    
8805
class LUNodeEvacuationStrategy(NoHooksLU):
8806
  """Computes the node evacuation strategy.
8807

8808
  """
8809
  _OP_PARAMS = [
8810
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8811
    ("remote_node", None, ht.TMaybeString),
8812
    ("iallocator", None, ht.TMaybeString),
8813
    ]
8814
  REQ_BGL = False
8815

    
8816
  def CheckArguments(self):
8817
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8818

    
8819
  def ExpandNames(self):
8820
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8821
    self.needed_locks = locks = {}
8822
    if self.op.remote_node is None:
8823
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8824
    else:
8825
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8826
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8827

    
8828
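  # With a remote node the result is built here as [instance_name, node]
  # pairs; otherwise it is whatever the iallocator returns for the MEVAC
  # request.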
  def Exec(self, feedback_fn):
8829
    if self.op.remote_node is not None:
8830
      instances = []
8831
      for node in self.op.nodes:
8832
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8833
      result = []
8834
      for i in instances:
8835
        if i.primary_node == self.op.remote_node:
8836
          raise errors.OpPrereqError("Node %s is the primary node of"
8837
                                     " instance %s, cannot use it as"
8838
                                     " secondary" %
8839
                                     (self.op.remote_node, i.name),
8840
                                     errors.ECODE_INVAL)
8841
        result.append([i.name, self.op.remote_node])
8842
    else:
8843
      ial = IAllocator(self.cfg, self.rpc,
8844
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8845
                       evac_nodes=self.op.nodes)
8846
      ial.Run(self.op.iallocator, validate=True)
8847
      if not ial.success:
8848
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8849
                                 errors.ECODE_NORES)
8850
      result = ial.result
8851
    return result
8852

    
8853

    
8854
class LUGrowDisk(LogicalUnit):
8855
  """Grow a disk of an instance.
8856

8857
  """
8858
  HPATH = "disk-grow"
8859
  HTYPE = constants.HTYPE_INSTANCE
8860
  _OP_PARAMS = [
8861
    _PInstanceName,
8862
    ("disk", ht.NoDefault, ht.TInt),
8863
    ("amount", ht.NoDefault, ht.TInt),
8864
    ("wait_for_sync", True, ht.TBool),
8865
    ]
8866
  REQ_BGL = False
8867

    
8868
  def ExpandNames(self):
8869
    self._ExpandAndLockInstance()
8870
    self.needed_locks[locking.LEVEL_NODE] = []
8871
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8872

    
8873
  def DeclareLocks(self, level):
8874
    if level == locking.LEVEL_NODE:
8875
      self._LockInstancesNodes()
8876

    
8877
  def BuildHooksEnv(self):
8878
    """Build hooks env.
8879

8880
    This runs on the master, the primary and all the secondaries.
8881

8882
    """
8883
    env = {
8884
      "DISK": self.op.disk,
8885
      "AMOUNT": self.op.amount,
8886
      }
8887
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8888
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8889
    return env, nl, nl
8890

    
8891
  def CheckPrereq(self):
8892
    """Check prerequisites.
8893

8894
    This checks that the instance is in the cluster.
8895

8896
    """
8897
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8898
    assert instance is not None, \
8899
      "Cannot retrieve locked instance %s" % self.op.instance_name
8900
    nodenames = list(instance.all_nodes)
8901
    for node in nodenames:
8902
      _CheckNodeOnline(self, node)
8903

    
8904
    self.instance = instance
8905

    
8906
    if instance.disk_template not in constants.DTS_GROWABLE:
8907
      raise errors.OpPrereqError("Instance's disk layout does not support"
8908
                                 " growing.", errors.ECODE_INVAL)
8909

    
8910
    self.disk = instance.FindDisk(self.op.disk)
8911

    
8912
    if instance.disk_template != constants.DT_FILE:
8913
      # TODO: check the free disk space for file, when that feature
8914
      # will be supported
8915
      _CheckNodesFreeDiskPerVG(self, nodenames,
8916
                               {self.disk.physical_id[0]: self.op.amount})
8917

    
8918
  def Exec(self, feedback_fn):
8919
    """Execute disk grow.
8920

8921
    """
8922
    instance = self.instance
8923
    disk = self.disk
8924

    
8925
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8926
    if not disks_ok:
8927
      raise errors.OpExecError("Cannot activate block device to grow")
8928

    
8929
    for node in instance.all_nodes:
8930
      self.cfg.SetDiskID(disk, node)
8931
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8932
      result.Raise("Grow request failed to node %s" % node)
8933

    
8934
      # TODO: Rewrite code to work properly
8935
      # DRBD goes into sync mode for a short amount of time after executing the
8936
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8937
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8938
      # time is a work-around.
8939
      time.sleep(5)
8940

    
8941
    disk.RecordGrow(self.op.amount)
8942
    self.cfg.Update(instance, feedback_fn)
8943
    if self.op.wait_for_sync:
8944
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8945
      if disk_abort:
8946
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8947
                             " status.\nPlease check the instance.")
8948
      if not instance.admin_up:
8949
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8950
    elif not instance.admin_up:
8951
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8952
                           " not supposed to be running because no wait for"
8953
                           " sync mode was requested.")
8954

    
8955

    
8956
class LUQueryInstanceData(NoHooksLU):
8957
  """Query runtime instance data.
8958

8959
  """
8960
  _OP_PARAMS = [
8961
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8962
    ("static", False, ht.TBool),
8963
    ]
8964
  REQ_BGL = False
8965

    
8966
  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8969

    
8970
    if self.op.instances:
8971
      self.wanted_names = []
8972
      for name in self.op.instances:
8973
        full_name = _ExpandInstanceName(self.cfg, name)
8974
        self.wanted_names.append(full_name)
8975
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8976
    else:
8977
      self.wanted_names = None
8978
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8979

    
8980
    self.needed_locks[locking.LEVEL_NODE] = []
8981
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8982

    
8983
  def DeclareLocks(self, level):
8984
    if level == locking.LEVEL_NODE:
8985
      self._LockInstancesNodes()
8986

    
8987
  def CheckPrereq(self):
8988
    """Check prerequisites.
8989

8990
    This only checks the optional instance list against the existing names.
8991

8992
    """
8993
    if self.wanted_names is None:
8994
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8995

    
8996
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8997
                             in self.wanted_names]
8998

    
8999
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9000
    """Returns the status of a block device
9001

9002
    """
9003
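    # In static mode (or when no node is given) no RPC is made and no runtime
    # status is reported for the device.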
    if self.op.static or not node:
9004
      return None
9005

    
9006
    self.cfg.SetDiskID(dev, node)
9007

    
9008
    result = self.rpc.call_blockdev_find(node, dev)
9009
    if result.offline:
9010
      return None
9011

    
9012
    result.Raise("Can't compute disk status for %s" % instance_name)
9013

    
9014
    status = result.payload
9015
    if status is None:
9016
      return None
9017

    
9018
    return (status.dev_path, status.major, status.minor,
9019
            status.sync_percent, status.estimated_time,
9020
            status.is_degraded, status.ldisk_status)
9021

    
9022
  def _ComputeDiskStatus(self, instance, snode, dev):
9023
    """Compute block device status.
9024

9025
    """
9026
    if dev.dev_type in constants.LDS_DRBD:
9027
      # we change the snode then (otherwise we use the one passed in)
9028
      if dev.logical_id[0] == instance.primary_node:
9029
        snode = dev.logical_id[1]
9030
      else:
9031
        snode = dev.logical_id[0]
9032

    
9033
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9034
                                              instance.name, dev)
9035
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9036

    
9037
    if dev.children:
9038
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9039
                      for child in dev.children]
9040
    else:
9041
      dev_children = []
9042

    
9043
    data = {
9044
      "iv_name": dev.iv_name,
9045
      "dev_type": dev.dev_type,
9046
      "logical_id": dev.logical_id,
9047
      "physical_id": dev.physical_id,
9048
      "pstatus": dev_pstatus,
9049
      "sstatus": dev_sstatus,
9050
      "children": dev_children,
9051
      "mode": dev.mode,
9052
      "size": dev.size,
9053
      }
9054

    
9055
    return data
9056

    
9057
  def Exec(self, feedback_fn):
9058
    """Gather and return data"""
9059
    result = {}
9060

    
9061
    cluster = self.cfg.GetClusterInfo()
9062

    
9063
    for instance in self.wanted_instances:
9064
      if not self.op.static:
9065
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9066
                                                  instance.name,
9067
                                                  instance.hypervisor)
9068
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9069
        remote_info = remote_info.payload
9070
        if remote_info and "state" in remote_info:
9071
          remote_state = "up"
9072
        else:
9073
          remote_state = "down"
9074
      else:
9075
        remote_state = None
9076
      if instance.admin_up:
9077
        config_state = "up"
9078
      else:
9079
        config_state = "down"
9080

    
9081
      disks = [self._ComputeDiskStatus(instance, None, device)
9082
               for device in instance.disks]
9083

    
9084
      idict = {
9085
        "name": instance.name,
9086
        "config_state": config_state,
9087
        "run_state": remote_state,
9088
        "pnode": instance.primary_node,
9089
        "snodes": instance.secondary_nodes,
9090
        "os": instance.os,
9091
        # this happens to be the same format used for hooks
9092
        "nics": _NICListToTuple(self, instance.nics),
9093
        "disk_template": instance.disk_template,
9094
        "disks": disks,
9095
        "hypervisor": instance.hypervisor,
9096
        "network_port": instance.network_port,
9097
        "hv_instance": instance.hvparams,
9098
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9099
        "be_instance": instance.beparams,
9100
        "be_actual": cluster.FillBE(instance),
9101
        "os_instance": instance.osparams,
9102
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9103
        "serial_no": instance.serial_no,
9104
        "mtime": instance.mtime,
9105
        "ctime": instance.ctime,
9106
        "uuid": instance.uuid,
9107
        }
9108

    
9109
      result[instance.name] = idict
9110

    
9111
    return result
9112

    
9113

    
9114
class LUSetInstanceParams(LogicalUnit):
9115
  """Modifies an instances's parameters.
9116

9117
  """
9118
  HPATH = "instance-modify"
9119
  HTYPE = constants.HTYPE_INSTANCE
9120
  _OP_PARAMS = [
9121
    _PInstanceName,
9122
    ("nics", ht.EmptyList, ht.TList),
9123
    ("disks", ht.EmptyList, ht.TList),
9124
    ("beparams", ht.EmptyDict, ht.TDict),
9125
    ("hvparams", ht.EmptyDict, ht.TDict),
9126
    ("disk_template", None, ht.TMaybeString),
9127
    ("remote_node", None, ht.TMaybeString),
9128
    ("os_name", None, ht.TMaybeString),
9129
    ("force_variant", False, ht.TBool),
9130
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9131
    _PForce,
9132
    ]
9133
  REQ_BGL = False
9134

    
9135
  def CheckArguments(self):
9136
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9137
            self.op.hvparams or self.op.beparams or self.op.os_name):
9138
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9139

    
9140
    if self.op.hvparams:
9141
      _CheckGlobalHvParams(self.op.hvparams)
9142

    
9143
    # Disk validation
9144
    disk_addremove = 0
9145
    for disk_op, disk_dict in self.op.disks:
9146
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9147
      if disk_op == constants.DDM_REMOVE:
9148
        disk_addremove += 1
9149
        continue
9150
      elif disk_op == constants.DDM_ADD:
9151
        disk_addremove += 1
9152
      else:
9153
        if not isinstance(disk_op, int):
9154
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9155
        if not isinstance(disk_dict, dict):
9156
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9157
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9158

    
9159
      if disk_op == constants.DDM_ADD:
9160
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9161
        if mode not in constants.DISK_ACCESS_SET:
9162
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9163
                                     errors.ECODE_INVAL)
9164
        size = disk_dict.get('size', None)
9165
        if size is None:
9166
          raise errors.OpPrereqError("Required disk parameter size missing",
9167
                                     errors.ECODE_INVAL)
9168
        try:
9169
          size = int(size)
9170
        except (TypeError, ValueError), err:
9171
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9172
                                     str(err), errors.ECODE_INVAL)
9173
        disk_dict['size'] = size
9174
      else:
9175
        # modification of disk
9176
        if 'size' in disk_dict:
9177
          raise errors.OpPrereqError("Disk size change not possible, use"
9178
                                     " grow-disk", errors.ECODE_INVAL)
9179

    
9180
    if disk_addremove > 1:
9181
      raise errors.OpPrereqError("Only one disk add or remove operation"
9182
                                 " supported at a time", errors.ECODE_INVAL)
9183

    
9184
    if self.op.disks and self.op.disk_template is not None:
9185
      raise errors.OpPrereqError("Disk template conversion and other disk"
9186
                                 " changes not supported at the same time",
9187
                                 errors.ECODE_INVAL)
9188

    
9189
    if self.op.disk_template:
9190
      _CheckDiskTemplate(self.op.disk_template)
9191
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
9192
          self.op.remote_node is None):
9193
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
9194
                                   " one requires specifying a secondary node",
9195
                                   errors.ECODE_INVAL)
9196

    
9197
    # NIC validation
9198
    nic_addremove = 0
9199
    for nic_op, nic_dict in self.op.nics:
9200
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9201
      if nic_op == constants.DDM_REMOVE:
9202
        nic_addremove += 1
9203
        continue
9204
      elif nic_op == constants.DDM_ADD:
9205
        nic_addremove += 1
9206
      else:
9207
        if not isinstance(nic_op, int):
9208
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9209
        if not isinstance(nic_dict, dict):
9210
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9211
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9212

    
9213
      # nic_dict should be a dict
9214
      nic_ip = nic_dict.get('ip', None)
9215
      if nic_ip is not None:
9216
        if nic_ip.lower() == constants.VALUE_NONE:
9217
          nic_dict['ip'] = None
9218
        else:
9219
          if not netutils.IPAddress.IsValid(nic_ip):
9220
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9221
                                       errors.ECODE_INVAL)
9222

    
9223
      nic_bridge = nic_dict.get('bridge', None)
9224
      nic_link = nic_dict.get('link', None)
9225
      if nic_bridge and nic_link:
9226
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9227
                                   " at the same time", errors.ECODE_INVAL)
9228
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9229
        nic_dict['bridge'] = None
9230
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9231
        nic_dict['link'] = None
9232

    
9233
      if nic_op == constants.DDM_ADD:
9234
        nic_mac = nic_dict.get('mac', None)
9235
        if nic_mac is None:
9236
          nic_dict['mac'] = constants.VALUE_AUTO
9237

    
9238
      if 'mac' in nic_dict:
9239
        nic_mac = nic_dict['mac']
9240
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9241
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9242

    
9243
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9244
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9245
                                     " modifying an existing nic",
9246
                                     errors.ECODE_INVAL)
9247

    
9248
    if nic_addremove > 1:
9249
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9250
                                 " supported at a time", errors.ECODE_INVAL)
9251

    
9252
  def ExpandNames(self):
9253
    self._ExpandAndLockInstance()
9254
    self.needed_locks[locking.LEVEL_NODE] = []
9255
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9256

    
9257
  def DeclareLocks(self, level):
9258
    if level == locking.LEVEL_NODE:
9259
      self._LockInstancesNodes()
9260
      if self.op.disk_template and self.op.remote_node:
9261
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9262
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9263

    
9264
  def BuildHooksEnv(self):
9265
    """Build hooks env.
9266

9267
    This runs on the master, primary and secondaries.
9268

9269
    """
9270
    args = dict()
9271
    if constants.BE_MEMORY in self.be_new:
9272
      args['memory'] = self.be_new[constants.BE_MEMORY]
9273
    if constants.BE_VCPUS in self.be_new:
9274
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9275
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9276
    # information at all.
9277
    if self.op.nics:
9278
      args['nics'] = []
9279
      nic_override = dict(self.op.nics)
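      # self.op.nics is a list of (index-or-DDM_*, settings) pairs; as a dict
      # it allows direct lookup of per-NIC overrides below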
9280
      for idx, nic in enumerate(self.instance.nics):
9281
        if idx in nic_override:
9282
          this_nic_override = nic_override[idx]
9283
        else:
9284
          this_nic_override = {}
9285
        if 'ip' in this_nic_override:
9286
          ip = this_nic_override['ip']
9287
        else:
9288
          ip = nic.ip
9289
        if 'mac' in this_nic_override:
9290
          mac = this_nic_override['mac']
9291
        else:
9292
          mac = nic.mac
9293
        if idx in self.nic_pnew:
9294
          nicparams = self.nic_pnew[idx]
9295
        else:
9296
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9297
        mode = nicparams[constants.NIC_MODE]
9298
        link = nicparams[constants.NIC_LINK]
9299
        args['nics'].append((ip, mac, mode, link))
9300
      if constants.DDM_ADD in nic_override:
9301
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9302
        mac = nic_override[constants.DDM_ADD]['mac']
9303
        nicparams = self.nic_pnew[constants.DDM_ADD]
9304
        mode = nicparams[constants.NIC_MODE]
9305
        link = nicparams[constants.NIC_LINK]
9306
        args['nics'].append((ip, mac, mode, link))
9307
      elif constants.DDM_REMOVE in nic_override:
9308
        del args['nics'][-1]
9309

    
9310
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9311
    if self.op.disk_template:
9312
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9313
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9314
    return env, nl, nl
9315

    
9316
  def CheckPrereq(self):
9317
    """Check prerequisites.
9318

9319
    This only checks the instance list against the existing names.
9320

9321
    """
9322
    # checking the new params on the primary/secondary nodes
9323

    
9324
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9325
    cluster = self.cluster = self.cfg.GetClusterInfo()
9326
    assert self.instance is not None, \
9327
      "Cannot retrieve locked instance %s" % self.op.instance_name
9328
    pnode = instance.primary_node
9329
    nodelist = list(instance.all_nodes)
9330

    
9331
    # OS change
9332
    if self.op.os_name and not self.op.force:
9333
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9334
                      self.op.force_variant)
9335
      instance_os = self.op.os_name
9336
    else:
9337
      instance_os = instance.os
9338

    
9339
    if self.op.disk_template:
9340
      if instance.disk_template == self.op.disk_template:
9341
        raise errors.OpPrereqError("Instance already has disk template %s" %
9342
                                   instance.disk_template, errors.ECODE_INVAL)
9343

    
9344
      if (instance.disk_template,
9345
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9346
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9347
                                   " %s to %s" % (instance.disk_template,
9348
                                                  self.op.disk_template),
9349
                                   errors.ECODE_INVAL)
9350
      _CheckInstanceDown(self, instance, "cannot change disk template")
9351
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9352
        if self.op.remote_node == pnode:
9353
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9354
                                     " as the primary node of the instance" %
9355
                                     self.op.remote_node, errors.ECODE_STATE)
9356
        _CheckNodeOnline(self, self.op.remote_node)
9357
        _CheckNodeNotDrained(self, self.op.remote_node)
9358
        # FIXME: here we assume that the old instance type is DT_PLAIN
9359
        assert instance.disk_template == constants.DT_PLAIN
9360
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9361
                 for d in instance.disks]
9362
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9363
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9364

    
9365
    # hvparams processing
9366
    if self.op.hvparams:
9367
      hv_type = instance.hypervisor
9368
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9369
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9370
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9371

    
9372
      # local check
9373
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9374
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9375
      self.hv_new = hv_new # the new actual values
9376
      self.hv_inst = i_hvdict # the new dict (without defaults)
9377
    else:
9378
      self.hv_new = self.hv_inst = {}
9379

    
9380
    # beparams processing
9381
    if self.op.beparams:
9382
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9383
                                   use_none=True)
9384
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9385
      be_new = cluster.SimpleFillBE(i_bedict)
9386
      self.be_new = be_new # the new actual values
9387
      self.be_inst = i_bedict # the new dict (without defaults)
9388
    else:
9389
      self.be_new = self.be_inst = {}
9390

    
9391
    # osparams processing
9392
    if self.op.osparams:
9393
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9394
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9395
      self.os_inst = i_osdict # the new dict (without defaults)
9396
    else:
9397
      self.os_inst = {}
9398

    
9399
    self.warn = []
9400

    
9401
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9402
      mem_check_list = [pnode]
9403
      if be_new[constants.BE_AUTO_BALANCE]:
9404
        # either we changed auto_balance to yes or it was from before
9405
        mem_check_list.extend(instance.secondary_nodes)
9406
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9407
                                                  instance.hypervisor)
9408
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9409
                                         instance.hypervisor)
9410
      pninfo = nodeinfo[pnode]
9411
      msg = pninfo.fail_msg
9412
      if msg:
9413
        # Assume the primary node is unreachable and go ahead
9414
        self.warn.append("Can't get info from primary node %s: %s" %
9415
                         (pnode,  msg))
9416
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9417
        self.warn.append("Node data from primary node %s doesn't contain"
9418
                         " free memory information" % pnode)
9419
      elif instance_info.fail_msg:
9420
        self.warn.append("Can't get instance runtime information: %s" %
9421
                        instance_info.fail_msg)
9422
      else:
9423
        if instance_info.payload:
9424
          current_mem = int(instance_info.payload['memory'])
9425
        else:
9426
          # Assume instance not running
9427
          # (there is a slight race condition here, but it's not very probable,
9428
          # and we have no other way to check)
9429
          current_mem = 0
9430
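        # The shortfall is the new memory value minus what the instance
        # already uses minus what the node has free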
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9431
                    pninfo.payload['memory_free'])
9432
        if miss_mem > 0:
9433
          raise errors.OpPrereqError("This change will prevent the instance"
9434
                                     " from starting, due to %d MB of memory"
9435
                                     " missing on its primary node" % miss_mem,
9436
                                     errors.ECODE_NORES)
9437

    
9438
      if be_new[constants.BE_AUTO_BALANCE]:
9439
        for node, nres in nodeinfo.items():
9440
          if node not in instance.secondary_nodes:
9441
            continue
9442
          msg = nres.fail_msg
9443
          if msg:
9444
            self.warn.append("Can't get info from secondary node %s: %s" %
9445
                             (node, msg))
9446
          elif not isinstance(nres.payload.get('memory_free', None), int):
9447
            self.warn.append("Secondary node %s didn't return free"
9448
                             " memory information" % node)
9449
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9450
            self.warn.append("Not enough memory to failover instance to"
9451
                             " secondary node %s" % node)
9452

    
9453
    # NIC processing
9454
    self.nic_pnew = {}
9455
    self.nic_pinst = {}
9456
    for nic_op, nic_dict in self.op.nics:
9457
      if nic_op == constants.DDM_REMOVE:
9458
        if not instance.nics:
9459
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9460
                                     errors.ECODE_INVAL)
9461
        continue
9462
      if nic_op != constants.DDM_ADD:
9463
        # an existing nic
9464
        if not instance.nics:
9465
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9466
                                     " no NICs" % nic_op,
9467
                                     errors.ECODE_INVAL)
9468
        if nic_op < 0 or nic_op >= len(instance.nics):
9469
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9470
                                     " are 0 to %d" %
9471
                                     (nic_op, len(instance.nics) - 1),
9472
                                     errors.ECODE_INVAL)
9473
        old_nic_params = instance.nics[nic_op].nicparams
9474
        old_nic_ip = instance.nics[nic_op].ip
9475
      else:
9476
        old_nic_params = {}
9477
        old_nic_ip = None
9478

    
9479
      update_params_dict = dict([(key, nic_dict[key])
9480
                                 for key in constants.NICS_PARAMETERS
9481
                                 if key in nic_dict])
9482

    
9483
      if 'bridge' in nic_dict:
9484
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9485

    
9486
      new_nic_params = _GetUpdatedParams(old_nic_params,
9487
                                         update_params_dict)
9488
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9489
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9490
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9491
      self.nic_pinst[nic_op] = new_nic_params
9492
      self.nic_pnew[nic_op] = new_filled_nic_params
9493
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9494

    
9495
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9496
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9497
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9498
        if msg:
9499
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9500
          if self.op.force:
9501
            self.warn.append(msg)
9502
          else:
9503
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9504
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9505
        if 'ip' in nic_dict:
9506
          nic_ip = nic_dict['ip']
9507
        else:
9508
          nic_ip = old_nic_ip
9509
        if nic_ip is None:
9510
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9511
                                     ' on a routed nic', errors.ECODE_INVAL)
9512
      if 'mac' in nic_dict:
9513
        nic_mac = nic_dict['mac']
9514
        if nic_mac is None:
9515
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9516
                                     errors.ECODE_INVAL)
9517
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9518
          # otherwise generate the mac
9519
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9520
        else:
9521
          # or validate/reserve the current one
9522
          try:
9523
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9524
          except errors.ReservationError:
9525
            raise errors.OpPrereqError("MAC address %s already in use"
9526
                                       " in cluster" % nic_mac,
9527
                                       errors.ECODE_NOTUNIQUE)
9528

    
9529
    # DISK processing
9530
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9531
      raise errors.OpPrereqError("Disk operations not supported for"
9532
                                 " diskless instances",
9533
                                 errors.ECODE_INVAL)
9534
    for disk_op, _ in self.op.disks:
9535
      if disk_op == constants.DDM_REMOVE:
9536
        if len(instance.disks) == 1:
9537
          raise errors.OpPrereqError("Cannot remove the last disk of"
9538
                                     " an instance", errors.ECODE_INVAL)
9539
        _CheckInstanceDown(self, instance, "cannot remove disks")
9540

    
9541
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
9543
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9544
                                   " add more" % constants.MAX_DISKS,
9545
                                   errors.ECODE_STATE)
9546
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9547
        # an existing disk
9548
        if disk_op < 0 or disk_op >= len(instance.disks):
9549
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9550
                                     " are 0 to %d" %
9551
                                     (disk_op, len(instance.disks)),
9552
                                     errors.ECODE_INVAL)
9553

    
9554
    return
9555

    
9556
  def _ConvertPlainToDrbd(self, feedback_fn):
9557
    """Converts an instance from plain to drbd.
9558

9559
    """
9560
    feedback_fn("Converting template to drbd")
9561
    instance = self.instance
9562
    pnode = instance.primary_node
9563
    snode = self.op.remote_node
9564

    
9565
    # create a fake disk info for _GenerateDiskTemplate
9566
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9567
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9568
                                      instance.name, pnode, [snode],
9569
                                      disk_info, None, None, 0, feedback_fn)
9570
    info = _GetInstanceInfoText(instance)
9571
    feedback_fn("Creating aditional volumes...")
9572
    # first, create the missing data and meta devices
9573
    for disk in new_disks:
9574
      # unfortunately this is... not too nice
9575
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9576
                            info, True)
9577
      for child in disk.children:
9578
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9579
    # at this stage, all new LVs have been created, we can rename the
9580
    # old ones
9581
    feedback_fn("Renaming original volumes...")
9582
    rename_list = [(o, n.children[0].logical_id)
9583
                   for (o, n) in zip(instance.disks, new_disks)]
9584
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9585
    result.Raise("Failed to rename original LVs")
9586

    
9587
    feedback_fn("Initializing DRBD devices...")
9588
    # all child devices are in place, we can now create the DRBD devices
9589
    for disk in new_disks:
9590
      for node in [pnode, snode]:
9591
        f_create = node == pnode
9592
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9593

    
9594
    # at this point, the instance has been modified
9595
    instance.disk_template = constants.DT_DRBD8
9596
    instance.disks = new_disks
9597
    self.cfg.Update(instance, feedback_fn)
9598

    
9599
    # disks are created, waiting for sync
9600
    disk_abort = not _WaitForSync(self, instance)
9601
    if disk_abort:
9602
      raise errors.OpExecError("There are some degraded disks for"
9603
                               " this instance, please cleanup manually")
9604

    
9605
  def _ConvertDrbdToPlain(self, feedback_fn):
9606
    """Converts an instance from drbd to plain.
9607

9608
    """
9609
    instance = self.instance
9610
    assert len(instance.secondary_nodes) == 1
9611
    pnode = instance.primary_node
9612
    snode = instance.secondary_nodes[0]
9613
    feedback_fn("Converting template to plain")
9614

    
9615
    old_disks = instance.disks
9616
    new_disks = [d.children[0] for d in old_disks]
9617

    
9618
    # copy over size and mode
9619
    for parent, child in zip(old_disks, new_disks):
9620
      child.size = parent.size
9621
      child.mode = parent.mode
9622

    
9623
    # update instance structure
9624
    instance.disks = new_disks
9625
    instance.disk_template = constants.DT_PLAIN
9626
    self.cfg.Update(instance, feedback_fn)
9627

    
9628
    feedback_fn("Removing volumes on the secondary node...")
9629
    for disk in old_disks:
9630
      self.cfg.SetDiskID(disk, snode)
9631
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9632
      if msg:
9633
        self.LogWarning("Could not remove block device %s on node %s,"
9634
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9635

    
9636
    feedback_fn("Removing unneeded volumes on the primary node...")
9637
    for idx, disk in enumerate(old_disks):
9638
      meta = disk.children[1]
9639
      self.cfg.SetDiskID(meta, pnode)
9640
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9641
      if msg:
9642
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9643
                        " continuing anyway: %s", idx, pnode, msg)
9644

    
9645
  def Exec(self, feedback_fn):
9646
    """Modifies an instance.
9647

9648
    All parameters take effect only at the next restart of the instance.
9649

9650
    """
9651
    # Process here the warnings from CheckPrereq, as we don't have a
9652
    # feedback_fn there.
9653
    for warn in self.warn:
9654
      feedback_fn("WARNING: %s" % warn)
9655

    
9656
    result = []
9657
    instance = self.instance
9658
    # disk changes
9659
    for disk_op, disk_dict in self.op.disks:
9660
      if disk_op == constants.DDM_REMOVE:
9661
        # remove the last disk
9662
        device = instance.disks.pop()
9663
        device_idx = len(instance.disks)
9664
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9665
          self.cfg.SetDiskID(disk, node)
9666
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9667
          if msg:
9668
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9669
                            " continuing anyway", device_idx, node, msg)
9670
        result.append(("disk/%d" % device_idx, "remove"))
9671
      elif disk_op == constants.DDM_ADD:
9672
        # add a new disk
9673
        if instance.disk_template == constants.DT_FILE:
9674
          file_driver, file_path = instance.disks[0].logical_id
9675
          file_path = os.path.dirname(file_path)
9676
        else:
9677
          file_driver = file_path = None
9678
        disk_idx_base = len(instance.disks)
9679
        new_disk = _GenerateDiskTemplate(self,
9680
                                         instance.disk_template,
9681
                                         instance.name, instance.primary_node,
9682
                                         instance.secondary_nodes,
9683
                                         [disk_dict],
9684
                                         file_path,
9685
                                         file_driver,
9686
                                         disk_idx_base, feedback_fn)[0]
9687
        instance.disks.append(new_disk)
9688
        info = _GetInstanceInfoText(instance)
9689

    
9690
        logging.info("Creating volume %s for instance %s",
9691
                     new_disk.iv_name, instance.name)
9692
        # Note: this needs to be kept in sync with _CreateDisks
9693
        #HARDCODE
9694
        for node in instance.all_nodes:
9695
          f_create = node == instance.primary_node
9696
          try:
9697
            _CreateBlockDev(self, node, instance, new_disk,
9698
                            f_create, info, f_create)
9699
          except errors.OpExecError, err:
9700
            self.LogWarning("Failed to create volume %s (%s) on"
9701
                            " node %s: %s",
9702
                            new_disk.iv_name, new_disk, node, err)
9703
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9704
                       (new_disk.size, new_disk.mode)))
9705
      else:
9706
        # change a given disk
9707
        instance.disks[disk_op].mode = disk_dict['mode']
9708
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9709

    
9710
    if self.op.disk_template:
9711
      r_shut = _ShutdownInstanceDisks(self, instance)
9712
      if not r_shut:
9713
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9714
                                 " proceed with disk template conversion")
9715
      mode = (instance.disk_template, self.op.disk_template)
9716
      try:
9717
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9718
      except:
9719
        self.cfg.ReleaseDRBDMinors(instance.name)
9720
        raise
9721
      result.append(("disk_template", self.op.disk_template))
9722

    
9723
    # NIC changes
9724
    for nic_op, nic_dict in self.op.nics:
9725
      if nic_op == constants.DDM_REMOVE:
9726
        # remove the last nic
9727
        del instance.nics[-1]
9728
        result.append(("nic.%d" % len(instance.nics), "remove"))
9729
      elif nic_op == constants.DDM_ADD:
9730
        # mac and bridge should be set, by now
9731
        mac = nic_dict['mac']
9732
        ip = nic_dict.get('ip', None)
9733
        nicparams = self.nic_pinst[constants.DDM_ADD]
9734
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9735
        instance.nics.append(new_nic)
9736
        result.append(("nic.%d" % (len(instance.nics) - 1),
9737
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9738
                       (new_nic.mac, new_nic.ip,
9739
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9740
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9741
                       )))
9742
      else:
9743
        for key in 'mac', 'ip':
9744
          if key in nic_dict:
9745
            setattr(instance.nics[nic_op], key, nic_dict[key])
9746
        if nic_op in self.nic_pinst:
9747
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9748
        for key, val in nic_dict.iteritems():
9749
          result.append(("nic.%s/%d" % (key, nic_op), val))
9750

    
9751
    # hvparams changes
9752
    if self.op.hvparams:
9753
      instance.hvparams = self.hv_inst
9754
      for key, val in self.op.hvparams.iteritems():
9755
        result.append(("hv/%s" % key, val))
9756

    
9757
    # beparams changes
9758
    if self.op.beparams:
9759
      instance.beparams = self.be_inst
9760
      for key, val in self.op.beparams.iteritems():
9761
        result.append(("be/%s" % key, val))
9762

    
9763
    # OS change
9764
    if self.op.os_name:
9765
      instance.os = self.op.os_name
9766

    
9767
    # osparams changes
9768
    if self.op.osparams:
9769
      instance.osparams = self.os_inst
9770
      for key, val in self.op.osparams.iteritems():
9771
        result.append(("os/%s" % key, val))
9772

    
9773
    self.cfg.Update(instance, feedback_fn)
9774

    
9775
    return result
9776

    
9777
  _DISK_CONVERSIONS = {
9778
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9779
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9780
    }
9781

    
9782

    
9783
class LUQueryExports(NoHooksLU):
9784
  """Query the exports list
9785

9786
  """
9787
  _OP_PARAMS = [
9788
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9789
    ("use_locking", False, ht.TBool),
9790
    ]
9791
  REQ_BGL = False
9792

    
9793
  def ExpandNames(self):
9794
    self.needed_locks = {}
9795
    self.share_locks[locking.LEVEL_NODE] = 1
9796
    if not self.op.nodes:
9797
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9798
    else:
9799
      self.needed_locks[locking.LEVEL_NODE] = \
9800
        _GetWantedNodes(self, self.op.nodes)
9801

    
9802
  def Exec(self, feedback_fn):
9803
    """Compute the list of all the exported system images.
9804

9805
    @rtype: dict
9806
    @return: a dictionary with the structure node->(export-list)
9807
        where export-list is a list of the instances exported on
9808
        that node.
9809

9810
    """
9811
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9812
    rpcresult = self.rpc.call_export_list(self.nodes)
9813
    result = {}
9814
    for node in rpcresult:
9815
      if rpcresult[node].fail_msg:
9816
        result[node] = False
9817
      else:
9818
        result[node] = rpcresult[node].payload
9819

    
9820
    return result
9821

    
9822

    
9823
class LUPrepareExport(NoHooksLU):
9824
  """Prepares an instance for an export and returns useful information.
9825

9826
  """
9827
  _OP_PARAMS = [
9828
    _PInstanceName,
9829
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9830
    ]
9831
  REQ_BGL = False
9832

    
9833
  def ExpandNames(self):
9834
    self._ExpandAndLockInstance()
9835

    
9836
  def CheckPrereq(self):
9837
    """Check prerequisites.
9838

9839
    """
9840
    instance_name = self.op.instance_name
9841

    
9842
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9843
    assert self.instance is not None, \
9844
          "Cannot retrieve locked instance %s" % self.op.instance_name
9845
    _CheckNodeOnline(self, self.instance.primary_node)
9846

    
9847
    self._cds = _GetClusterDomainSecret()
9848

    
9849
  def Exec(self, feedback_fn):
9850
    """Prepares an instance for an export.
9851

9852
    """
9853
    instance = self.instance
9854

    
9855
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9856
      salt = utils.GenerateSecret(8)
9857

    
9858
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9859
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9860
                                              constants.RIE_CERT_VALIDITY)
9861
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9862

    
9863
      (name, cert_pem) = result.payload
9864

    
9865
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9866
                                             cert_pem)
9867

    
9868
      return {
9869
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9870
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9871
                          salt),
9872
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9873
        }
9874

    
9875
    return None
9876

    
9877

    
9878
class LUExportInstance(LogicalUnit):
9879
  """Export an instance to an image in the cluster.
9880

9881
  """
9882
  HPATH = "instance-export"
9883
  HTYPE = constants.HTYPE_INSTANCE
9884
  _OP_PARAMS = [
9885
    _PInstanceName,
9886
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9887
    ("shutdown", True, ht.TBool),
9888
    _PShutdownTimeout,
9889
    ("remove_instance", False, ht.TBool),
9890
    ("ignore_remove_failures", False, ht.TBool),
9891
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9892
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9893
    ("destination_x509_ca", None, ht.TMaybeString),
9894
    ]
9895
  REQ_BGL = False
9896

    
9897
  def CheckArguments(self):
9898
    """Check the arguments.
9899

9900
    """
9901
    self.x509_key_name = self.op.x509_key_name
9902
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9903

    
9904
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9905
      if not self.x509_key_name:
9906
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9907
                                   errors.ECODE_INVAL)
9908

    
9909
      if not self.dest_x509_ca_pem:
9910
        raise errors.OpPrereqError("Missing destination X509 CA",
9911
                                   errors.ECODE_INVAL)
9912

    
9913
  def ExpandNames(self):
9914
    self._ExpandAndLockInstance()
9915

    
9916
    # Lock all nodes for local exports
9917
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9918
      # FIXME: lock only instance primary and destination node
9919
      #
9920
      # Sad but true, for now we have do lock all nodes, as we don't know where
9921
      # the previous export might be, and in this LU we search for it and
9922
      # remove it from its current node. In the future we could fix this by:
9923
      #  - making a tasklet to search (share-lock all), then create the
9924
      #    new one, then one to remove, after
9925
      #  - removing the removal operation altogether
9926
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9927

    
9928
  def DeclareLocks(self, level):
9929
    """Last minute lock declaration."""
9930
    # All nodes are locked anyway, so nothing to do here.
9931

    
9932
  def BuildHooksEnv(self):
9933
    """Build hooks env.
9934

9935
    This will run on the master, primary node and target node.
9936

9937
    """
9938
    env = {
9939
      "EXPORT_MODE": self.op.mode,
9940
      "EXPORT_NODE": self.op.target_node,
9941
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9942
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9943
      # TODO: Generic function for boolean env variables
9944
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9945
      }
9946

    
9947
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9948

    
9949
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9950

    
9951
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9952
      nl.append(self.op.target_node)
9953

    
9954
    return env, nl, nl
9955

    
9956
  def CheckPrereq(self):
9957
    """Check prerequisites.
9958

9959
    This checks that the instance and node names are valid.
9960

9961
    """
9962
    instance_name = self.op.instance_name
9963

    
9964
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9965
    assert self.instance is not None, \
9966
          "Cannot retrieve locked instance %s" % self.op.instance_name
9967
    _CheckNodeOnline(self, self.instance.primary_node)
9968

    
9969
    if (self.op.remove_instance and self.instance.admin_up and
9970
        not self.op.shutdown):
9971
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9972
                                 " down before")
9973

    
9974
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9975
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9976
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9977
      assert self.dst_node is not None
9978

    
9979
      _CheckNodeOnline(self, self.dst_node.name)
9980
      _CheckNodeNotDrained(self, self.dst_node.name)
9981

    
9982
      self._cds = None
9983
      self.dest_disk_info = None
9984
      self.dest_x509_ca = None
9985

    
9986
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9987
      self.dst_node = None
9988

    
9989
      if len(self.op.target_node) != len(self.instance.disks):
9990
        raise errors.OpPrereqError(("Received destination information for %s"
9991
                                    " disks, but instance %s has %s disks") %
9992
                                   (len(self.op.target_node), instance_name,
9993
                                    len(self.instance.disks)),
9994
                                   errors.ECODE_INVAL)
9995

    
9996
      cds = _GetClusterDomainSecret()
9997

    
9998
      # Check X509 key name
9999
      try:
10000
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10001
      except (TypeError, ValueError), err:
10002
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10003

    
10004
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10005
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10006
                                   errors.ECODE_INVAL)
10007

    
10008
      # Load and verify CA
10009
      try:
10010
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10011
      except OpenSSL.crypto.Error, err:
10012
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10013
                                   (err, ), errors.ECODE_INVAL)
10014

    
10015
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10016
      if errcode is not None:
10017
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10018
                                   (msg, ), errors.ECODE_INVAL)
10019

    
10020
      self.dest_x509_ca = cert
10021

    
10022
      # Verify target information
10023
      disk_info = []
10024
      for idx, disk_data in enumerate(self.op.target_node):
10025
        try:
10026
          (host, port, magic) = \
10027
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10028
        except errors.GenericError, err:
10029
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10030
                                     (idx, err), errors.ECODE_INVAL)
10031

    
10032
        disk_info.append((host, port, magic))
10033

    
10034
      assert len(disk_info) == len(self.op.target_node)
10035
      self.dest_disk_info = disk_info
10036

    
10037
    else:
10038
      raise errors.ProgrammerError("Unhandled export mode %r" %
10039
                                   self.op.mode)
10040

    
10041
    # instance disk type verification
10042
    # TODO: Implement export support for file-based disks
10043
    for disk in self.instance.disks:
10044
      if disk.dev_type == constants.LD_FILE:
10045
        raise errors.OpPrereqError("Export not supported for instances with"
10046
                                   " file-based disks", errors.ECODE_INVAL)
10047

    
10048
  def _CleanupExports(self, feedback_fn):
10049
    """Removes exports of current instance from all other nodes.
10050

10051
    If an instance in a cluster with nodes A..D was exported to node C, its
10052
    exports will be removed from the nodes A, B and D.
10053

10054
    """
10055
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10056

    
10057
    nodelist = self.cfg.GetNodeList()
10058
    nodelist.remove(self.dst_node.name)
10059

    
10060
    # on one-node clusters nodelist will be empty after the removal
10061
    # if we proceed the backup would be removed because OpQueryExports
10062
    # substitutes an empty list with the full cluster node list.
10063
    iname = self.instance.name
10064
    if nodelist:
10065
      feedback_fn("Removing old exports for instance %s" % iname)
10066
      exportlist = self.rpc.call_export_list(nodelist)
10067
      for node in exportlist:
10068
        if exportlist[node].fail_msg:
10069
          continue
10070
        if iname in exportlist[node].payload:
10071
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10072
          if msg:
10073
            self.LogWarning("Could not remove older export for instance %s"
10074
                            " on node %s: %s", iname, node, msg)
10075

    
10076
  def Exec(self, feedback_fn):
10077
    """Export an instance to an image in the cluster.
10078

10079
    """
10080
    assert self.op.mode in constants.EXPORT_MODES
10081

    
10082
    instance = self.instance
10083
    src_node = instance.primary_node
10084

    
10085
    if self.op.shutdown:
10086
      # shutdown the instance, but not the disks
10087
      feedback_fn("Shutting down instance %s" % instance.name)
10088
      result = self.rpc.call_instance_shutdown(src_node, instance,
10089
                                               self.op.shutdown_timeout)
10090
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10091
      result.Raise("Could not shutdown instance %s on"
10092
                   " node %s" % (instance.name, src_node))
10093

    
10094
    # set the disks ID correctly since call_instance_start needs the
10095
    # correct drbd minor to create the symlinks
10096
    for disk in instance.disks:
10097
      self.cfg.SetDiskID(disk, src_node)
10098

    
10099
    activate_disks = (not instance.admin_up)
10100

    
10101
    if activate_disks:
10102
      # Activate the instance disks if we'exporting a stopped instance
10103
      feedback_fn("Activating disks for %s" % instance.name)
10104
      _StartInstanceDisks(self, instance, None)
10105

    
10106
    try:
10107
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10108
                                                     instance)
10109

    
10110
      helper.CreateSnapshots()
10111
      try:
10112
        if (self.op.shutdown and instance.admin_up and
10113
            not self.op.remove_instance):
10114
          assert not activate_disks
10115
          feedback_fn("Starting instance %s" % instance.name)
10116
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10117
          msg = result.fail_msg
10118
          if msg:
10119
            feedback_fn("Failed to start instance: %s" % msg)
10120
            _ShutdownInstanceDisks(self, instance)
10121
            raise errors.OpExecError("Could not start instance: %s" % msg)
10122

    
10123
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10124
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10125
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10126
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10127
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10128

    
10129
          (key_name, _, _) = self.x509_key_name
10130

    
10131
          dest_ca_pem = \
10132
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10133
                                            self.dest_x509_ca)
10134

    
10135
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10136
                                                     key_name, dest_ca_pem,
10137
                                                     timeouts)
10138
      finally:
10139
        helper.Cleanup()
10140

    
10141
      # Check for backwards compatibility
10142
      assert len(dresults) == len(instance.disks)
10143
      assert compat.all(isinstance(i, bool) for i in dresults), \
10144
             "Not all results are boolean: %r" % dresults
10145

    
10146
    finally:
10147
      if activate_disks:
10148
        feedback_fn("Deactivating disks for %s" % instance.name)
10149
        _ShutdownInstanceDisks(self, instance)
10150

    
10151
    if not (compat.all(dresults) and fin_resu):
10152
      failures = []
10153
      if not fin_resu:
10154
        failures.append("export finalization")
10155
      if not compat.all(dresults):
10156
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10157
                               if not dsk)
10158
        failures.append("disk export: disk(s) %s" % fdsk)
10159

    
10160
      raise errors.OpExecError("Export failed, errors in %s" %
10161
                               utils.CommaJoin(failures))
10162

    
10163
    # At this point, the export was successful, we can cleanup/finish
10164

    
10165
    # Remove instance if requested
10166
    if self.op.remove_instance:
10167
      feedback_fn("Removing instance %s" % instance.name)
10168
      _RemoveInstance(self, feedback_fn, instance,
10169
                      self.op.ignore_remove_failures)
10170

    
10171
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10172
      self._CleanupExports(feedback_fn)
10173

    
10174
    return fin_resu, dresults
10175

    
10176

    
10177
class LURemoveExport(NoHooksLU):
10178
  """Remove exports related to the named instance.
10179

10180
  """
10181
  _OP_PARAMS = [
10182
    _PInstanceName,
10183
    ]
10184
  REQ_BGL = False
10185

    
10186
  def ExpandNames(self):
10187
    self.needed_locks = {}
10188
    # We need all nodes to be locked in order for RemoveExport to work, but we
10189
    # don't need to lock the instance itself, as nothing will happen to it (and
10190
    # we can remove exports also for a removed instance)
10191
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10192

    
10193
  def Exec(self, feedback_fn):
10194
    """Remove any export.
10195

10196
    """
10197
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10198
    # If the instance was not found we'll try with the name that was passed in.
10199
    # This will only work if it was an FQDN, though.
10200
    fqdn_warn = False
10201
    if not instance_name:
10202
      fqdn_warn = True
10203
      instance_name = self.op.instance_name
10204

    
10205
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10206
    exportlist = self.rpc.call_export_list(locked_nodes)
10207
    found = False
10208
    for node in exportlist:
10209
      msg = exportlist[node].fail_msg
10210
      if msg:
10211
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10212
        continue
10213
      if instance_name in exportlist[node].payload:
10214
        found = True
10215
        result = self.rpc.call_export_remove(node, instance_name)
10216
        msg = result.fail_msg
10217
        if msg:
10218
          logging.error("Could not remove export for instance %s"
10219
                        " on node %s: %s", instance_name, node, msg)
10220

    
10221
    if fqdn_warn and not found:
10222
      feedback_fn("Export not found. If trying to remove an export belonging"
10223
                  " to a deleted instance please use its Fully Qualified"
10224
                  " Domain Name.")
10225

    
10226

    
10227
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10228
  """Generic tags LU.
10229

10230
  This is an abstract class which is the parent of all the other tags LUs.
10231

10232
  """
10233

    
10234
  def ExpandNames(self):
10235
    self.needed_locks = {}
10236
    if self.op.kind == constants.TAG_NODE:
10237
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10238
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10239
    elif self.op.kind == constants.TAG_INSTANCE:
10240
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10241
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10242

    
10243
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10244
    # not possible to acquire the BGL based on opcode parameters)
10245

    
10246
  def CheckPrereq(self):
10247
    """Check prerequisites.
10248

10249
    """
10250
    if self.op.kind == constants.TAG_CLUSTER:
10251
      self.target = self.cfg.GetClusterInfo()
10252
    elif self.op.kind == constants.TAG_NODE:
10253
      self.target = self.cfg.GetNodeInfo(self.op.name)
10254
    elif self.op.kind == constants.TAG_INSTANCE:
10255
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10256
    else:
10257
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10258
                                 str(self.op.kind), errors.ECODE_INVAL)
10259

    
10260

    
10261
class LUGetTags(TagsLU):
10262
  """Returns the tags of a given object.
10263

10264
  """
10265
  _OP_PARAMS = [
10266
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10267
    # Name is only meaningful for nodes and instances
10268
    ("name", ht.NoDefault, ht.TMaybeString),
10269
    ]
10270
  REQ_BGL = False
10271

    
10272
  def ExpandNames(self):
10273
    TagsLU.ExpandNames(self)
10274

    
10275
    # Share locks as this is only a read operation
10276
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10277

    
10278
  def Exec(self, feedback_fn):
10279
    """Returns the tag list.
10280

10281
    """
10282
    return list(self.target.GetTags())
10283

    
10284

    
10285
class LUSearchTags(NoHooksLU):
10286
  """Searches the tags for a given pattern.
10287

10288
  """
10289
  _OP_PARAMS = [
10290
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
10291
    ]
10292
  REQ_BGL = False
10293

    
10294
  def ExpandNames(self):
10295
    self.needed_locks = {}
10296

    
10297
  def CheckPrereq(self):
10298
    """Check prerequisites.
10299

10300
    This checks the pattern passed for validity by compiling it.
10301

10302
    """
10303
    try:
10304
      self.re = re.compile(self.op.pattern)
10305
    except re.error, err:
10306
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10307
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10308

    
10309
  def Exec(self, feedback_fn):
10310
    """Returns the tag list.
10311

10312
    """
10313
    cfg = self.cfg
10314
    tgts = [("/cluster", cfg.GetClusterInfo())]
10315
    ilist = cfg.GetAllInstancesInfo().values()
10316
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10317
    nlist = cfg.GetAllNodesInfo().values()
10318
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10319
    results = []
10320
    for path, target in tgts:
10321
      for tag in target.GetTags():
10322
        if self.re.search(tag):
10323
          results.append((path, tag))
10324
    return results
10325

    
10326

    
10327
class LUAddTags(TagsLU):
10328
  """Sets a tag on a given object.
10329

10330
  """
10331
  _OP_PARAMS = [
10332
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10333
    # Name is only meaningful for nodes and instances
10334
    ("name", ht.NoDefault, ht.TMaybeString),
10335
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10336
    ]
10337
  REQ_BGL = False
10338

    
10339
  def CheckPrereq(self):
10340
    """Check prerequisites.
10341

10342
    This checks the type and length of the tag name and value.
10343

10344
    """
10345
    TagsLU.CheckPrereq(self)
10346
    for tag in self.op.tags:
10347
      objects.TaggableObject.ValidateTag(tag)
10348

    
10349
  def Exec(self, feedback_fn):
10350
    """Sets the tag.
10351

10352
    """
10353
    try:
10354
      for tag in self.op.tags:
10355
        self.target.AddTag(tag)
10356
    except errors.TagError, err:
10357
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10358
    self.cfg.Update(self.target, feedback_fn)
10359

    
10360

    
10361
class LUDelTags(TagsLU):
10362
  """Delete a list of tags from a given object.
10363

10364
  """
10365
  _OP_PARAMS = [
10366
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10367
    # Name is only meaningful for nodes and instances
10368
    ("name", ht.NoDefault, ht.TMaybeString),
10369
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10370
    ]
10371
  REQ_BGL = False
10372

    
10373
  def CheckPrereq(self):
10374
    """Check prerequisites.
10375

10376
    This checks that we have the given tag.
10377

10378
    """
10379
    TagsLU.CheckPrereq(self)
10380
    for tag in self.op.tags:
10381
      objects.TaggableObject.ValidateTag(tag)
10382
    del_tags = frozenset(self.op.tags)
10383
    cur_tags = self.target.GetTags()
10384

    
10385
    diff_tags = del_tags - cur_tags
10386
    if diff_tags:
10387
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10388
      raise errors.OpPrereqError("Tag(s) %s not found" %
10389
                                 (utils.CommaJoin(diff_names), ),
10390
                                 errors.ECODE_NOENT)
10391

    
10392
  def Exec(self, feedback_fn):
10393
    """Remove the tag from the object.
10394

10395
    """
10396
    for tag in self.op.tags:
10397
      self.target.RemoveTag(tag)
10398
    self.cfg.Update(self.target, feedback_fn)
10399

    
10400

    
10401
class LUTestDelay(NoHooksLU):
10402
  """Sleep for a specified amount of time.
10403

10404
  This LU sleeps on the master and/or nodes for a specified amount of
10405
  time.
10406

10407
  """
10408
  _OP_PARAMS = [
10409
    ("duration", ht.NoDefault, ht.TFloat),
10410
    ("on_master", True, ht.TBool),
10411
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10412
    ("repeat", 0, ht.TPositiveInt)
10413
    ]
10414
  REQ_BGL = False
10415

    
10416
  def ExpandNames(self):
10417
    """Expand names and set required locks.
10418

10419
    This expands the node list, if any.
10420

10421
    """
10422
    self.needed_locks = {}
10423
    if self.op.on_nodes:
10424
      # _GetWantedNodes can be used here, but is not always appropriate to use
10425
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10426
      # more information.
10427
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10428
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10429

    
10430
  def _TestDelay(self):
10431
    """Do the actual sleep.
10432

10433
    """
10434
    if self.op.on_master:
10435
      if not utils.TestDelay(self.op.duration):
10436
        raise errors.OpExecError("Error during master delay test")
10437
    if self.op.on_nodes:
10438
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10439
      for node, node_result in result.items():
10440
        node_result.Raise("Failure during rpc call to node %s" % node)
10441

    
10442
  def Exec(self, feedback_fn):
10443
    """Execute the test delay opcode, with the wanted repetitions.
10444

10445
    """
10446
    if self.op.repeat == 0:
10447
      self._TestDelay()
10448
    else:
10449
      top_value = self.op.repeat - 1
10450
      for i in range(self.op.repeat):
10451
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10452
        self._TestDelay()
10453

    
10454

    
10455
class LUTestJobqueue(NoHooksLU):
10456
  """Utility LU to test some aspects of the job queue.
10457

10458
  """
10459
  _OP_PARAMS = [
10460
    ("notify_waitlock", False, ht.TBool),
10461
    ("notify_exec", False, ht.TBool),
10462
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10463
    ("fail", False, ht.TBool),
10464
    ]
10465
  REQ_BGL = False
10466

    
10467
  # Must be lower than default timeout for WaitForJobChange to see whether it
10468
  # notices changed jobs
10469
  _CLIENT_CONNECT_TIMEOUT = 20.0
10470
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10471

    
10472
  @classmethod
10473
  def _NotifyUsingSocket(cls, cb, errcls):
10474
    """Opens a Unix socket and waits for another program to connect.
10475

10476
    @type cb: callable
10477
    @param cb: Callback to send socket name to client
10478
    @type errcls: class
10479
    @param errcls: Exception class to use for errors
10480

10481
    """
10482
    # Using a temporary directory as there's no easy way to create temporary
10483
    # sockets without writing a custom loop around tempfile.mktemp and
10484
    # socket.bind
10485
    tmpdir = tempfile.mkdtemp()
10486
    try:
10487
      tmpsock = utils.PathJoin(tmpdir, "sock")
10488

    
10489
      logging.debug("Creating temporary socket at %s", tmpsock)
10490
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10491
      try:
10492
        sock.bind(tmpsock)
10493
        sock.listen(1)
10494

    
10495
        # Send details to client
10496
        cb(tmpsock)
10497

    
10498
        # Wait for client to connect before continuing
10499
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10500
        try:
10501
          (conn, _) = sock.accept()
10502
        except socket.error, err:
10503
          raise errcls("Client didn't connect in time (%s)" % err)
10504
      finally:
10505
        sock.close()
10506
    finally:
10507
      # Remove as soon as client is connected
10508
      shutil.rmtree(tmpdir)
10509

    
10510
    # Wait for client to close
10511
    try:
10512
      try:
10513
        # pylint: disable-msg=E1101
10514
        # Instance of '_socketobject' has no ... member
10515
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10516
        conn.recv(1)
10517
      except socket.error, err:
10518
        raise errcls("Client failed to confirm notification (%s)" % err)
10519
    finally:
10520
      conn.close()
10521

    
10522
  def _SendNotification(self, test, arg, sockname):
10523
    """Sends a notification to the client.
10524

10525
    @type test: string
10526
    @param test: Test name
10527
    @param arg: Test argument (depends on test)
10528
    @type sockname: string
10529
    @param sockname: Socket path
10530

10531
    """
10532
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10533

    
10534
  def _Notify(self, prereq, test, arg):
10535
    """Notifies the client of a test.
10536

10537
    @type prereq: bool
10538
    @param prereq: Whether this is a prereq-phase test
10539
    @type test: string
10540
    @param test: Test name
10541
    @param arg: Test argument (depends on test)
10542

10543
    """
10544
    if prereq:
10545
      errcls = errors.OpPrereqError
10546
    else:
10547
      errcls = errors.OpExecError
10548

    
10549
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10550
                                                  test, arg),
10551
                                   errcls)
10552

    
10553
  def CheckArguments(self):
10554
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10555
    self.expandnames_calls = 0
10556

    
10557
  def ExpandNames(self):
10558
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10559
    if checkargs_calls < 1:
10560
      raise errors.ProgrammerError("CheckArguments was not called")
10561

    
10562
    self.expandnames_calls += 1
10563

    
10564
    if self.op.notify_waitlock:
10565
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10566

    
10567
    self.LogInfo("Expanding names")
10568

    
10569
    # Get lock on master node (just to get a lock, not for a particular reason)
10570
    self.needed_locks = {
10571
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10572
      }
10573

    
10574
  def Exec(self, feedback_fn):
10575
    if self.expandnames_calls < 1:
10576
      raise errors.ProgrammerError("ExpandNames was not called")
10577

    
10578
    if self.op.notify_exec:
10579
      self._Notify(False, constants.JQT_EXEC, None)
10580

    
10581
    self.LogInfo("Executing")
10582

    
10583
    if self.op.log_messages:
10584
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10585
      for idx, msg in enumerate(self.op.log_messages):
10586
        self.LogInfo("Sending log message %s", idx + 1)
10587
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10588
        # Report how many test messages have been sent
10589
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10590

    
10591
    if self.op.fail:
10592
      raise errors.OpExecError("Opcode failure was requested")
10593

    
10594
    return True
10595

    
10596

    
10597
class IAllocator(object):
10598
  """IAllocator framework.
10599

10600
  An IAllocator instance has three sets of attributes:
10601
    - cfg that is needed to query the cluster
10602
    - input data (all members of the _KEYS class attribute are required)
10603
    - four buffer attributes (in|out_data|text), that represent the
10604
      input (to the external script) in text and data structure format,
10605
      and the output from it, again in two formats
10606
    - the result variables from the script (success, info, nodes) for
10607
      easy usage
10608

10609
  """
10610
  # pylint: disable-msg=R0902
10611
  # lots of instance attributes
10612
  _ALLO_KEYS = [
10613
    "name", "mem_size", "disks", "disk_template",
10614
    "os", "tags", "nics", "vcpus", "hypervisor",
10615
    ]
10616
  _RELO_KEYS = [
10617
    "name", "relocate_from",
10618
    ]
10619
  _EVAC_KEYS = [
10620
    "evac_nodes",
10621
    ]
10622

    
10623
  def __init__(self, cfg, rpc, mode, **kwargs):
10624
    self.cfg = cfg
10625
    self.rpc = rpc
10626
    # init buffer variables
10627
    self.in_text = self.out_text = self.in_data = self.out_data = None
10628
    # init all input fields so that pylint is happy
10629
    self.mode = mode
10630
    self.mem_size = self.disks = self.disk_template = None
10631
    self.os = self.tags = self.nics = self.vcpus = None
10632
    self.hypervisor = None
10633
    self.relocate_from = None
10634
    self.name = None
10635
    self.evac_nodes = None
10636
    # computed fields
10637
    self.required_nodes = None
10638
    # init result fields
10639
    self.success = self.info = self.result = None
10640
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10641
      keyset = self._ALLO_KEYS
10642
      fn = self._AddNewInstance
10643
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10644
      keyset = self._RELO_KEYS
10645
      fn = self._AddRelocateInstance
10646
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10647
      keyset = self._EVAC_KEYS
10648
      fn = self._AddEvacuateNodes
10649
    else:
10650
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10651
                                   " IAllocator" % self.mode)
10652
    for key in kwargs:
10653
      if key not in keyset:
10654
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10655
                                     " IAllocator" % key)
10656
      setattr(self, key, kwargs[key])
10657

    
10658
    for key in keyset:
10659
      if key not in kwargs:
10660
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10661
                                     " IAllocator" % key)
10662
    self._BuildInputData(fn)
10663

    
10664
  def _ComputeClusterData(self):
10665
    """Compute the generic allocator input data.
10666

10667
    This is the data that is independent of the actual operation.
10668

10669
    """
10670
    cfg = self.cfg
10671
    cluster_info = cfg.GetClusterInfo()
10672
    # cluster data
10673
    data = {
10674
      "version": constants.IALLOCATOR_VERSION,
10675
      "cluster_name": cfg.GetClusterName(),
10676
      "cluster_tags": list(cluster_info.GetTags()),
10677
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10678
      # we don't have job IDs
10679
      }
10680
    iinfo = cfg.GetAllInstancesInfo().values()
10681
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10682

    
10683
    # node data
10684
    node_list = cfg.GetNodeList()
10685

    
10686
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10687
      hypervisor_name = self.hypervisor
10688
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10689
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10690
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10691
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10692

    
10693
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10694
                                        hypervisor_name)
10695
    node_iinfo = \
10696
      self.rpc.call_all_instances_info(node_list,
10697
                                       cluster_info.enabled_hypervisors)
10698

    
10699
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10700

    
10701
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10702

    
10703
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10704

    
10705
    self.in_data = data
10706

    
10707
  @staticmethod
10708
  def _ComputeNodeGroupData(cfg):
10709
    """Compute node groups data.
10710

10711
    """
10712
    ng = {}
10713
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10714
      ng[guuid] = { "name": gdata.name }
10715
    return ng
10716

    
10717
  @staticmethod
10718
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10719
    """Compute global node data.
10720

10721
    """
10722
    node_results = {}
10723
    for nname, nresult in node_data.items():
10724
      # first fill in static (config-based) values
10725
      ninfo = cfg.GetNodeInfo(nname)
10726
      pnr = {
10727
        "tags": list(ninfo.GetTags()),
10728
        "primary_ip": ninfo.primary_ip,
10729
        "secondary_ip": ninfo.secondary_ip,
10730
        "offline": ninfo.offline,
10731
        "drained": ninfo.drained,
10732
        "master_candidate": ninfo.master_candidate,
10733
        "group": ninfo.group,
10734
        "master_capable": ninfo.master_capable,
10735
        "vm_capable": ninfo.vm_capable,
10736
        }
10737

    
10738
      if not (ninfo.offline or ninfo.drained):
10739
        nresult.Raise("Can't get data for node %s" % nname)
10740
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10741
                                nname)
10742
        remote_info = nresult.payload
10743

    
10744
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10745
                     'vg_size', 'vg_free', 'cpu_total']:
10746
          if attr not in remote_info:
10747
            raise errors.OpExecError("Node '%s' didn't return attribute"
10748
                                     " '%s'" % (nname, attr))
10749
          if not isinstance(remote_info[attr], int):
10750
            raise errors.OpExecError("Node '%s' returned invalid value"
10751
                                     " for '%s': %s" %
10752
                                     (nname, attr, remote_info[attr]))
10753
        # compute memory used by primary instances
10754
        i_p_mem = i_p_up_mem = 0
10755
        for iinfo, beinfo in i_list:
10756
          if iinfo.primary_node == nname:
10757
            i_p_mem += beinfo[constants.BE_MEMORY]
10758
            if iinfo.name not in node_iinfo[nname].payload:
10759
              i_used_mem = 0
10760
            else:
10761
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10762
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10763
            remote_info['memory_free'] -= max(0, i_mem_diff)
10764

    
10765
            if iinfo.admin_up:
10766
              i_p_up_mem += beinfo[constants.BE_MEMORY]
10767

    
10768
        # compute memory used by instances
10769
        pnr_dyn = {
10770
          "total_memory": remote_info['memory_total'],
10771
          "reserved_memory": remote_info['memory_dom0'],
10772
          "free_memory": remote_info['memory_free'],
10773
          "total_disk": remote_info['vg_size'],
10774
          "free_disk": remote_info['vg_free'],
10775
          "total_cpus": remote_info['cpu_total'],
10776
          "i_pri_memory": i_p_mem,
10777
          "i_pri_up_memory": i_p_up_mem,
10778
          }
10779
        pnr.update(pnr_dyn)
10780

    
10781
      node_results[nname] = pnr
10782

    
10783
    return node_results
10784

    
10785
  @staticmethod
10786
  def _ComputeInstanceData(cluster_info, i_list):
10787
    """Compute global instance data.
10788

10789
    """
10790
    instance_data = {}
10791
    for iinfo, beinfo in i_list:
10792
      nic_data = []
10793
      for nic in iinfo.nics:
10794
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10795
        nic_dict = {"mac": nic.mac,
10796
                    "ip": nic.ip,
10797
                    "mode": filled_params[constants.NIC_MODE],
10798
                    "link": filled_params[constants.NIC_LINK],
10799
                   }
10800
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10801
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10802
        nic_data.append(nic_dict)
10803
      pir = {
10804
        "tags": list(iinfo.GetTags()),
10805
        "admin_up": iinfo.admin_up,
10806
        "vcpus": beinfo[constants.BE_VCPUS],
10807
        "memory": beinfo[constants.BE_MEMORY],
10808
        "os": iinfo.os,
10809
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10810
        "nics": nic_data,
10811
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10812
        "disk_template": iinfo.disk_template,
10813
        "hypervisor": iinfo.hypervisor,
10814
        }
10815
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10816
                                                 pir["disks"])
10817
      instance_data[iinfo.name] = pir
10818

    
10819
    return instance_data
10820

    
10821
  def _AddNewInstance(self):
10822
    """Add new instance data to allocator structure.
10823

10824
    This in combination with _AllocatorGetClusterData will create the
10825
    correct structure needed as input for the allocator.
10826

10827
    The checks for the completeness of the opcode must have already been
10828
    done.
10829

10830
    """
10831
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10832

    
10833
    if self.disk_template in constants.DTS_NET_MIRROR:
10834
      self.required_nodes = 2
10835
    else:
10836
      self.required_nodes = 1
10837
    request = {
10838
      "name": self.name,
10839
      "disk_template": self.disk_template,
10840
      "tags": self.tags,
10841
      "os": self.os,
10842
      "vcpus": self.vcpus,
10843
      "memory": self.mem_size,
10844
      "disks": self.disks,
10845
      "disk_space_total": disk_space,
10846
      "nics": self.nics,
10847
      "required_nodes": self.required_nodes,
10848
      }
10849
    return request
10850

    
10851
  def _AddRelocateInstance(self):
10852
    """Add relocate instance data to allocator structure.
10853

10854
    This in combination with _IAllocatorGetClusterData will create the
10855
    correct structure needed as input for the allocator.
10856

10857
    The checks for the completeness of the opcode must have already been
10858
    done.
10859

10860
    """
10861
    instance = self.cfg.GetInstanceInfo(self.name)
10862
    if instance is None:
10863
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
10864
                                   " IAllocator" % self.name)
10865

    
10866
    if instance.disk_template not in constants.DTS_NET_MIRROR:
10867
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10868
                                 errors.ECODE_INVAL)
10869

    
10870
    if len(instance.secondary_nodes) != 1:
10871
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
10872
                                 errors.ECODE_STATE)
10873

    
10874
    self.required_nodes = 1
10875
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
10876
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10877

    
10878
    request = {
10879
      "name": self.name,
10880
      "disk_space_total": disk_space,
10881
      "required_nodes": self.required_nodes,
10882
      "relocate_from": self.relocate_from,
10883
      }
10884
    return request
10885

    
10886
  def _AddEvacuateNodes(self):
10887
    """Add evacuate nodes data to allocator structure.
10888

10889
    """
10890
    request = {
10891
      "evac_nodes": self.evac_nodes
10892
      }
10893
    return request
10894

    
10895
  def _BuildInputData(self, fn):
10896
    """Build input data structures.
10897

10898
    """
10899
    self._ComputeClusterData()
10900

    
10901
    request = fn()
10902
    request["type"] = self.mode
10903
    self.in_data["request"] = request
10904

    
10905
    self.in_text = serializer.Dump(self.in_data)
10906

    
10907
  def Run(self, name, validate=True, call_fn=None):
10908
    """Run an instance allocator and return the results.
10909

10910
    """
10911
    if call_fn is None:
10912
      call_fn = self.rpc.call_iallocator_runner
10913

    
10914
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10915
    result.Raise("Failure while running the iallocator script")
10916

    
10917
    self.out_text = result.payload
10918
    if validate:
10919
      self._ValidateResult()
10920

    
10921
  def _ValidateResult(self):
10922
    """Process the allocator results.
10923

10924
    This will process and if successful save the result in
10925
    self.out_data and the other parameters.
10926

10927
    """
10928
    try:
10929
      rdict = serializer.Load(self.out_text)
10930
    except Exception, err:
10931
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10932

    
10933
    if not isinstance(rdict, dict):
10934
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
10935

    
10936
    # TODO: remove backwards compatiblity in later versions
10937
    if "nodes" in rdict and "result" not in rdict:
10938
      rdict["result"] = rdict["nodes"]
10939
      del rdict["nodes"]
10940

    
10941
    for key in "success", "info", "result":
10942
      if key not in rdict:
10943
        raise errors.OpExecError("Can't parse iallocator results:"
10944
                                 " missing key '%s'" % key)
10945
      setattr(self, key, rdict[key])
10946

    
10947
    if not isinstance(rdict["result"], list):
10948
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10949
                               " is not a list")
10950
    self.out_data = rdict
10951

    
10952

    
10953
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]
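
  # Illustrative parameter values that satisfy both the type checks above and
  # CheckPrereq below (the MAC address, bridge name and size are made up):
  #
  #   nics=[{"mac": "aa:00:00:a1:b2:c3", "ip": None, "bridge": "xen-br0"}]
  #   disks=[{"size": 1024, "mode": "w"}]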

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
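
# A minimal sketch of exercising LUTestAllocator from outside this module
# (the opcode class name opcodes.OpTestAllocator and the job submission
# machinery are assumptions, as neither appears in this file):
#
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_RELOC,
#                                name="instance1.example.com",
#                                nics=None, disks=None)
#
# With direction IALLOCATOR_DIR_IN the LU only returns the generated input
# text (ial.in_text); with IALLOCATOR_DIR_OUT it also runs the named
# allocator and returns its raw, unvalidated output (ial.out_text).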