lib/cmdlib.py @ 38f9d2cf

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import ht
from ganeti import query
from ganeti import qlang

import ganeti.masterd.instance # pylint: disable-msg=W0611

# Common opcode attributes

#: output fields for a query operation
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))


#: the shutdown timeout
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

#: the force parameter
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: Whether to ignore offline nodes
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: a required node group name (for single-group LUs)
_PGroupName = ("group_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live)
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean)
_PMigrationLive = ("live", None, ht.TMaybeBool)
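
# Illustrative sketch (not part of the original module; the LU and parameter
# names below are made up): each _P* tuple above follows the
# (attribute_name, default_value, type_check) convention consumed by
# LogicalUnit.__init__ below, and a concrete LU would combine them like:
#
#   class LUExampleShutdown(LogicalUnit):
#     _OP_PARAMS = [
#       _PInstanceName,      # required: default is ht.NoDefault
#       _PShutdownTimeout,   # defaults to constants.DEFAULT_SHUTDOWN_TIMEOUT
#       ("ignore_failures", False, ht.TBool),
#     ]
#
# Missing opcode attributes are filled in from the defaults (callables are
# invoked to produce fresh values) and every value is validated with its
# associated ht check.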


# End types
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_PARAMS: a list of opcode attributes, the default values
      they should get if not already defined, and types they must match

  """
  HPATH = None
  HTYPE = None
  _OP_PARAMS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # The new kind-of-type-system
    op_id = self.op.OP_ID
    for attr_name, aval, test in self._OP_PARAMS:
      if not hasattr(op, attr_name):
        if aval == ht.NoDefault:
          raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
                                     (op_id, attr_name), errors.ECODE_INVAL)
        else:
          if callable(aval):
            dval = aval()
          else:
            dval = aval
          setattr(self.op, attr_name, dval)
      attr_val = getattr(op, attr_name)
      if test == ht.NoType:
        # no tests here
        continue
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (op_id, attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
                                   (op_id, attr_name), errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes for a phase, an empty list (and not None) should be
    returned.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
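
  # Usage sketch (hypothetical LU, for illustration only): an LU that locks an
  # instance first and derives the node locks from it would combine the two
  # helpers above roughly as follows:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()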


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, names, fields, use_locking):
    """Initializes this class.

    """
    self.names = names
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields)
    self.requested_data = self.query.RequestedData()

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  @classmethod
  def FieldsQuery(cls, fields):
    """Returns list of available fields.

    @return: List of L{objects.QueryFieldDefinition}

    """
    if fields is None:
      # Client requests all fields
      fdefs = query.GetAllFields(cls.FIELDS.values())
    else:
      fdefs = query.Query(cls.FIELDS, fields).GetFields()

    return {
      "fields": [fdef.ToDict() for fdef in fdefs],
      }

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    data = self._GetQueryData(lu)

    return {
      "data": self.query.Query(data),
      "fields": [fdef.ToDict()
                 for fdef in self.query.GetFields()],
      }

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu))
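
# Rough sketch (hypothetical class, not from this module) of how a concrete
# query is expected to build on _QueryBase: it supplies FIELDS, decides in
# ExpandNames whether locking is needed, and returns its data object from
# _GetQueryData for self.query to evaluate.
#
#   class _ExampleNodeQuery(_QueryBase):
#     FIELDS = None  # would hold the node field definitions
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = utils.NiceSort(lu.cfg.GetNodeList())
#       self.do_locking = self.use_locking
#       if self.do_locking:
#         lu.needed_locks[locking.LEVEL_NODE] = self.wanted
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       return None  # would build the structure consumed by self.query.Query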


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
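
# Worked example for the helper above (illustrative values, assuming
# constants.VALUE_DEFAULT is the string "default"):
#
#   _GetUpdatedParams({"vcpus": 2, "memory": 512},
#                     {"memory": constants.VALUE_DEFAULT, "vcpus": 4})
#   -> {"vcpus": 4}    # "memory" is dropped and reverts to its default
#
#   _GetUpdatedParams({"vcpus": 2}, {"vcpus": None}, use_none=True)
#   -> {}              # None deletes the key when use_none is set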


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
  return True


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
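
# Illustrative result (all values made up): for an instance with one bridged
# NIC and one disk, the helper above produces entries along the lines of
#
#   OP_TARGET=inst1.example.com     INSTANCE_NIC_COUNT=1
#   INSTANCE_PRIMARY=node1          INSTANCE_NIC0_MODE=bridged
#   INSTANCE_STATUS=up              INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1           INSTANCE_DISK0_SIZE=10240
#
# which the hooks runner later prefixes with "GANETI_" before exporting them
# to the hook scripts.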


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
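
# Worked example (numbers made up): with candidate_pool_size = 10, mc_now = 3
# and mc_should = 3, the helper above computes min(3 + 1, 10) = 4 and returns
# 3 < 4, i.e. True: the calling node should promote itself once added.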


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.")

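# Behaviour sketch for the check above (hypothetical opcode slots and values):
#
#   iallocator given, node given      -> OpPrereqError (mutually exclusive)
#   iallocator given, node missing    -> the iallocator is used as-is
#   both missing, cluster default set -> the iallocator slot is filled in
#   both missing, no cluster default  -> OpPrereqError asking for one of them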

class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
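
# Return-value sketch (paths and messages are illustrative): the helper above
# yields (None, None) for a healthy certificate, a tuple such as
# (LUVerifyCluster.ETYPE_WARNING, "While verifying /path/server.pem: ...")
# when expiry is close, and ETYPE_ERROR tuples for unreadable or expired
# certificates.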


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [
    ("skip_checks", ht.EmptyList,
     ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", False, ht.TBool),
    ("error_codes", False, ht.TBool),
    ("debug_simulate_errors", False, ht.TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
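
  # Output sketch for _Error (example values): with ecode = self.ENODELVM and
  # item = "node2.example.com" it reports either
  #
  #   ERROR: node node2.example.com: unable to check volume groups
  #
  # in the default format, or the machine-parseable
  #
  #   ERROR:ENODELVM:node:node2.example.com:unable to check volume groups
  #
  # when the opcode's error_codes parameter is set.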
1440

    
1441
  def _VerifyNode(self, ninfo, nresult):
1442
    """Perform some basic validation on data returned from a node.
1443

1444
      - check the result data structure is well formed and has all the
1445
        mandatory fields
1446
      - check ganeti version
1447

1448
    @type ninfo: L{objects.Node}
1449
    @param ninfo: the node to check
1450
    @param nresult: the results from the node
1451
    @rtype: boolean
1452
    @return: whether overall this call was successful (and we can expect
1453
         reasonable values in the respose)
1454

1455
    """
1456
    node = ninfo.name
1457
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1458

    
1459
    # main result, nresult should be a non-empty dict
1460
    test = not nresult or not isinstance(nresult, dict)
1461
    _ErrorIf(test, self.ENODERPC, node,
1462
                  "unable to verify node: no data returned")
1463
    if test:
1464
      return False
1465

    
1466
    # compares ganeti version
1467
    local_version = constants.PROTOCOL_VERSION
1468
    remote_version = nresult.get("version", None)
1469
    test = not (remote_version and
1470
                isinstance(remote_version, (list, tuple)) and
1471
                len(remote_version) == 2)
1472
    _ErrorIf(test, self.ENODERPC, node,
1473
             "connection to node returned invalid data")
1474
    if test:
1475
      return False
1476

    
1477
    test = local_version != remote_version[0]
1478
    _ErrorIf(test, self.ENODEVERSION, node,
1479
             "incompatible protocol versions: master %s,"
1480
             " node %s", local_version, remote_version[0])
1481
    if test:
1482
      return False
1483

    
1484
    # node seems compatible, we can actually try to look into its results
1485

    
1486
    # full package version
1487
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1488
                  self.ENODEVERSION, node,
1489
                  "software version mismatch: master %s, node %s",
1490
                  constants.RELEASE_VERSION, remote_version[1],
1491
                  code=self.ETYPE_WARNING)
1492

    
1493
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1494
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1495
      for hv_name, hv_result in hyp_result.iteritems():
1496
        test = hv_result is not None
1497
        _ErrorIf(test, self.ENODEHV, node,
1498
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1499

    
1500
    test = nresult.get(constants.NV_NODESETUP,
1501
                           ["Missing NODESETUP results"])
1502
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1503
             "; ".join(test))
1504

    
1505
    return True
1506

    
1507
  def _VerifyNodeTime(self, ninfo, nresult,
1508
                      nvinfo_starttime, nvinfo_endtime):
1509
    """Check the node time.
1510

1511
    @type ninfo: L{objects.Node}
1512
    @param ninfo: the node to check
1513
    @param nresult: the remote results for the node
1514
    @param nvinfo_starttime: the start time of the RPC call
1515
    @param nvinfo_endtime: the end time of the RPC call
1516

1517
    """
1518
    node = ninfo.name
1519
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1520

    
1521
    ntime = nresult.get(constants.NV_TIME, None)
1522
    try:
1523
      ntime_merged = utils.MergeTime(ntime)
1524
    except (ValueError, TypeError):
1525
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1526
      return
1527

    
1528
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1529
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1530
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1531
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1532
    else:
1533
      ntime_diff = None
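    # only a clock reading outside the [start - skew, end + skew] window is
    # flagged below, so normal RPC round-trip latency is not reported as
    # clock drift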
1534

    
1535
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1536
             "Node time diverges by at least %s from master node time",
1537
             ntime_diff)
1538

    
1539
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1540
    """Check the node LVM data.
1541

1542
    @type ninfo: L{objects.Node}
1543
    @param ninfo: the node to check
1544
    @param nresult: the remote results for the node
1545
    @param vg_name: the configured VG name
1546

1547
    """
1548
    if vg_name is None:
1549
      return
1550

    
1551
    node = ninfo.name
1552
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1553

    
1554
    # checks vg existence and size > 20G
1555
    vglist = nresult.get(constants.NV_VGLIST, None)
1556
    test = not vglist
1557
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1558
    if not test:
1559
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1560
                                            constants.MIN_VG_SIZE)
1561
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1562

    
1563
    # check pv names
1564
    pvlist = nresult.get(constants.NV_PVLIST, None)
1565
    test = pvlist is None
1566
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1567
    if not test:
1568
      # check that ':' is not present in PV names, since it's a
1569
      # special character for lvcreate (denotes the range of PEs to
1570
      # use on the PV)
1571
      for _, pvname, owner_vg in pvlist:
1572
        test = ":" in pvname
1573
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1574
                 " '%s' of VG '%s'", pvname, owner_vg)
1575

    
1576
  def _VerifyNodeNetwork(self, ninfo, nresult):
1577
    """Check the node network connectivity.
1578

1579
    @type ninfo: L{objects.Node}
1580
    @param ninfo: the node to check
1581
    @param nresult: the remote results for the node
1582

1583
    """
1584
    node = ninfo.name
1585
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1586

    
1587
    test = constants.NV_NODELIST not in nresult
1588
    _ErrorIf(test, self.ENODESSH, node,
1589
             "node hasn't returned node ssh connectivity data")
1590
    if not test:
1591
      if nresult[constants.NV_NODELIST]:
1592
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1593
          _ErrorIf(True, self.ENODESSH, node,
1594
                   "ssh communication with node '%s': %s", a_node, a_msg)
1595

    
1596
    test = constants.NV_NODENETTEST not in nresult
1597
    _ErrorIf(test, self.ENODENET, node,
1598
             "node hasn't returned node tcp connectivity data")
1599
    if not test:
1600
      if nresult[constants.NV_NODENETTEST]:
1601
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1602
        for anode in nlist:
1603
          _ErrorIf(True, self.ENODENET, node,
1604
                   "tcp communication with node '%s': %s",
1605
                   anode, nresult[constants.NV_NODENETTEST][anode])
1606

    
1607
    test = constants.NV_MASTERIP not in nresult
1608
    _ErrorIf(test, self.ENODENET, node,
1609
             "node hasn't returned node master IP reachability data")
1610
    if not test:
1611
      if not nresult[constants.NV_MASTERIP]:
1612
        if node == self.master_node:
1613
          msg = "the master node cannot reach the master IP (not configured?)"
1614
        else:
1615
          msg = "cannot reach the master IP"
1616
        _ErrorIf(True, self.ENODENET, node, msg)
1617

    
1618
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1619
                      diskstatus):
1620
    """Verify an instance.
1621

1622
    This function checks to see if the required block devices are
1623
    available on the instance's node.
1624

1625
    """
1626
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1627
    node_current = instanceconfig.primary_node
1628

    
1629
    node_vol_should = {}
1630
    instanceconfig.MapLVsByNode(node_vol_should)
1631

    
1632
    for node in node_vol_should:
1633
      n_img = node_image[node]
1634
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1635
        # ignore missing volumes on offline or broken nodes
1636
        continue
1637
      for volume in node_vol_should[node]:
1638
        test = volume not in n_img.volumes
1639
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1640
                 "volume %s missing on node %s", volume, node)
1641

    
1642
    if instanceconfig.admin_up:
1643
      pri_img = node_image[node_current]
1644
      test = instance not in pri_img.instances and not pri_img.offline
1645
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1646
               "instance not running on its primary node %s",
1647
               node_current)
1648

    
1649
    for node, n_img in node_image.items():
1650
      if node != node_current:
1651
        test = instance in n_img.instances
1652
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1653
                 "instance should not run on node %s", node)
1654

    
1655
    diskdata = [(nname, success, status, idx)
1656
                for (nname, disks) in diskstatus.items()
1657
                for idx, (success, status) in enumerate(disks)]
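    # diskdata flattens the per-node results into (node, success, status,
    # disk_index) tuples so that each disk can be checked individually below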
1658

    
1659
    for nname, success, bdev_status, idx in diskdata:
1660
      _ErrorIf(instanceconfig.admin_up and not success,
1661
               self.EINSTANCEFAULTYDISK, instance,
1662
               "couldn't retrieve status for disk/%s on %s: %s",
1663
               idx, nname, bdev_status)
1664
      _ErrorIf((instanceconfig.admin_up and success and
1665
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1666
               self.EINSTANCEFAULTYDISK, instance,
1667
               "disk/%s on %s is faulty", idx, nname)
1668

    
1669
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1670
    """Verify if there are any unknown volumes in the cluster.
1671

1672
    The .os, .swap and backup volumes are ignored. All other volumes are
1673
    reported as unknown.
1674

1675
    @type reserved: L{ganeti.utils.FieldSet}
1676
    @param reserved: a FieldSet of reserved volume names
1677

1678
    """
1679
    for node, n_img in node_image.items():
1680
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1681
        # skip non-healthy nodes
1682
        continue
1683
      for volume in n_img.volumes:
1684
        test = ((node not in node_vol_should or
1685
                volume not in node_vol_should[node]) and
1686
                not reserved.Matches(volume))
1687
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1688
                      "volume %s is unknown", volume)
1689

    
1690
  def _VerifyOrphanInstances(self, instancelist, node_image):
1691
    """Verify the list of running instances.
1692

1693
    This checks what instances are running but unknown to the cluster.
1694

1695
    """
1696
    for node, n_img in node_image.items():
1697
      for o_inst in n_img.instances:
1698
        test = o_inst not in instancelist
1699
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1700
                      "instance %s on node %s should not exist", o_inst, node)
1701

    
1702
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1703
    """Verify N+1 Memory Resilience.
1704

1705
    Check that if one single node dies we can still start all the
1706
    instances it was primary for.
1707

1708
    """
1709
    for node, n_img in node_image.items():
1710
      # This code checks that every node which is now listed as
1711
      # secondary has enough memory to host all instances it is
1712
      # supposed to should a single other node in the cluster fail.
1713
      # FIXME: not ready for failover to an arbitrary node
1714
      # FIXME: does not support file-backed instances
1715
      # WARNING: we currently take into account down instances as well
1716
      # as up ones, considering that even if they're down someone
1717
      # might want to start them even in the event of a node failure.
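      # Note: n_img.sbp maps each primary node to the instances that use this
      # node as their secondary; summing their memory gives what this node
      # would have to host should that primary node fail.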
1718
      for prinode, instances in n_img.sbp.items():
1719
        needed_mem = 0
1720
        for instance in instances:
1721
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1722
          if bep[constants.BE_AUTO_BALANCE]:
1723
            needed_mem += bep[constants.BE_MEMORY]
1724
        test = n_img.mfree < needed_mem
1725
        self._ErrorIf(test, self.ENODEN1, node,
1726
                      "not enough memory to accommodate"
1727
                      " failovers should peer node %s fail", prinode)
1728

    
1729
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1730
                       master_files):
1731
    """Verifies and computes the node required file checksums.
1732

1733
    @type ninfo: L{objects.Node}
1734
    @param ninfo: the node to check
1735
    @param nresult: the remote results for the node
1736
    @param file_list: required list of files
1737
    @param local_cksum: dictionary of local files and their checksums
1738
    @param master_files: list of files that only masters should have
1739

1740
    """
1741
    node = ninfo.name
1742
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1743

    
1744
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1745
    test = not isinstance(remote_cksum, dict)
1746
    _ErrorIf(test, self.ENODEFILECHECK, node,
1747
             "node hasn't returned file checksum data")
1748
    if test:
1749
      return
1750

    
1751
    for file_name in file_list:
1752
      node_is_mc = ninfo.master_candidate
1753
      must_have = (file_name not in master_files) or node_is_mc
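      # files listed in master_files are only required on master candidates;
      # every other file in file_list must be present on all nodes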
1754
      # missing
1755
      test1 = file_name not in remote_cksum
1756
      # invalid checksum
1757
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1758
      # existing and good
1759
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1760
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1761
               "file '%s' missing", file_name)
1762
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1763
               "file '%s' has wrong checksum", file_name)
1764
      # not candidate and this is not a must-have file
1765
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1766
               "file '%s' should not exist on non master"
1767
               " candidates (and the file is outdated)", file_name)
1768
      # all good, except non-master/non-must have combination
1769
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1770
               "file '%s' should not exist"
1771
               " on non master candidates", file_name)
1772

    
1773
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1774
                      drbd_map):
1775
    """Verifies the node DRBD status.
1776

1777
    @type ninfo: L{objects.Node}
1778
    @param ninfo: the node to check
1779
    @param nresult: the remote results for the node
1780
    @param instanceinfo: the dict of instances
1781
    @param drbd_helper: the configured DRBD usermode helper
1782
    @param drbd_map: the DRBD map as returned by
1783
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1784

1785
    """
1786
    node = ninfo.name
1787
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1788

    
1789
    if drbd_helper:
1790
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1791
      test = (helper_result is None)
1792
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1793
               "no drbd usermode helper returned")
1794
      if helper_result:
1795
        status, payload = helper_result
1796
        test = not status
1797
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1798
                 "drbd usermode helper check unsuccessful: %s", payload)
1799
        test = status and (payload != drbd_helper)
1800
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1801
                 "wrong drbd usermode helper: %s", payload)
1802

    
1803
    # compute the DRBD minors
1804
    node_drbd = {}
1805
    for minor, instance in drbd_map[node].items():
1806
      test = instance not in instanceinfo
1807
      _ErrorIf(test, self.ECLUSTERCFG, None,
1808
               "ghost instance '%s' in temporary DRBD map", instance)
1809
      # ghost instance should not be running, but otherwise we
1810
      # don't give double warnings (both ghost instance and
1811
      # unallocated minor in use)
1812
      if test:
1813
        node_drbd[minor] = (instance, False)
1814
      else:
1815
        instance = instanceinfo[instance]
1816
        node_drbd[minor] = (instance.name, instance.admin_up)
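    # node_drbd now maps each minor to (instance_name, should_be_active),
    # e.g. {0: ("instance1.example.com", True)} (example values only)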
1817

    
1818
    # and now check them
1819
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1820
    test = not isinstance(used_minors, (tuple, list))
1821
    _ErrorIf(test, self.ENODEDRBD, node,
1822
             "cannot parse drbd status file: %s", str(used_minors))
1823
    if test:
1824
      # we cannot check drbd status
1825
      return
1826

    
1827
    for minor, (iname, must_exist) in node_drbd.items():
1828
      test = minor not in used_minors and must_exist
1829
      _ErrorIf(test, self.ENODEDRBD, node,
1830
               "drbd minor %d of instance %s is not active", minor, iname)
1831
    for minor in used_minors:
1832
      test = minor not in node_drbd
1833
      _ErrorIf(test, self.ENODEDRBD, node,
1834
               "unallocated drbd minor %d is in use", minor)
1835

    
1836
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1837
    """Builds the node OS structures.
1838

1839
    @type ninfo: L{objects.Node}
1840
    @param ninfo: the node to check
1841
    @param nresult: the remote results for the node
1842
    @param nimg: the node image object
1843

1844
    """
1845
    node = ninfo.name
1846
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1847

    
1848
    remote_os = nresult.get(constants.NV_OSLIST, None)
1849
    test = (not isinstance(remote_os, list) or
1850
            not compat.all(isinstance(v, list) and len(v) == 7
1851
                           for v in remote_os))
1852

    
1853
    _ErrorIf(test, self.ENODEOS, node,
1854
             "node hasn't returned valid OS data")
1855

    
1856
    nimg.os_fail = test
1857

    
1858
    if test:
1859
      return
1860

    
1861
    os_dict = {}
1862

    
1863
    for (name, os_path, status, diagnose,
1864
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1865

    
1866
      if name not in os_dict:
1867
        os_dict[name] = []
1868

    
1869
      # parameters is a list of lists instead of list of tuples due to
1870
      # JSON lacking a real tuple type, fix it:
1871
      parameters = [tuple(v) for v in parameters]
1872
      os_dict[name].append((os_path, status, diagnose,
1873
                            set(variants), set(parameters), set(api_ver)))
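    # os_dict maps each OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions) tuples,
    # one entry per location in which the OS was found on the node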
1874

    
1875
    nimg.oslist = os_dict
1876

    
1877
  def _VerifyNodeOS(self, ninfo, nimg, base):
1878
    """Verifies the node OS list.
1879

1880
    @type ninfo: L{objects.Node}
1881
    @param ninfo: the node to check
1882
    @param nimg: the node image object
1883
    @param base: the 'template' node we match against (e.g. from the master)
1884

1885
    """
1886
    node = ninfo.name
1887
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1888

    
1889
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1890

    
1891
    for os_name, os_data in nimg.oslist.items():
1892
      assert os_data, "Empty OS status for OS %s?!" % os_name
1893
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1894
      _ErrorIf(not f_status, self.ENODEOS, node,
1895
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1896
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1897
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1898
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1899
      # this will be caught in the backend too
1900
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1901
               and not f_var, self.ENODEOS, node,
1902
               "OS %s with API at least %d does not declare any variant",
1903
               os_name, constants.OS_API_V15)
1904
      # comparisons with the 'base' image
1905
      test = os_name not in base.oslist
1906
      _ErrorIf(test, self.ENODEOS, node,
1907
               "Extra OS %s not present on reference node (%s)",
1908
               os_name, base.name)
1909
      if test:
1910
        continue
1911
      assert base.oslist[os_name], "Base node has empty OS status?"
1912
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1913
      if not b_status:
1914
        # base OS is invalid, skipping
1915
        continue
1916
      for kind, a, b in [("API version", f_api, b_api),
1917
                         ("variants list", f_var, b_var),
1918
                         ("parameters", f_param, b_param)]:
1919
        _ErrorIf(a != b, self.ENODEOS, node,
1920
                 "OS %s of %s differs from reference node %s: %s vs. %s",
1921
                 kind, os_name, base.name,
1922
                 utils.CommaJoin(a), utils.CommaJoin(b))
1923

    
1924
    # check any missing OSes
1925
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1926
    _ErrorIf(missing, self.ENODEOS, node,
1927
             "OSes present on reference node %s but missing on this node: %s",
1928
             base.name, utils.CommaJoin(missing))
1929

    
1930
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1931
    """Verifies and updates the node volume data.
1932

1933
    This function will update a L{NodeImage}'s internal structures
1934
    with data from the remote call.
1935

1936
    @type ninfo: L{objects.Node}
1937
    @param ninfo: the node to check
1938
    @param nresult: the remote results for the node
1939
    @param nimg: the node image object
1940
    @param vg_name: the configured VG name
1941

1942
    """
1943
    node = ninfo.name
1944
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1945

    
1946
    nimg.lvm_fail = True
1947
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1948
    if vg_name is None:
1949
      pass
1950
    elif isinstance(lvdata, basestring):
1951
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1952
               utils.SafeEncode(lvdata))
1953
    elif not isinstance(lvdata, dict):
1954
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1955
    else:
1956
      nimg.volumes = lvdata
1957
      nimg.lvm_fail = False
1958

    
1959
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1960
    """Verifies and updates the node instance list.
1961

1962
    If the listing was successful, then updates this node's instance
1963
    list. Otherwise, it marks the RPC call as failed for the instance
1964
    list key.
1965

1966
    @type ninfo: L{objects.Node}
1967
    @param ninfo: the node to check
1968
    @param nresult: the remote results for the node
1969
    @param nimg: the node image object
1970

1971
    """
1972
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1973
    test = not isinstance(idata, list)
1974
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1975
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1976
    if test:
1977
      nimg.hyp_fail = True
1978
    else:
1979
      nimg.instances = idata
1980

    
1981
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1982
    """Verifies and computes a node information map
1983

1984
    @type ninfo: L{objects.Node}
1985
    @param ninfo: the node to check
1986
    @param nresult: the remote results for the node
1987
    @param nimg: the node image object
1988
    @param vg_name: the configured VG name
1989

1990
    """
1991
    node = ninfo.name
1992
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1993

    
1994
    # try to read free memory (from the hypervisor)
1995
    hv_info = nresult.get(constants.NV_HVINFO, None)
1996
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1997
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1998
    if not test:
1999
      try:
2000
        nimg.mfree = int(hv_info["memory_free"])
2001
      except (ValueError, TypeError):
2002
        _ErrorIf(True, self.ENODERPC, node,
2003
                 "node returned invalid nodeinfo, check hypervisor")
2004

    
2005
    # FIXME: devise a free space model for file based instances as well
2006
    if vg_name is not None:
2007
      test = (constants.NV_VGLIST not in nresult or
2008
              vg_name not in nresult[constants.NV_VGLIST])
2009
      _ErrorIf(test, self.ENODELVM, node,
2010
               "node didn't return data for the volume group '%s'"
2011
               " - it is either missing or broken", vg_name)
2012
      if not test:
2013
        try:
2014
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2015
        except (ValueError, TypeError):
2016
          _ErrorIf(True, self.ENODERPC, node,
2017
                   "node returned invalid LVM info, check LVM status")
2018

    
2019
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2020
    """Gets per-disk status information for all instances.
2021

2022
    @type nodelist: list of strings
2023
    @param nodelist: Node names
2024
    @type node_image: dict of (name, L{NodeImage})
2025
    @param node_image: Node image objects
2026
    @type instanceinfo: dict of (name, L{objects.Instance})
2027
    @param instanceinfo: Instance objects
2028

2029
    """
2030
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2031

    
2032
    node_disks = {}
2033
    node_disks_devonly = {}
2034

    
2035
    for nname in nodelist:
2036
      disks = [(inst, disk)
2037
               for instlist in [node_image[nname].pinst,
2038
                                node_image[nname].sinst]
2039
               for inst in instlist
2040
               for disk in instanceinfo[inst].disks]
2041

    
2042
      if not disks:
2043
        # No need to collect data
2044
        continue
2045

    
2046
      node_disks[nname] = disks
2047

    
2048
      # Creating copies as SetDiskID below will modify the objects and that can
2049
      # lead to incorrect data returned from nodes
2050
      devonly = [dev.Copy() for (_, dev) in disks]
2051

    
2052
      for dev in devonly:
2053
        self.cfg.SetDiskID(dev, nname)
2054

    
2055
      node_disks_devonly[nname] = devonly
2056

    
2057
    assert len(node_disks) == len(node_disks_devonly)
2058

    
2059
    # Collect data from all nodes with disks
2060
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2061
                                                          node_disks_devonly)
2062

    
2063
    assert len(result) == len(node_disks)
2064

    
2065
    instdisk = {}
2066

    
2067
    for (nname, nres) in result.items():
2068
      if nres.offline:
2069
        # Ignore offline node
2070
        continue
2071

    
2072
      disks = node_disks[nname]
2073

    
2074
      msg = nres.fail_msg
2075
      _ErrorIf(msg, self.ENODERPC, nname,
2076
               "while getting disk information: %s", nres.fail_msg)
2077
      if msg:
2078
        # No data from this node
2079
        data = len(disks) * [None]
2080
      else:
2081
        data = nres.payload
2082

    
2083
      for ((inst, _), status) in zip(disks, data):
2084
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
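    # instdisk maps instance name -> node name -> list of per-disk status
    # results (None entries when the node RPC failed); the assert below
    # documents and checks this shape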
2085

    
2086
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2087
                      len(nnames) <= len(instanceinfo[inst].all_nodes)
2088
                      for inst, nnames in instdisk.items()
2089
                      for nname, statuses in nnames.items())
2090

    
2091
    return instdisk
2092

    
2093
  def BuildHooksEnv(self):
2094
    """Build hooks env.
2095

2096
    Cluster-Verify hooks are run only in the post phase; if they fail, their
2098
    output is logged in the verify output and the verification fails.
2098

2099
    """
2100
    all_nodes = self.cfg.GetNodeList()
2101
    env = {
2102
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2103
      }
2104
    for node in self.cfg.GetAllNodesInfo().values():
2105
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2106

    
2107
    return env, [], all_nodes
2108

    
2109
  def Exec(self, feedback_fn):
2110
    """Verify integrity of the cluster, performing various tests on nodes.
2111

2112
    """
2113
    self.bad = False
2114
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2115
    verbose = self.op.verbose
2116
    self._feedback_fn = feedback_fn
2117
    feedback_fn("* Verifying global settings")
2118
    for msg in self.cfg.VerifyConfig():
2119
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2120

    
2121
    # Check the cluster certificates
2122
    for cert_filename in constants.ALL_CERT_FILES:
2123
      (errcode, msg) = _VerifyCertificate(cert_filename)
2124
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2125

    
2126
    vg_name = self.cfg.GetVGName()
2127
    drbd_helper = self.cfg.GetDRBDHelper()
2128
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2129
    cluster = self.cfg.GetClusterInfo()
2130
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2131
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2132
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2133
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2134
                        for iname in instancelist)
2135
    i_non_redundant = [] # Non redundant instances
2136
    i_non_a_balanced = [] # Non auto-balanced instances
2137
    n_offline = 0 # Count of offline nodes
2138
    n_drained = 0 # Count of nodes being drained
2139
    node_vol_should = {}
2140

    
2141
    # FIXME: verify OS list
2142
    # do local checksums
2143
    master_files = [constants.CLUSTER_CONF_FILE]
2144
    master_node = self.master_node = self.cfg.GetMasterNode()
2145
    master_ip = self.cfg.GetMasterIP()
2146

    
2147
    file_names = ssconf.SimpleStore().GetFileList()
2148
    file_names.extend(constants.ALL_CERT_FILES)
2149
    file_names.extend(master_files)
2150
    if cluster.modify_etc_hosts:
2151
      file_names.append(constants.ETC_HOSTS)
2152

    
2153
    local_checksums = utils.FingerprintFiles(file_names)
2154

    
2155
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2156
    node_verify_param = {
2157
      constants.NV_FILELIST: file_names,
2158
      constants.NV_NODELIST: [node.name for node in nodeinfo
2159
                              if not node.offline],
2160
      constants.NV_HYPERVISOR: hypervisors,
2161
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2162
                                  node.secondary_ip) for node in nodeinfo
2163
                                 if not node.offline],
2164
      constants.NV_INSTANCELIST: hypervisors,
2165
      constants.NV_VERSION: None,
2166
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2167
      constants.NV_NODESETUP: None,
2168
      constants.NV_TIME: None,
2169
      constants.NV_MASTERIP: (master_node, master_ip),
2170
      constants.NV_OSLIST: None,
2171
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2172
      }
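    # each NV_* key above selects one check to be run on the remote nodes;
    # the value carries that check's parameters (None where none are needed)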
2173

    
2174
    if vg_name is not None:
2175
      node_verify_param[constants.NV_VGLIST] = None
2176
      node_verify_param[constants.NV_LVLIST] = vg_name
2177
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2178
      node_verify_param[constants.NV_DRBDLIST] = None
2179

    
2180
    if drbd_helper:
2181
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2182

    
2183
    # Build our expected cluster state
2184
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2185
                                                 name=node.name,
2186
                                                 vm_capable=node.vm_capable))
2187
                      for node in nodeinfo)
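    # the NodeImage objects start out with only static configuration data;
    # the runtime fields (volumes, instances, free memory/disk, OS list, ...)
    # are filled in by the _Update* and _Verify* helpers further down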
2188

    
2189
    for instance in instancelist:
2190
      inst_config = instanceinfo[instance]
2191

    
2192
      for nname in inst_config.all_nodes:
2193
        if nname not in node_image:
2194
          # ghost node
2195
          gnode = self.NodeImage(name=nname)
2196
          gnode.ghost = True
2197
          node_image[nname] = gnode
2198

    
2199
      inst_config.MapLVsByNode(node_vol_should)
2200

    
2201
      pnode = inst_config.primary_node
2202
      node_image[pnode].pinst.append(instance)
2203

    
2204
      for snode in inst_config.secondary_nodes:
2205
        nimg = node_image[snode]
2206
        nimg.sinst.append(instance)
2207
        if pnode not in nimg.sbp:
2208
          nimg.sbp[pnode] = []
2209
        nimg.sbp[pnode].append(instance)
2210

    
2211
    # At this point, we have the in-memory data structures complete,
2212
    # except for the runtime information, which we'll gather next
2213

    
2214
    # Due to the way our RPC system works, exact response times cannot be
2215
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2216
    # time before and after executing the request, we can at least have a time
2217
    # window.
2218
    nvinfo_starttime = time.time()
2219
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2220
                                           self.cfg.GetClusterName())
2221
    nvinfo_endtime = time.time()
2222

    
2223
    all_drbd_map = self.cfg.ComputeDRBDMap()
2224

    
2225
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2226
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2227

    
2228
    feedback_fn("* Verifying node status")
2229

    
2230
    refos_img = None
2231

    
2232
    for node_i in nodeinfo:
2233
      node = node_i.name
2234
      nimg = node_image[node]
2235

    
2236
      if node_i.offline:
2237
        if verbose:
2238
          feedback_fn("* Skipping offline node %s" % (node,))
2239
        n_offline += 1
2240
        continue
2241

    
2242
      if node == master_node:
2243
        ntype = "master"
2244
      elif node_i.master_candidate:
2245
        ntype = "master candidate"
2246
      elif node_i.drained:
2247
        ntype = "drained"
2248
        n_drained += 1
2249
      else:
2250
        ntype = "regular"
2251
      if verbose:
2252
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2253

    
2254
      msg = all_nvinfo[node].fail_msg
2255
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2256
      if msg:
2257
        nimg.rpc_fail = True
2258
        continue
2259

    
2260
      nresult = all_nvinfo[node].payload
2261

    
2262
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2263
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2264
      self._VerifyNodeNetwork(node_i, nresult)
2265
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2266
                            master_files)
2267

    
2268
      if nimg.vm_capable:
2269
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2270
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2271
                             all_drbd_map)
2272

    
2273
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2274
        self._UpdateNodeInstances(node_i, nresult, nimg)
2275
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2276
        self._UpdateNodeOS(node_i, nresult, nimg)
2277
        if not nimg.os_fail:
2278
          if refos_img is None:
2279
            refos_img = nimg
2280
          self._VerifyNodeOS(node_i, nimg, refos_img)
2281

    
2282
    feedback_fn("* Verifying instance status")
2283
    for instance in instancelist:
2284
      if verbose:
2285
        feedback_fn("* Verifying instance %s" % instance)
2286
      inst_config = instanceinfo[instance]
2287
      self._VerifyInstance(instance, inst_config, node_image,
2288
                           instdisk[instance])
2289
      inst_nodes_offline = []
2290

    
2291
      pnode = inst_config.primary_node
2292
      pnode_img = node_image[pnode]
2293
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2294
               self.ENODERPC, pnode, "instance %s, connection to"
2295
               " primary node failed", instance)
2296

    
2297
      if pnode_img.offline:
2298
        inst_nodes_offline.append(pnode)
2299

    
2300
      # If the instance is non-redundant we cannot survive losing its primary
2301
      # node, so we are not N+1 compliant. On the other hand we have no disk
2302
      # templates with more than one secondary so that situation is not well
2303
      # supported either.
2304
      # FIXME: does not support file-backed instances
2305
      if not inst_config.secondary_nodes:
2306
        i_non_redundant.append(instance)
2307
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2308
               instance, "instance has multiple secondary nodes: %s",
2309
               utils.CommaJoin(inst_config.secondary_nodes),
2310
               code=self.ETYPE_WARNING)
2311

    
2312
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2313
        i_non_a_balanced.append(instance)
2314

    
2315
      for snode in inst_config.secondary_nodes:
2316
        s_img = node_image[snode]
2317
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2318
                 "instance %s, connection to secondary node failed", instance)
2319

    
2320
        if s_img.offline:
2321
          inst_nodes_offline.append(snode)
2322

    
2323
      # warn that the instance lives on offline nodes
2324
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2325
               "instance lives on offline node(s) %s",
2326
               utils.CommaJoin(inst_nodes_offline))
2327
      # ... or ghost/non-vm_capable nodes
2328
      for node in inst_config.all_nodes:
2329
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2330
                 "instance lives on ghost node %s", node)
2331
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2332
                 instance, "instance lives on non-vm_capable node %s", node)
2333

    
2334
    feedback_fn("* Verifying orphan volumes")
2335
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2336
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2337

    
2338
    feedback_fn("* Verifying orphan instances")
2339
    self._VerifyOrphanInstances(instancelist, node_image)
2340

    
2341
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2342
      feedback_fn("* Verifying N+1 Memory redundancy")
2343
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2344

    
2345
    feedback_fn("* Other Notes")
2346
    if i_non_redundant:
2347
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2348
                  % len(i_non_redundant))
2349

    
2350
    if i_non_a_balanced:
2351
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2352
                  % len(i_non_a_balanced))
2353

    
2354
    if n_offline:
2355
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2356

    
2357
    if n_drained:
2358
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2359

    
2360
    return not self.bad
2361

    
2362
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2363
    """Analyze the post-hooks' result
2364

2365
    This method analyses the hook result, handles it, and sends some
2366
    nicely-formatted feedback back to the user.
2367

2368
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2369
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2370
    @param hooks_results: the results of the multi-node hooks rpc call
2371
    @param feedback_fn: function used to send feedback back to the caller
2372
    @param lu_result: previous Exec result
2373
    @return: the new Exec result, based on the previous result
2374
        and hook results
2375

2376
    """
2377
    # We only really run POST phase hooks, and are only interested in
2378
    # their results
2379
    if phase == constants.HOOKS_PHASE_POST:
2380
      # Used to change hooks' output to proper indentation
2381
      feedback_fn("* Hooks Results")
2382
      assert hooks_results, "invalid result from hooks"
2383

    
2384
      for node_name in hooks_results:
2385
        res = hooks_results[node_name]
2386
        msg = res.fail_msg
2387
        test = msg and not res.offline
2388
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2389
                      "Communication failure in hooks execution: %s", msg)
2390
        if res.offline or msg:
2391
          # No need to investigate payload if node is offline or gave an error.
2392
          # override manually lu_result here as _ErrorIf only
2393
          # overrides self.bad
2394
          lu_result = 1
2395
          continue
2396
        for script, hkr, output in res.payload:
2397
          test = hkr == constants.HKR_FAIL
2398
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2399
                        "Script %s failed, output:", script)
2400
          if test:
2401
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2402
            feedback_fn("%s" % output)
2403
            lu_result = 0
2404

    
2405
      return lu_result
2406

    
2407

    
2408
class LUVerifyDisks(NoHooksLU):
2409
  """Verifies the cluster disk status.
2410

2411
  """
2412
  REQ_BGL = False
2413

    
2414
  def ExpandNames(self):
2415
    self.needed_locks = {
2416
      locking.LEVEL_NODE: locking.ALL_SET,
2417
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2418
    }
2419
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2420

    
2421
  def Exec(self, feedback_fn):
2422
    """Verify integrity of cluster disks.
2423

2424
    @rtype: tuple of three items
2425
    @return: a tuple of (dict of node-to-node_error, list of instances
2426
        which need activate-disks, dict of instance: (node, volume) for
2427
        missing volumes)
2428

2429
    """
2430
    result = res_nodes, res_instances, res_missing = {}, [], {}
2431

    
2432
    nodes = utils.NiceSort(self.cfg.GetNodeList())
2433
    instances = [self.cfg.GetInstanceInfo(name)
2434
                 for name in self.cfg.GetInstanceList()]
2435

    
2436
    nv_dict = {}
2437
    for inst in instances:
2438
      inst_lvs = {}
2439
      if (not inst.admin_up or
2440
          inst.disk_template not in constants.DTS_NET_MIRROR):
2441
        continue
2442
      inst.MapLVsByNode(inst_lvs)
2443
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2444
      for node, vol_list in inst_lvs.iteritems():
2445
        for vol in vol_list:
2446
          nv_dict[(node, vol)] = inst
2447

    
2448
    if not nv_dict:
2449
      return result
2450

    
2451
    vg_names = self.rpc.call_vg_list(nodes)
2452
    vg_names.Raise("Cannot get list of VGs")
2453

    
2454
    for node in nodes:
2455
      # node_volume
2456
      node_res = self.rpc.call_lv_list([node],
2457
                                       vg_names[node].payload.keys())[node]
2458
      if node_res.offline:
2459
        continue
2460
      msg = node_res.fail_msg
2461
      if msg:
2462
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2463
        res_nodes[node] = msg
2464
        continue
2465

    
2466
      lvs = node_res.payload
2467
      for lv_name, (_, _, lv_online) in lvs.items():
2468
        inst = nv_dict.pop((node, lv_name), None)
2469
        if (not lv_online and inst is not None
2470
            and inst.name not in res_instances):
2471
          res_instances.append(inst.name)
2472

    
2473
    # any leftover items in nv_dict are missing LVs, let's arrange the
2474
    # data better
2475
    for key, inst in nv_dict.iteritems():
2476
      if inst.name not in res_missing:
2477
        res_missing[inst.name] = []
2478
      res_missing[inst.name].append(key)
2479

    
2480
    return result
2481

    
2482

    
2483
class LURepairDiskSizes(NoHooksLU):
2484
  """Verifies the cluster disk sizes.
2485

2486
  """
2487
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2488
  REQ_BGL = False
2489

    
2490
  def ExpandNames(self):
2491
    if self.op.instances:
2492
      self.wanted_names = []
2493
      for name in self.op.instances:
2494
        full_name = _ExpandInstanceName(self.cfg, name)
2495
        self.wanted_names.append(full_name)
2496
      self.needed_locks = {
2497
        locking.LEVEL_NODE: [],
2498
        locking.LEVEL_INSTANCE: self.wanted_names,
2499
        }
2500
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2501
    else:
2502
      self.wanted_names = None
2503
      self.needed_locks = {
2504
        locking.LEVEL_NODE: locking.ALL_SET,
2505
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2506
        }
2507
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2508

    
2509
  def DeclareLocks(self, level):
2510
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2511
      self._LockInstancesNodes(primary_only=True)
2512

    
2513
  def CheckPrereq(self):
2514
    """Check prerequisites.
2515

2516
    This only checks the optional instance list against the existing names.
2517

2518
    """
2519
    if self.wanted_names is None:
2520
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2521

    
2522
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2523
                             in self.wanted_names]
2524

    
2525
  def _EnsureChildSizes(self, disk):
2526
    """Ensure children of the disk have the needed disk size.
2527

2528
    This is valid mainly for DRBD8 and fixes an issue where the
2529
    children have a smaller disk size than their parent.
2530

2531
    @param disk: an L{ganeti.objects.Disk} object
2532

2533
    """
2534
    if disk.dev_type == constants.LD_DRBD8:
2535
      assert disk.children, "Empty children for DRBD8?"
2536
      fchild = disk.children[0]
2537
      mismatch = fchild.size < disk.size
2538
      if mismatch:
2539
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2540
                     fchild.size, disk.size)
2541
        fchild.size = disk.size
2542

    
2543
      # and we recurse on this child only, not on the metadev
2544
      return self._EnsureChildSizes(fchild) or mismatch
2545
    else:
2546
      return False
2547

    
2548
  def Exec(self, feedback_fn):
2549
    """Verify the size of cluster disks.
2550

2551
    """
2552
    # TODO: check child disks too
2553
    # TODO: check differences in size between primary/secondary nodes
2554
    per_node_disks = {}
2555
    for instance in self.wanted_instances:
2556
      pnode = instance.primary_node
2557
      if pnode not in per_node_disks:
2558
        per_node_disks[pnode] = []
2559
      for idx, disk in enumerate(instance.disks):
2560
        per_node_disks[pnode].append((instance, idx, disk))
2561

    
2562
    changed = []
2563
    for node, dskl in per_node_disks.items():
2564
      newl = [v[2].Copy() for v in dskl]
2565
      for dsk in newl:
2566
        self.cfg.SetDiskID(dsk, node)
2567
      result = self.rpc.call_blockdev_getsizes(node, newl)
2568
      if result.fail_msg:
2569
        self.LogWarning("Failure in blockdev_getsizes call to node"
2570
                        " %s, ignoring", node)
2571
        continue
2572
      if len(result.data) != len(dskl):
2573
        self.LogWarning("Invalid result from node %s, ignoring node results",
2574
                        node)
2575
        continue
2576
      for ((instance, idx, disk), size) in zip(dskl, result.data):
2577
        if size is None:
2578
          self.LogWarning("Disk %d of instance %s did not return size"
2579
                          " information, ignoring", idx, instance.name)
2580
          continue
2581
        if not isinstance(size, (int, long)):
2582
          self.LogWarning("Disk %d of instance %s did not return valid"
2583
                          " size information, ignoring", idx, instance.name)
2584
          continue
2585
        size = size >> 20
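        # the shift converts the byte count reported by the node into MiB,
        # the unit in which disk.size is stored in the configuration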
2586
        if size != disk.size:
2587
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2588
                       " correcting: recorded %d, actual %d", idx,
2589
                       instance.name, disk.size, size)
2590
          disk.size = size
2591
          self.cfg.Update(instance, feedback_fn)
2592
          changed.append((instance.name, idx, size))
2593
        if self._EnsureChildSizes(disk):
2594
          self.cfg.Update(instance, feedback_fn)
2595
          changed.append((instance.name, idx, disk.size))
2596
    return changed
2597

    
2598

    
2599
class LURenameCluster(LogicalUnit):
2600
  """Rename the cluster.
2601

2602
  """
2603
  HPATH = "cluster-rename"
2604
  HTYPE = constants.HTYPE_CLUSTER
2605
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2606

    
2607
  def BuildHooksEnv(self):
2608
    """Build hooks env.
2609

2610
    """
2611
    env = {
2612
      "OP_TARGET": self.cfg.GetClusterName(),
2613
      "NEW_NAME": self.op.name,
2614
      }
2615
    mn = self.cfg.GetMasterNode()
2616
    all_nodes = self.cfg.GetNodeList()
2617
    return env, [mn], all_nodes
2618

    
2619
  def CheckPrereq(self):
2620
    """Verify that the passed name is a valid one.
2621

2622
    """
2623
    hostname = netutils.GetHostname(name=self.op.name,
2624
                                    family=self.cfg.GetPrimaryIPFamily())
2625

    
2626
    new_name = hostname.name
2627
    self.ip = new_ip = hostname.ip
2628
    old_name = self.cfg.GetClusterName()
2629
    old_ip = self.cfg.GetMasterIP()
2630
    if new_name == old_name and new_ip == old_ip:
2631
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2632
                                 " cluster has changed",
2633
                                 errors.ECODE_INVAL)
2634
    if new_ip != old_ip:
2635
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2636
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2637
                                   " reachable on the network" %
2638
                                   new_ip, errors.ECODE_NOTUNIQUE)
2639

    
2640
    self.op.name = new_name
2641

    
2642
  def Exec(self, feedback_fn):
2643
    """Rename the cluster.
2644

2645
    """
2646
    clustername = self.op.name
2647
    ip = self.ip
2648

    
2649
    # shutdown the master IP
2650
    master = self.cfg.GetMasterNode()
2651
    result = self.rpc.call_node_stop_master(master, False)
2652
    result.Raise("Could not disable the master role")
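    # the master role (and with it the master IP) stays down while the new
    # name/IP are written to the configuration; the finally: block below
    # restarts it even if the update fails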
2653

    
2654
    try:
2655
      cluster = self.cfg.GetClusterInfo()
2656
      cluster.cluster_name = clustername
2657
      cluster.master_ip = ip
2658
      self.cfg.Update(cluster, feedback_fn)
2659

    
2660
      # update the known hosts file
2661
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2662
      node_list = self.cfg.GetOnlineNodeList()
2663
      try:
2664
        node_list.remove(master)
2665
      except ValueError:
2666
        pass
2667
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2668
    finally:
2669
      result = self.rpc.call_node_start_master(master, False, False)
2670
      msg = result.fail_msg
2671
      if msg:
2672
        self.LogWarning("Could not re-enable the master role on"
2673
                        " the master, please restart manually: %s", msg)
2674

    
2675
    return clustername
2676

    
2677

    
2678
class LUSetClusterParams(LogicalUnit):
2679
  """Change the parameters of the cluster.
2680

2681
  """
2682
  HPATH = "cluster-modify"
2683
  HTYPE = constants.HTYPE_CLUSTER
2684
  _OP_PARAMS = [
2685
    ("vg_name", None, ht.TMaybeString),
2686
    ("enabled_hypervisors", None,
2687
     ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2688
            ht.TNone)),
2689
    ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2690
                              ht.TNone)),
2691
    ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2692
    ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2693
                            ht.TNone)),
2694
    ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2695
                              ht.TNone)),
2696
    ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2697
    ("uid_pool", None, ht.NoType),
2698
    ("add_uids", None, ht.NoType),
2699
    ("remove_uids", None, ht.NoType),
2700
    ("maintain_node_health", None, ht.TMaybeBool),
2701
    ("prealloc_wipe_disks", None, ht.TMaybeBool),
2702
    ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2703
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2704
    ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2705
    ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2706
    ("master_netdev", None, ht.TOr(ht.TString, ht.TNone)),
2707
    ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2708
    ("hidden_os", None, ht.TOr(ht.TListOf(\
2709
          ht.TAnd(ht.TList,
2710
                ht.TIsLength(2),
2711
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2712
          ht.TNone)),
2713
    ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2714
          ht.TAnd(ht.TList,
2715
                ht.TIsLength(2),
2716
                ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2717
          ht.TNone)),
2718
    ]
2719
  REQ_BGL = False
2720

    
2721
  def CheckArguments(self):
2722
    """Check parameters
2723

2724
    """
2725
    if self.op.uid_pool:
2726
      uidpool.CheckUidPool(self.op.uid_pool)
2727

    
2728
    if self.op.add_uids:
2729
      uidpool.CheckUidPool(self.op.add_uids)
2730

    
2731
    if self.op.remove_uids:
2732
      uidpool.CheckUidPool(self.op.remove_uids)
2733

    
2734
  def ExpandNames(self):
2735
    # FIXME: in the future maybe other cluster params won't require checking on
2736
    # all nodes to be modified.
2737
    self.needed_locks = {
2738
      locking.LEVEL_NODE: locking.ALL_SET,
2739
    }
2740
    self.share_locks[locking.LEVEL_NODE] = 1
2741

    
2742
  def BuildHooksEnv(self):
2743
    """Build hooks env.
2744

2745
    """
2746
    env = {
2747
      "OP_TARGET": self.cfg.GetClusterName(),
2748
      "NEW_VG_NAME": self.op.vg_name,
2749
      }
2750
    mn = self.cfg.GetMasterNode()
2751
    return env, [mn], [mn]
2752

    
2753
  def CheckPrereq(self):
2754
    """Check prerequisites.
2755

2756
    This checks whether the given params don't conflict and
2757
    if the given volume group is valid.
2758

2759
    """
2760
    if self.op.vg_name is not None and not self.op.vg_name:
2761
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2762
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2763
                                   " instances exist", errors.ECODE_INVAL)
2764

    
2765
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2766
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2767
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2768
                                   " drbd-based instances exist",
2769
                                   errors.ECODE_INVAL)
2770

    
2771
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2772

    
2773
    # if vg_name not None, checks given volume group on all nodes
2774
    if self.op.vg_name:
2775
      vglist = self.rpc.call_vg_list(node_list)
2776
      for node in node_list:
2777
        msg = vglist[node].fail_msg
2778
        if msg:
2779
          # ignoring down node
2780
          self.LogWarning("Error while gathering data on node %s"
2781
                          " (ignoring node): %s", node, msg)
2782
          continue
2783
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2784
                                              self.op.vg_name,
2785
                                              constants.MIN_VG_SIZE)
2786
        if vgstatus:
2787
          raise errors.OpPrereqError("Error on node '%s': %s" %
2788
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2789

    
2790
    if self.op.drbd_helper:
2791
      # checks given drbd helper on all nodes
2792
      helpers = self.rpc.call_drbd_helper(node_list)
2793
      for node in node_list:
2794
        ninfo = self.cfg.GetNodeInfo(node)
2795
        if ninfo.offline:
2796
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2797
          continue
2798
        msg = helpers[node].fail_msg
2799
        if msg:
2800
          raise errors.OpPrereqError("Error checking drbd helper on node"
2801
                                     " '%s': %s" % (node, msg),
2802
                                     errors.ECODE_ENVIRON)
2803
        node_helper = helpers[node].payload
2804
        if node_helper != self.op.drbd_helper:
2805
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2806
                                     (node, node_helper), errors.ECODE_ENVIRON)
2807

    
2808
    self.cluster = cluster = self.cfg.GetClusterInfo()
2809
    # validate params changes
2810
    if self.op.beparams:
2811
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2812
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2813

    
2814
    if self.op.ndparams:
2815
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2816
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2817

    
2818
    if self.op.nicparams:
2819
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2820
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2821
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2822
      nic_errors = []
2823

    
2824
      # check all instances for consistency
2825
      for instance in self.cfg.GetAllInstancesInfo().values():
2826
        for nic_idx, nic in enumerate(instance.nics):
2827
          params_copy = copy.deepcopy(nic.nicparams)
2828
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2829

    
2830
          # check parameter syntax
2831
          try:
2832
            objects.NIC.CheckParameterSyntax(params_filled)
2833
          except errors.ConfigurationError, err:
2834
            nic_errors.append("Instance %s, nic/%d: %s" %
2835
                              (instance.name, nic_idx, err))
2836

    
2837
          # if we're moving instances to routed, check that they have an ip
2838
          target_mode = params_filled[constants.NIC_MODE]
2839
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2840
            nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2841
                              (instance.name, nic_idx))
2842
      if nic_errors:
2843
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2844
                                   "\n".join(nic_errors))
2845

    
2846
    # hypervisor list/parameters
2847
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2848
    if self.op.hvparams:
2849
      for hv_name, hv_dict in self.op.hvparams.items():
2850
        if hv_name not in self.new_hvparams:
2851
          self.new_hvparams[hv_name] = hv_dict
2852
        else:
2853
          self.new_hvparams[hv_name].update(hv_dict)
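    # new_hvparams now holds the current cluster hvparams with the opcode's
    # per-hypervisor settings merged on top; keys not mentioned in the opcode
    # keep their existing values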
2854

    
2855
    # os hypervisor parameters
2856
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2857
    if self.op.os_hvp:
2858
      for os_name, hvs in self.op.os_hvp.items():
2859
        if os_name not in self.new_os_hvp:
2860
          self.new_os_hvp[os_name] = hvs
2861
        else:
2862
          for hv_name, hv_dict in hvs.items():
2863
            if hv_name not in self.new_os_hvp[os_name]:
2864
              self.new_os_hvp[os_name][hv_name] = hv_dict
2865
            else:
2866
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2867

    
2868
    # os parameters
2869
    self.new_osp = objects.FillDict(cluster.osparams, {})
2870
    if self.op.osparams:
2871
      for os_name, osp in self.op.osparams.items():
2872
        if os_name not in self.new_osp:
2873
          self.new_osp[os_name] = {}
2874

    
2875
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2876
                                                  use_none=True)
2877

    
2878
        if not self.new_osp[os_name]:
2879
          # we removed all parameters
2880
          del self.new_osp[os_name]
2881
        else:
2882
          # check the parameter validity (remote check)
2883
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2884
                         os_name, self.new_osp[os_name])
2885

    
2886
    # changes to the hypervisor list
2887
    if self.op.enabled_hypervisors is not None:
2888
      self.hv_list = self.op.enabled_hypervisors
2889
      for hv in self.hv_list:
2890
        # if the hypervisor doesn't already exist in the cluster
2891
        # hvparams, we initialize it to empty, and then (in both
2892
        # cases) we make sure to fill the defaults, as we might not
2893
        # have a complete defaults list if the hypervisor wasn't
2894
        # enabled before
2895
        if hv not in new_hvp:
2896
          new_hvp[hv] = {}
2897
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2898
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2899
    else:
2900
      self.hv_list = cluster.enabled_hypervisors
2901

    
2902
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2903
      # either the enabled list has changed, or the parameters have, validate
2904
      for hv_name, hv_params in self.new_hvparams.items():
2905
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2906
            (self.op.enabled_hypervisors and
2907
             hv_name in self.op.enabled_hypervisors)):
2908
          # either this is a new hypervisor, or its parameters have changed
2909
          hv_class = hypervisor.GetHypervisor(hv_name)
2910
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2911
          hv_class.CheckParameterSyntax(hv_params)
2912
          _CheckHVParams(self, node_list, hv_name, hv_params)
2913

    
2914
    if self.op.os_hvp:
2915
      # no need to check any newly-enabled hypervisors, since the
2916
      # defaults have already been checked in the above code-block
2917
      for os_name, os_hvp in self.new_os_hvp.items():
2918
        for hv_name, hv_params in os_hvp.items():
2919
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2920
          # we need to fill in the new os_hvp on top of the actual hv_p
2921
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2922
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2923
          hv_class = hypervisor.GetHypervisor(hv_name)
2924
          hv_class.CheckParameterSyntax(new_osp)
2925
          _CheckHVParams(self, node_list, hv_name, new_osp)
2926

    
2927
    if self.op.default_iallocator:
2928
      alloc_script = utils.FindFile(self.op.default_iallocator,
2929
                                    constants.IALLOCATOR_SEARCH_PATH,
2930
                                    os.path.isfile)
2931
      if alloc_script is None:
2932
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2933
                                   " specified" % self.op.default_iallocator,
2934
                                   errors.ECODE_INVAL)
2935

    
2936
  def Exec(self, feedback_fn):
2937
    """Change the parameters of the cluster.
2938

2939
    """
2940
    if self.op.vg_name is not None:
2941
      new_volume = self.op.vg_name
2942
      if not new_volume:
2943
        new_volume = None
2944
      if new_volume != self.cfg.GetVGName():
2945
        self.cfg.SetVGName(new_volume)
2946
      else:
2947
        feedback_fn("Cluster LVM configuration already in desired"
2948
                    " state, not changing")
2949
    if self.op.drbd_helper is not None:
2950
      new_helper = self.op.drbd_helper
2951
      if not new_helper:
2952
        new_helper = None
2953
      if new_helper != self.cfg.GetDRBDHelper():
2954
        self.cfg.SetDRBDHelper(new_helper)
2955
      else:
2956
        feedback_fn("Cluster DRBD helper already in desired state,"
2957
                    " not changing")
2958
    if self.op.hvparams:
2959
      self.cluster.hvparams = self.new_hvparams
2960
    if self.op.os_hvp:
2961
      self.cluster.os_hvp = self.new_os_hvp
2962
    if self.op.enabled_hypervisors is not None:
2963
      self.cluster.hvparams = self.new_hvparams
2964
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2965
    if self.op.beparams:
2966
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2967
    if self.op.nicparams:
2968
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2969
    if self.op.osparams:
2970
      self.cluster.osparams = self.new_osp
2971
    if self.op.ndparams:
2972
      self.cluster.ndparams = self.new_ndparams
2973

    
2974
    if self.op.candidate_pool_size is not None:
2975
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2976
      # we need to update the pool size here, otherwise the save will fail
2977
      _AdjustCandidatePool(self, [])
2978

    
2979
    if self.op.maintain_node_health is not None:
2980
      self.cluster.maintain_node_health = self.op.maintain_node_health
2981

    
2982
    if self.op.prealloc_wipe_disks is not None:
2983
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2984

    
2985
    if self.op.add_uids is not None:
2986
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2987

    
2988
    if self.op.remove_uids is not None:
2989
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2990

    
2991
    if self.op.uid_pool is not None:
2992
      self.cluster.uid_pool = self.op.uid_pool
2993

    
2994
    if self.op.default_iallocator is not None:
2995
      self.cluster.default_iallocator = self.op.default_iallocator
2996

    
2997
    if self.op.reserved_lvs is not None:
2998
      self.cluster.reserved_lvs = self.op.reserved_lvs
2999

    
3000
    def helper_os(aname, mods, desc):
3001
      desc += " OS list"
3002
      lst = getattr(self.cluster, aname)
3003
      for key, val in mods:
3004
        if key == constants.DDM_ADD:
3005
          if val in lst:
3006
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3007
          else:
3008
            lst.append(val)
3009
        elif key == constants.DDM_REMOVE:
3010
          if val in lst:
3011
            lst.remove(val)
3012
          else:
3013
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3014
        else:
3015
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3016

    
3017
    if self.op.hidden_os:
3018
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3019

    
3020
    if self.op.blacklisted_os:
3021
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3022

    
3023
    if self.op.master_netdev:
3024
      master = self.cfg.GetMasterNode()
3025
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3026
                  self.cluster.master_netdev)
3027
      result = self.rpc.call_node_stop_master(master, False)
3028
      result.Raise("Could not disable the master ip")
3029
      feedback_fn("Changing master_netdev from %s to %s" %
3030
                  (self.cluster.master_netdev, self.op.master_netdev))
3031
      self.cluster.master_netdev = self.op.master_netdev
3032

    
3033
    self.cfg.Update(self.cluster, feedback_fn)
3034

    
3035
    if self.op.master_netdev:
3036
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3037
                  self.op.master_netdev)
3038
      result = self.rpc.call_node_start_master(master, False, False)
3039
      if result.fail_msg:
3040
        self.LogWarning("Could not re-enable the master ip on"
3041
                        " the master, please restart manually: %s",
3042
                        result.fail_msg)
3043

    
3044

    
3045
def _UploadHelper(lu, nodes, fname):
3046
  """Helper for uploading a file and showing warnings.
3047

3048
  """
3049
  if os.path.exists(fname):
3050
    result = lu.rpc.call_upload_file(nodes, fname)
3051
    for to_node, to_result in result.items():
3052
      msg = to_result.fail_msg
3053
      if msg:
3054
        msg = ("Copy of file %s to node %s failed: %s" %
3055
               (fname, to_node, msg))
3056
        lu.proc.LogWarning(msg)
3057

    
3058

    
3059
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3060
  """Distribute additional files which are part of the cluster configuration.
3061

3062
  ConfigWriter takes care of distributing the config and ssconf files, but
3063
  there are more files which should be distributed to all nodes. This function
3064
  makes sure those are copied.
3065

3066
  @param lu: calling logical unit
3067
  @param additional_nodes: list of nodes not in the config to distribute to
3068
  @type additional_vm: boolean
3069
  @param additional_vm: whether the additional nodes are vm-capable or not
3070

3071
  """
3072
  # 1. Gather target nodes
3073
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3074
  dist_nodes = lu.cfg.GetOnlineNodeList()
3075
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3076
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3077
  if additional_nodes is not None:
3078
    dist_nodes.extend(additional_nodes)
3079
    if additional_vm:
3080
      vm_nodes.extend(additional_nodes)
3081
  if myself.name in dist_nodes:
3082
    dist_nodes.remove(myself.name)
3083
  if myself.name in vm_nodes:
3084
    vm_nodes.remove(myself.name)
3085

    
3086
  # 2. Gather files to distribute
3087
  dist_files = set([constants.ETC_HOSTS,
3088
                    constants.SSH_KNOWN_HOSTS_FILE,
3089
                    constants.RAPI_CERT_FILE,
3090
                    constants.RAPI_USERS_FILE,
3091
                    constants.CONFD_HMAC_KEY,
3092
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
3093
                   ])
3094

    
3095
  vm_files = set()
3096
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3097
  for hv_name in enabled_hypervisors:
3098
    hv_class = hypervisor.GetHypervisor(hv_name)
3099
    vm_files.update(hv_class.GetAncillaryFiles())
3100

    
3101
  # 3. Perform the files upload
3102
  for fname in dist_files:
3103
    _UploadHelper(lu, dist_nodes, fname)
3104
  for fname in vm_files:
3105
    _UploadHelper(lu, vm_nodes, fname)
3106

    
3107

    
3108
class LURedistributeConfig(NoHooksLU):
3109
  """Force the redistribution of cluster configuration.
3110

3111
  This is a very simple LU.
3112

3113
  """
3114
  REQ_BGL = False
3115

    
3116
  def ExpandNames(self):
3117
    self.needed_locks = {
3118
      locking.LEVEL_NODE: locking.ALL_SET,
3119
    }
3120
    self.share_locks[locking.LEVEL_NODE] = 1
3121

    
3122
  def Exec(self, feedback_fn):
3123
    """Redistribute the configuration.
3124

3125
    """
3126
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3127
    _RedistributeAncillaryFiles(self)
3128

    
3129

    
3130
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3131
  """Sleep and poll for an instance's disk to sync.
3132

3133
  """
3134
  if not instance.disks or disks is not None and not disks:
3135
    return True
3136

    
3137
  disks = _ExpandCheckDisks(instance, disks)
3138

    
3139
  if not oneshot:
3140
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3141

    
3142
  node = instance.primary_node
3143

    
3144
  for dev in disks:
3145
    lu.cfg.SetDiskID(dev, node)
3146

    
3147
  # TODO: Convert to utils.Retry
3148

    
3149
  retries = 0
3150
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3151
  while True:
3152
    max_time = 0
3153
    done = True
3154
    cumul_degraded = False
3155
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3156
    msg = rstats.fail_msg
3157
    if msg:
3158
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3159
      retries += 1
3160
      if retries >= 10:
3161
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3162
                                 " aborting." % node)
3163
      time.sleep(6)
3164
      continue
3165
    rstats = rstats.payload
3166
    retries = 0
3167
    for i, mstat in enumerate(rstats):
3168
      if mstat is None:
3169
        lu.LogWarning("Can't compute data for node %s/%s",
3170
                           node, disks[i].iv_name)
3171
        continue
3172

    
3173
      cumul_degraded = (cumul_degraded or
3174
                        (mstat.is_degraded and mstat.sync_percent is None))
3175
      if mstat.sync_percent is not None:
3176
        done = False
3177
        if mstat.estimated_time is not None:
3178
          rem_time = ("%s remaining (estimated)" %
3179
                      utils.FormatSeconds(mstat.estimated_time))
3180
          max_time = mstat.estimated_time
3181
        else:
3182
          rem_time = "no time estimate"
3183
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3184
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3185

    
3186
    # if we're done but degraded, let's do a few small retries, to
3187
    # make sure we see a stable and not transient situation; therefore
3188
    # we force restart of the loop
3189
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3190
      logging.info("Degraded disks found, %d retries left", degr_retries)
3191
      degr_retries -= 1
3192
      time.sleep(1)
3193
      continue
3194

    
3195
    if done or oneshot:
3196
      break
3197

    
3198
    time.sleep(min(60, max_time))
3199

    
3200
  if done:
3201
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3202
  return not cumul_degraded
3203

    
3204

    
3205
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3206
  """Check that mirrors are not degraded.
3207

3208
  The ldisk parameter, if True, will change the test from the
3209
  is_degraded attribute (which represents overall non-ok status for
3210
  the device(s)) to the ldisk (representing the local storage status).
3211

3212
  """
3213
  lu.cfg.SetDiskID(dev, node)
3214

    
3215
  result = True
3216

    
3217
  if on_primary or dev.AssembleOnSecondary():
3218
    rstats = lu.rpc.call_blockdev_find(node, dev)
3219
    msg = rstats.fail_msg
3220
    if msg:
3221
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3222
      result = False
3223
    elif not rstats.payload:
3224
      lu.LogWarning("Can't find disk on node %s", node)
3225
      result = False
3226
    else:
3227
      if ldisk:
3228
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3229
      else:
3230
        result = result and not rstats.payload.is_degraded
3231

    
3232
  if dev.children:
3233
    for child in dev.children:
3234
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3235

    
3236
  return result
3237

    
3238

    
3239
class LUOutOfBand(NoHooksLU):
3240
  """Logical unit for OOB handling.
3241

3242
  """
3243
  _OP_PARAMS = [
3244
    _PNodeName,
3245
    ("command", None, ht.TElemOf(constants.OOB_COMMANDS)),
3246
    ("timeout", constants.OOB_TIMEOUT, ht.TInt),
3247
    ]
3248
  REG_BGL = False
3249

    
3250
  def CheckPrereq(self):
3251
    """Check prerequisites.
3252

3253
    This checks:
3254
     - the node exists in the configuration
3255
     - OOB is supported
3256

3257
    Any errors are signaled by raising errors.OpPrereqError.
3258

3259
    """
3260
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3261
    node = self.cfg.GetNodeInfo(self.op.node_name)
3262

    
3263
    if node is None:
3264
      raise errors.OpPrereqError("Node %s not found" % self.op.node_name)
3265

    
3266
    self.oob_program = self.cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
3267

    
3268
    if not self.oob_program:
3269
      raise errors.OpPrereqError("OOB is not supported for node %s" %
3270
                                 self.op.node_name)
3271

    
3272
    self.op.node_name = node.name
3273
    self.node = node
3274

    
3275
  def ExpandNames(self):
3276
    """Gather locks we need.
3277

3278
    """
3279
    self.needed_locks = {
3280
      locking.LEVEL_NODE: [self.op.node_name],
3281
      }
3282

    
3283
  def Exec(self, feedback_fn):
3284
    """Execute OOB and return result if we expect any.
3285

3286
    """
3287
    master_node = self.cfg.GetMasterNode()
3288

    
3289
    logging.info("Executing out-of-band command '%s' using '%s' on %s",
3290
                 self.op.command, self.oob_program, self.op.node_name)
3291
    result = self.rpc.call_run_oob(master_node, self.oob_program,
3292
                                   self.op.command, self.op.node_name,
3293
                                   self.op.timeout)
3294

    
3295
    result.Raise("An error occurred on execution of OOB helper")
3296

    
3297
    self._CheckPayload(result)
3298

    
3299
    if self.op.command == constants.OOB_HEALTH:
3300
      # For health we should log important events
3301
      for item, status in result.payload:
3302
        if status in [constants.OOB_STATUS_WARNING,
3303
                      constants.OOB_STATUS_CRITICAL]:
3304
          logging.warning("On node '%s' item '%s' has status '%s'",
3305
                          self.op.node_name, item, status)
3306

    
3307
    return result.payload
3308

    
3309
  def _CheckPayload(self, result):
3310
    """Checks if the payload is valid.
3311

3312
    @param result: RPC result
3313
    @raises errors.OpExecError: If payload is not valid
3314

3315
    """
3316
    errs = []
3317
    if self.op.command == constants.OOB_HEALTH:
3318
      if not isinstance(result.payload, list):
3319
        errs.append("command 'health' is expected to return a list but got %s" %
3320
                    type(result.payload))
3321
      for item, status in result.payload:
3322
        if status not in constants.OOB_STATUSES:
3323
          errs.append("health item '%s' has invalid status '%s'" %
3324
                      (item, status))
3325

    
3326
    if self.op.command == constants.OOB_POWER_STATUS:
3327
      if not isinstance(result.payload, dict):
3328
        errs.append("power-status is expected to return a dict but got %s" %
3329
                    type(result.payload))
3330

    
3331
    if self.op.command in [
3332
        constants.OOB_POWER_ON,
3333
        constants.OOB_POWER_OFF,
3334
        constants.OOB_POWER_CYCLE,
3335
        ]:
3336
      if result.payload is not None:
3337
        errs.append("%s is expected to not return payload but got '%s'" %
3338
                    (self.op.command, result.payload))
3339

    
3340
    if errs:
3341
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3342
                               utils.CommaJoin(errs))
3343

    
3344

    
3345

    
3346
class LUDiagnoseOS(NoHooksLU):
3347
  """Logical unit for OS diagnose/query.
3348

3349
  """
3350
  _OP_PARAMS = [
3351
    _POutputFields,
3352
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3353
    ]
3354
  REQ_BGL = False
3355
  _HID = "hidden"
3356
  _BLK = "blacklisted"
3357
  _VLD = "valid"
3358
  _FIELDS_STATIC = utils.FieldSet()
3359
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3360
                                   "parameters", "api_versions", _HID, _BLK)
3361

    
3362
  def CheckArguments(self):
3363
    if self.op.names:
3364
      raise errors.OpPrereqError("Selective OS query not supported",
3365
                                 errors.ECODE_INVAL)
3366

    
3367
    _CheckOutputFields(static=self._FIELDS_STATIC,
3368
                       dynamic=self._FIELDS_DYNAMIC,
3369
                       selected=self.op.output_fields)
3370

    
3371
  def ExpandNames(self):
3372
    # Lock all nodes, in shared mode
3373
    # Temporary removal of locks, should be reverted later
3374
    # TODO: reintroduce locks when they are lighter-weight
3375
    self.needed_locks = {}
3376
    #self.share_locks[locking.LEVEL_NODE] = 1
3377
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3378

    
3379
  @staticmethod
3380
  def _DiagnoseByOS(rlist):
3381
    """Remaps a per-node return list into an a per-os per-node dictionary
3382

3383
    @param rlist: a map with node names as keys and OS objects as values
3384

3385
    @rtype: dict
3386
    @return: a dictionary with osnames as keys and as value another
3387
        map, with nodes as keys and tuples of (path, status, diagnose,
3388
        variants, parameters, api_versions) as values, eg::
3389

3390
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3391
                                     (/srv/..., False, "invalid api")],
3392
                           "node2": [(/srv/..., True, "", [], [])]}
3393
          }
3394

3395
    """
3396
    all_os = {}
3397
    # we build here the list of nodes that didn't fail the RPC (at RPC
3398
    # level), so that nodes with a non-responding node daemon don't
3399
    # make all OSes invalid
3400
    good_nodes = [node_name for node_name in rlist
3401
                  if not rlist[node_name].fail_msg]
3402
    for node_name, nr in rlist.items():
3403
      if nr.fail_msg or not nr.payload:
3404
        continue
3405
      for (name, path, status, diagnose, variants,
3406
           params, api_versions) in nr.payload:
3407
        if name not in all_os:
3408
          # build a list of nodes for this os containing empty lists
3409
          # for each node in node_list
3410
          all_os[name] = {}
3411
          for nname in good_nodes:
3412
            all_os[name][nname] = []
3413
        # convert params from [name, help] to (name, help)
3414
        params = [tuple(v) for v in params]
3415
        all_os[name][node_name].append((path, status, diagnose,
3416
                                        variants, params, api_versions))
3417
    return all_os
3418

    
3419
  def Exec(self, feedback_fn):
3420
    """Compute the list of OSes.
3421

3422
    """
3423
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3424
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3425
    pol = self._DiagnoseByOS(node_data)
3426
    output = []
3427
    cluster = self.cfg.GetClusterInfo()
3428

    
3429
    for os_name in utils.NiceSort(pol.keys()):
3430
      os_data = pol[os_name]
3431
      row = []
3432
      valid = True
3433
      (variants, params, api_versions) = null_state = (set(), set(), set())
3434
      for idx, osl in enumerate(os_data.values()):
3435
        valid = bool(valid and osl and osl[0][1])
3436
        if not valid:
3437
          (variants, params, api_versions) = null_state
3438
          break
3439
        node_variants, node_params, node_api = osl[0][3:6]
3440
        if idx == 0: # first entry
3441
          variants = set(node_variants)
3442
          params = set(node_params)
3443
          api_versions = set(node_api)
3444
        else: # keep consistency
3445
          variants.intersection_update(node_variants)
3446
          params.intersection_update(node_params)
3447
          api_versions.intersection_update(node_api)
3448

    
3449
      is_hid = os_name in cluster.hidden_os
3450
      is_blk = os_name in cluster.blacklisted_os
3451
      if ((self._HID not in self.op.output_fields and is_hid) or
3452
          (self._BLK not in self.op.output_fields and is_blk) or
3453
          (self._VLD not in self.op.output_fields and not valid)):
3454
        continue
3455

    
3456
      for field in self.op.output_fields:
3457
        if field == "name":
3458
          val = os_name
3459
        elif field == self._VLD:
3460
          val = valid
3461
        elif field == "node_status":
3462
          # this is just a copy of the dict
3463
          val = {}
3464
          for node_name, nos_list in os_data.items():
3465
            val[node_name] = nos_list
3466
        elif field == "variants":
3467
          val = utils.NiceSort(list(variants))
3468
        elif field == "parameters":
3469
          val = list(params)
3470
        elif field == "api_versions":
3471
          val = list(api_versions)
3472
        elif field == self._HID:
3473
          val = is_hid
3474
        elif field == self._BLK:
3475
          val = is_blk
3476
        else:
3477
          raise errors.ParameterError(field)
3478
        row.append(val)
3479
      output.append(row)
3480

    
3481
    return output
3482

    
3483

    
3484
class LURemoveNode(LogicalUnit):
3485
  """Logical unit for removing a node.
3486

3487
  """
3488
  HPATH = "node-remove"
3489
  HTYPE = constants.HTYPE_NODE
3490
  _OP_PARAMS = [
3491
    _PNodeName,
3492
    ]
3493

    
3494
  def BuildHooksEnv(self):
3495
    """Build hooks env.
3496

3497
    This doesn't run on the target node in the pre phase as a failed
3498
    node would then be impossible to remove.
3499

3500
    """
3501
    env = {
3502
      "OP_TARGET": self.op.node_name,
3503
      "NODE_NAME": self.op.node_name,
3504
      }
3505
    all_nodes = self.cfg.GetNodeList()
3506
    try:
3507
      all_nodes.remove(self.op.node_name)
3508
    except ValueError:
3509
      logging.warning("Node %s which is about to be removed not found"
3510
                      " in the all nodes list", self.op.node_name)
3511
    return env, all_nodes, all_nodes
3512

    
3513
  def CheckPrereq(self):
3514
    """Check prerequisites.
3515

3516
    This checks:
3517
     - the node exists in the configuration
3518
     - it does not have primary or secondary instances
3519
     - it's not the master
3520

3521
    Any errors are signaled by raising errors.OpPrereqError.
3522

3523
    """
3524
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3525
    node = self.cfg.GetNodeInfo(self.op.node_name)
3526
    assert node is not None
3527

    
3528
    instance_list = self.cfg.GetInstanceList()
3529

    
3530
    masternode = self.cfg.GetMasterNode()
3531
    if node.name == masternode:
3532
      raise errors.OpPrereqError("Node is the master node,"
3533
                                 " you need to failover first.",
3534
                                 errors.ECODE_INVAL)
3535

    
3536
    for instance_name in instance_list:
3537
      instance = self.cfg.GetInstanceInfo(instance_name)
3538
      if node.name in instance.all_nodes:
3539
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3540
                                   " please remove first." % instance_name,
3541
                                   errors.ECODE_INVAL)
3542
    self.op.node_name = node.name
3543
    self.node = node
3544

    
3545
  def Exec(self, feedback_fn):
3546
    """Removes the node from the cluster.
3547

3548
    """
3549
    node = self.node
3550
    logging.info("Stopping the node daemon and removing configs from node %s",
3551
                 node.name)
3552

    
3553
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3554

    
3555
    # Promote nodes to master candidate as needed
3556
    _AdjustCandidatePool(self, exceptions=[node.name])
3557
    self.context.RemoveNode(node.name)
3558

    
3559
    # Run post hooks on the node before it's removed
3560
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3561
    try:
3562
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3563
    except:
3564
      # pylint: disable-msg=W0702
3565
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3566

    
3567
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3568
    msg = result.fail_msg
3569
    if msg:
3570
      self.LogWarning("Errors encountered on the remote node while leaving"
3571
                      " the cluster: %s", msg)
3572

    
3573
    # Remove node from our /etc/hosts
3574
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3575
      master_node = self.cfg.GetMasterNode()
3576
      result = self.rpc.call_etc_hosts_modify(master_node,
3577
                                              constants.ETC_HOSTS_REMOVE,
3578
                                              node.name, None)
3579
      result.Raise("Can't update hosts file with new host data")
3580
      _RedistributeAncillaryFiles(self)
3581

    
3582

    
3583
class _NodeQuery(_QueryBase):
3584
  FIELDS = query.NODE_FIELDS
3585

    
3586
  def ExpandNames(self, lu):
3587
    lu.needed_locks = {}
3588
    lu.share_locks[locking.LEVEL_NODE] = 1
3589

    
3590
    if self.names:
3591
      self.wanted = _GetWantedNodes(lu, self.names)
3592
    else:
3593
      self.wanted = locking.ALL_SET
3594

    
3595
    self.do_locking = (self.use_locking and
3596
                       query.NQ_LIVE in self.requested_data)
3597

    
3598
    if self.do_locking:
3599
      # if we don't request only static fields, we need to lock the nodes
3600
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3601

    
3602
  def DeclareLocks(self, lu, level):
3603
    pass
3604

    
3605
  def _GetQueryData(self, lu):
3606
    """Computes the list of nodes and their attributes.
3607

3608
    """
3609
    all_info = lu.cfg.GetAllNodesInfo()
3610

    
3611
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3612

    
3613
    # Gather data as requested
3614
    if query.NQ_LIVE in self.requested_data:
3615
      node_data = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3616
                                        lu.cfg.GetHypervisorType())
3617
      live_data = dict((name, nresult.payload)
3618
                       for (name, nresult) in node_data.items()
3619
                       if not nresult.fail_msg and nresult.payload)
3620
    else:
3621
      live_data = None
3622

    
3623
    if query.NQ_INST in self.requested_data:
3624
      node_to_primary = dict([(name, set()) for name in nodenames])
3625
      node_to_secondary = dict([(name, set()) for name in nodenames])
3626

    
3627
      inst_data = lu.cfg.GetAllInstancesInfo()
3628

    
3629
      for inst in inst_data.values():
3630
        if inst.primary_node in node_to_primary:
3631
          node_to_primary[inst.primary_node].add(inst.name)
3632
        for secnode in inst.secondary_nodes:
3633
          if secnode in node_to_secondary:
3634
            node_to_secondary[secnode].add(inst.name)
3635
    else:
3636
      node_to_primary = None
3637
      node_to_secondary = None
3638

    
3639
    if query.NQ_GROUP in self.requested_data:
3640
      groups = lu.cfg.GetAllNodeGroupsInfo()
3641
    else:
3642
      groups = {}
3643

    
3644
    return query.NodeQueryData([all_info[name] for name in nodenames],
3645
                               live_data, lu.cfg.GetMasterNode(),
3646
                               node_to_primary, node_to_secondary, groups)
3647

    
3648

    
3649
class LUQueryNodes(NoHooksLU):
3650
  """Logical unit for querying nodes.
3651

3652
  """
3653
  # pylint: disable-msg=W0142
3654
  _OP_PARAMS = [
3655
    _POutputFields,
3656
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3657
    ("use_locking", False, ht.TBool),
3658
    ]
3659
  REQ_BGL = False
3660

    
3661
  def CheckArguments(self):
3662
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3663
                         self.op.use_locking)
3664

    
3665
  def ExpandNames(self):
3666
    self.nq.ExpandNames(self)
3667

    
3668
  def Exec(self, feedback_fn):
3669
    return self.nq.OldStyleQuery(self)
3670

    
3671

    
3672
class LUQueryNodeVolumes(NoHooksLU):
3673
  """Logical unit for getting volumes on node(s).
3674

3675
  """
3676
  _OP_PARAMS = [
3677
    _POutputFields,
3678
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3679
    ]
3680
  REQ_BGL = False
3681
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3682
  _FIELDS_STATIC = utils.FieldSet("node")
3683

    
3684
  def CheckArguments(self):
3685
    _CheckOutputFields(static=self._FIELDS_STATIC,
3686
                       dynamic=self._FIELDS_DYNAMIC,
3687
                       selected=self.op.output_fields)
3688

    
3689
  def ExpandNames(self):
3690
    self.needed_locks = {}
3691
    self.share_locks[locking.LEVEL_NODE] = 1
3692
    if not self.op.nodes:
3693
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3694
    else:
3695
      self.needed_locks[locking.LEVEL_NODE] = \
3696
        _GetWantedNodes(self, self.op.nodes)
3697

    
3698
  def Exec(self, feedback_fn):
3699
    """Computes the list of nodes and their attributes.
3700

3701
    """
3702
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3703
    volumes = self.rpc.call_node_volumes(nodenames)
3704

    
3705
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3706
             in self.cfg.GetInstanceList()]
3707

    
3708
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3709

    
3710
    output = []
3711
    for node in nodenames:
3712
      nresult = volumes[node]
3713
      if nresult.offline:
3714
        continue
3715
      msg = nresult.fail_msg
3716
      if msg:
3717
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3718
        continue
3719

    
3720
      node_vols = nresult.payload[:]
3721
      node_vols.sort(key=lambda vol: vol['dev'])
3722

    
3723
      for vol in node_vols:
3724
        node_output = []
3725
        for field in self.op.output_fields:
3726
          if field == "node":
3727
            val = node
3728
          elif field == "phys":
3729
            val = vol['dev']
3730
          elif field == "vg":
3731
            val = vol['vg']
3732
          elif field == "name":
3733
            val = vol['name']
3734
          elif field == "size":
3735
            val = int(float(vol['size']))
3736
          elif field == "instance":
3737
            for inst in ilist:
3738
              if node not in lv_by_node[inst]:
3739
                continue
3740
              if vol['name'] in lv_by_node[inst][node]:
3741
                val = inst.name
3742
                break
3743
            else:
3744
              val = '-'
3745
          else:
3746
            raise errors.ParameterError(field)
3747
          node_output.append(str(val))
3748

    
3749
        output.append(node_output)
3750

    
3751
    return output
3752

    
3753

    
3754
class LUQueryNodeStorage(NoHooksLU):
3755
  """Logical unit for getting information on storage units on node(s).
3756

3757
  """
3758
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3759
  _OP_PARAMS = [
3760
    _POutputFields,
3761
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3762
    ("storage_type", ht.NoDefault, _CheckStorageType),
3763
    ("name", None, ht.TMaybeString),
3764
    ]
3765
  REQ_BGL = False
3766

    
3767
  def CheckArguments(self):
3768
    _CheckOutputFields(static=self._FIELDS_STATIC,
3769
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3770
                       selected=self.op.output_fields)
3771

    
3772
  def ExpandNames(self):
3773
    self.needed_locks = {}
3774
    self.share_locks[locking.LEVEL_NODE] = 1
3775

    
3776
    if self.op.nodes:
3777
      self.needed_locks[locking.LEVEL_NODE] = \
3778
        _GetWantedNodes(self, self.op.nodes)
3779
    else:
3780
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3781

    
3782
  def Exec(self, feedback_fn):
3783
    """Computes the list of nodes and their attributes.
3784

3785
    """
3786
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3787

    
3788
    # Always get name to sort by
3789
    if constants.SF_NAME in self.op.output_fields:
3790
      fields = self.op.output_fields[:]
3791
    else:
3792
      fields = [constants.SF_NAME] + self.op.output_fields
3793

    
3794
    # Never ask for node or type as it's only known to the LU
3795
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3796
      while extra in fields:
3797
        fields.remove(extra)
3798

    
3799
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3800
    name_idx = field_idx[constants.SF_NAME]
3801

    
3802
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3803
    data = self.rpc.call_storage_list(self.nodes,
3804
                                      self.op.storage_type, st_args,
3805
                                      self.op.name, fields)
3806

    
3807
    result = []
3808

    
3809
    for node in utils.NiceSort(self.nodes):
3810
      nresult = data[node]
3811
      if nresult.offline:
3812
        continue
3813

    
3814
      msg = nresult.fail_msg
3815
      if msg:
3816
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3817
        continue
3818

    
3819
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3820

    
3821
      for name in utils.NiceSort(rows.keys()):
3822
        row = rows[name]
3823

    
3824
        out = []
3825

    
3826
        for field in self.op.output_fields:
3827
          if field == constants.SF_NODE:
3828
            val = node
3829
          elif field == constants.SF_TYPE:
3830
            val = self.op.storage_type
3831
          elif field in field_idx:
3832
            val = row[field_idx[field]]
3833
          else:
3834
            raise errors.ParameterError(field)
3835

    
3836
          out.append(val)
3837

    
3838
        result.append(out)
3839

    
3840
    return result
3841

    
3842

    
3843
class _InstanceQuery(_QueryBase):
3844
  FIELDS = query.INSTANCE_FIELDS
3845

    
3846
  def ExpandNames(self, lu):
3847
    lu.needed_locks = {}
3848
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
3849
    lu.share_locks[locking.LEVEL_NODE] = 1
3850

    
3851
    if self.names:
3852
      self.wanted = _GetWantedInstances(lu, self.names)
3853
    else:
3854
      self.wanted = locking.ALL_SET
3855

    
3856
    self.do_locking = (self.use_locking and
3857
                       query.IQ_LIVE in self.requested_data)
3858
    if self.do_locking:
3859
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3860
      lu.needed_locks[locking.LEVEL_NODE] = []
3861
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3862

    
3863
  def DeclareLocks(self, lu, level):
3864
    if level == locking.LEVEL_NODE and self.do_locking:
3865
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
3866

    
3867
  def _GetQueryData(self, lu):
3868
    """Computes the list of instances and their attributes.
3869

3870
    """
3871
    all_info = lu.cfg.GetAllInstancesInfo()
3872

    
3873
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3874

    
3875
    instance_list = [all_info[name] for name in instance_names]
3876
    nodes = frozenset([inst.primary_node for inst in instance_list])
3877
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3878
    bad_nodes = []
3879
    offline_nodes = []
3880

    
3881
    # Gather data as requested
3882
    if query.IQ_LIVE in self.requested_data:
3883
      live_data = {}
3884
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3885
      for name in nodes:
3886
        result = node_data[name]
3887
        if result.offline:
3888
          # offline nodes will be in both lists
3889
          assert result.fail_msg
3890
          offline_nodes.append(name)
3891
        if result.fail_msg:
3892
          bad_nodes.append(name)
3893
        elif result.payload:
3894
          live_data.update(result.payload)
3895
        # else no instance is alive
3896
    else:
3897
      live_data = {}
3898

    
3899
    if query.IQ_DISKUSAGE in self.requested_data:
3900
      disk_usage = dict((inst.name,
3901
                         _ComputeDiskSize(inst.disk_template,
3902
                                          [{"size": disk.size}
3903
                                           for disk in inst.disks]))
3904
                        for inst in instance_list)
3905
    else:
3906
      disk_usage = None
3907

    
3908
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3909
                                   disk_usage, offline_nodes, bad_nodes,
3910
                                   live_data)
3911

    
3912

    
3913
#: Query type implementations
3914
_QUERY_IMPL = {
3915
  constants.QR_INSTANCE: _InstanceQuery,
3916
  constants.QR_NODE: _NodeQuery,
3917
  }
3918

    
3919

    
3920
def _GetQueryImplementation(name):
3921
  """Returns the implemtnation for a query type.
3922

3923
  @param name: Query type, must be one of L{constants.QR_OP_QUERY}
3924

3925
  """
3926
  try:
3927
    return _QUERY_IMPL[name]
3928
  except KeyError:
3929
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
3930
                               errors.ECODE_INVAL)
3931

    
3932

    
3933
class LUQuery(NoHooksLU):
3934
  """Query for resources/items of a certain kind.
3935

3936
  """
3937
  # pylint: disable-msg=W0142
3938
  _OP_PARAMS = [
3939
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3940
    ("fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3941
    ("filter", None, ht.TOr(ht.TNone,
3942
                            ht.TListOf(ht.TOr(ht.TNonEmptyString, ht.TList)))),
3943
    ]
3944
  REQ_BGL = False
3945

    
3946
  def CheckArguments(self):
3947
    qcls = _GetQueryImplementation(self.op.what)
3948
    names = qlang.ReadSimpleFilter("name", self.op.filter)
3949

    
3950
    self.impl = qcls(names, self.op.fields, False)
3951

    
3952
  def ExpandNames(self):
3953
    self.impl.ExpandNames(self)
3954

    
3955
  def DeclareLocks(self, level):
3956
    self.impl.DeclareLocks(self, level)
3957

    
3958
  def Exec(self, feedback_fn):
3959
    return self.impl.NewStyleQuery(self)
3960

    
3961

    
3962
class LUQueryFields(NoHooksLU):
3963
  """Query for resources/items of a certain kind.
3964

3965
  """
3966
  # pylint: disable-msg=W0142
3967
  _OP_PARAMS = [
3968
    ("what", ht.NoDefault, ht.TElemOf(constants.QR_OP_QUERY)),
3969
    ("fields", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
3970
    ]
3971
  REQ_BGL = False
3972

    
3973
  def CheckArguments(self):
3974
    self.qcls = _GetQueryImplementation(self.op.what)
3975

    
3976
  def ExpandNames(self):
3977
    self.needed_locks = {}
3978

    
3979
  def Exec(self, feedback_fn):
3980
    return self.qcls.FieldsQuery(self.op.fields)
3981

    
3982

    
3983
class LUModifyNodeStorage(NoHooksLU):
3984
  """Logical unit for modifying a storage volume on a node.
3985

3986
  """
3987
  _OP_PARAMS = [
3988
    _PNodeName,
3989
    ("storage_type", ht.NoDefault, _CheckStorageType),
3990
    ("name", ht.NoDefault, ht.TNonEmptyString),
3991
    ("changes", ht.NoDefault, ht.TDict),
3992
    ]
3993
  REQ_BGL = False
3994

    
3995
  def CheckArguments(self):
3996
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3997

    
3998
    storage_type = self.op.storage_type
3999

    
4000
    try:
4001
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4002
    except KeyError:
4003
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4004
                                 " modified" % storage_type,
4005
                                 errors.ECODE_INVAL)
4006

    
4007
    diff = set(self.op.changes.keys()) - modifiable
4008
    if diff:
4009
      raise errors.OpPrereqError("The following fields can not be modified for"
4010
                                 " storage units of type '%s': %r" %
4011
                                 (storage_type, list(diff)),
4012
                                 errors.ECODE_INVAL)
4013

    
4014
  def ExpandNames(self):
4015
    self.needed_locks = {
4016
      locking.LEVEL_NODE: self.op.node_name,
4017
      }
4018

    
4019
  def Exec(self, feedback_fn):
4020
    """Computes the list of nodes and their attributes.
4021

4022
    """
4023
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4024
    result = self.rpc.call_storage_modify(self.op.node_name,
4025
                                          self.op.storage_type, st_args,
4026
                                          self.op.name, self.op.changes)
4027
    result.Raise("Failed to modify storage unit '%s' on %s" %
4028
                 (self.op.name, self.op.node_name))
4029

    
4030

    
4031
class LUAddNode(LogicalUnit):
4032
  """Logical unit for adding node to the cluster.
4033

4034
  """
4035
  HPATH = "node-add"
4036
  HTYPE = constants.HTYPE_NODE
4037
  _OP_PARAMS = [
4038
    _PNodeName,
4039
    ("primary_ip", None, ht.NoType),
4040
    ("secondary_ip", None, ht.TMaybeString),
4041
    ("readd", False, ht.TBool),
4042
    ("group", None, ht.TMaybeString),
4043
    ("master_capable", None, ht.TMaybeBool),
4044
    ("vm_capable", None, ht.TMaybeBool),
4045
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4046
    ]
4047
  _NFLAGS = ["master_capable", "vm_capable"]
4048

    
4049
  def CheckArguments(self):
4050
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4051
    # validate/normalize the node name
4052
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4053
                                         family=self.primary_ip_family)
4054
    self.op.node_name = self.hostname.name
4055
    if self.op.readd and self.op.group:
4056
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4057
                                 " being readded", errors.ECODE_INVAL)
4058

    
4059
  def BuildHooksEnv(self):
4060
    """Build hooks env.
4061

4062
    This will run on all nodes before, and on all nodes + the new node after.
4063

4064
    """
4065
    env = {
4066
      "OP_TARGET": self.op.node_name,
4067
      "NODE_NAME": self.op.node_name,
4068
      "NODE_PIP": self.op.primary_ip,
4069
      "NODE_SIP": self.op.secondary_ip,
4070
      "MASTER_CAPABLE": str(self.op.master_capable),
4071
      "VM_CAPABLE": str(self.op.vm_capable),
4072
      }
4073
    nodes_0 = self.cfg.GetNodeList()
4074
    nodes_1 = nodes_0 + [self.op.node_name, ]
4075
    return env, nodes_0, nodes_1
4076

    
4077
  def CheckPrereq(self):
4078
    """Check prerequisites.
4079

4080
    This checks:
4081
     - the new node is not already in the config
4082
     - it is resolvable
4083
     - its parameters (single/dual homed) matches the cluster
4084

4085
    Any errors are signaled by raising errors.OpPrereqError.
4086

4087
    """
4088
    cfg = self.cfg
4089
    hostname = self.hostname
4090
    node = hostname.name
4091
    primary_ip = self.op.primary_ip = hostname.ip
4092
    if self.op.secondary_ip is None:
4093
      if self.primary_ip_family == netutils.IP6Address.family:
4094
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4095
                                   " IPv4 address must be given as secondary",
4096
                                   errors.ECODE_INVAL)
4097
      self.op.secondary_ip = primary_ip
4098

    
4099
    secondary_ip = self.op.secondary_ip
4100
    if not netutils.IP4Address.IsValid(secondary_ip):
4101
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4102
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4103

    
4104
    node_list = cfg.GetNodeList()
4105
    if not self.op.readd and node in node_list:
4106
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4107
                                 node, errors.ECODE_EXISTS)
4108
    elif self.op.readd and node not in node_list:
4109
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4110
                                 errors.ECODE_NOENT)
4111

    
4112
    self.changed_primary_ip = False
4113

    
4114
    for existing_node_name in node_list:
4115
      existing_node = cfg.GetNodeInfo(existing_node_name)
4116

    
4117
      if self.op.readd and node == existing_node_name:
4118
        if existing_node.secondary_ip != secondary_ip:
4119
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4120
                                     " address configuration as before",
4121
                                     errors.ECODE_INVAL)
4122
        if existing_node.primary_ip != primary_ip:
4123
          self.changed_primary_ip = True
4124

    
4125
        continue
4126

    
4127
      if (existing_node.primary_ip == primary_ip or
4128
          existing_node.secondary_ip == primary_ip or
4129
          existing_node.primary_ip == secondary_ip or
4130
          existing_node.secondary_ip == secondary_ip):
4131
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4132
                                   " existing node %s" % existing_node.name,
4133
                                   errors.ECODE_NOTUNIQUE)
4134

    
4135
    # After this 'if' block, None is no longer a valid value for the
4136
    # _capable op attributes
4137
    if self.op.readd:
4138
      old_node = self.cfg.GetNodeInfo(node)
4139
      assert old_node is not None, "Can't retrieve locked node %s" % node
4140
      for attr in self._NFLAGS:
4141
        if getattr(self.op, attr) is None:
4142
          setattr(self.op, attr, getattr(old_node, attr))
4143
    else:
4144
      for attr in self._NFLAGS:
4145
        if getattr(self.op, attr) is None:
4146
          setattr(self.op, attr, True)
4147

    
4148
    if self.op.readd and not self.op.vm_capable:
4149
      pri, sec = cfg.GetNodeInstances(node)
4150
      if pri or sec:
4151
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4152
                                   " flag set to false, but it already holds"
4153
                                   " instances" % node,
4154
                                   errors.ECODE_STATE)
4155

    
4156
    # check that the type of the node (single versus dual homed) is the
4157
    # same as for the master
4158
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4159
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4160
    newbie_singlehomed = secondary_ip == primary_ip
4161
    if master_singlehomed != newbie_singlehomed:
4162
      if master_singlehomed:
4163
        raise errors.OpPrereqError("The master has no secondary ip but the"
4164
                                   " new node has one",
4165
                                   errors.ECODE_INVAL)
4166
      else:
4167
        raise errors.OpPrereqError("The master has a secondary ip but the"
4168
                                   " new node doesn't have one",
4169
                                   errors.ECODE_INVAL)
4170

    
4171
    # checks reachability
4172
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4173
      raise errors.OpPrereqError("Node not reachable by ping",
4174
                                 errors.ECODE_ENVIRON)
4175

    
4176
    if not newbie_singlehomed:
4177
      # check reachability from my secondary ip to newbie's secondary ip
4178
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4179
                           source=myself.secondary_ip):
4180
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4181
                                   " based ping to node daemon port",
4182
                                   errors.ECODE_ENVIRON)
4183

    
4184
    if self.op.readd:
4185
      exceptions = [node]
4186
    else:
4187
      exceptions = []
4188

    
4189
    if self.op.master_capable:
4190
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4191
    else:
4192
      self.master_candidate = False
4193

    
4194
    if self.op.readd:
4195
      self.new_node = old_node
4196
    else:
4197
      node_group = cfg.LookupNodeGroup(self.op.group)
4198
      self.new_node = objects.Node(name=node,
4199
                                   primary_ip=primary_ip,
4200
                                 secondary_ip=secondary_ip,
                                 master_candidate=self.master_candidate,
                                 offline=False, drained=False,
                                 group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("secondary_ip", None, ht.TMaybeString),
    ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

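  # Note: _F2R maps the (master_candidate, drained, offline) flag tuple to
  # a single role, e.g. (True, False, False) -> _ROLE_CANDIDATE; _R2F is
  # the inverse mapping and is used by Exec() below to translate the new
  # role back into the three flags.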
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

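    # Worked example of the ladder above: un-draining a drained node
    # (only drained=False passed) matches the "False in new flags" branch
    # and yields _ROLE_REGULAR, unless _DecideSelfPromotion() already set
    # master_candidate, in which case the result is _ROLE_CANDIDATE.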
    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


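# Note on the "force" argument of _StartInstanceDisks: it is forwarded to
# _AssembleInstanceDisks as ignore_secondaries, so force=True tolerates
# secondary-node assembly failures, while force=False or force=None treats
# them as fatal; the "--force" retry hint is only printed for force=False.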
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  ignored.
4947

4948
  """
4949
  all_result = True
4950
  disks = _ExpandCheckDisks(instance, disks)
4951

    
4952
  for disk in disks:
4953
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4954
      lu.cfg.SetDiskID(top_disk, node)
4955
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4956
      msg = result.fail_msg
4957
      if msg:
4958
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4959
                      disk.iv_name, node, msg)
4960
        if not ignore_primary or node != instance.primary_node:
4961
          all_result = False
4962
  return all_result
4963

    
4964

    
4965
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4966
  """Checks if a node has enough free memory.
4967

4968
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

4974
  @param lu: a logical unit from which we get configuration data
4975
  @type node: C{str}
4976
  @param node: the node to check
4977
  @type reason: C{str}
4978
  @param reason: string to use in the error message
4979
  @type requested: C{int}
4980
  @param requested: the amount of memory in MiB to check for
4981
  @type hypervisor_name: C{str}
4982
  @param hypervisor_name: the hypervisor to ask for memory stats
4983
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4984
      we cannot check the node
4985

4986
  """
4987
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
4988
  nodeinfo[node].Raise("Can't get data from node %s" % node,
4989
                       prereq=True, ecode=errors.ECODE_ENVIRON)
4990
  free_mem = nodeinfo[node].payload.get('memory_free', None)
4991
  if not isinstance(free_mem, int):
4992
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4993
                               " was '%s'" % (node, free_mem),
4994
                               errors.ECODE_ENVIRON)
4995
  if requested > free_mem:
4996
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4997
                               " needed %s MiB, available %s MiB" %
4998
                               (node, reason, requested, free_mem),
4999
                               errors.ECODE_NORES)
5000

    
5001

    
5002
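# The req_sizes argument below maps a volume group name to the required
# space in MiB, e.g. {"xenvg": 2048} (example value only); passing None
# skips the free-disk check entirely.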
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  if req_sizes is not None:
    for vg, req_size in req_sizes.iteritems():
      _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    _PIgnoreOfflineNodes,
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

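    # Note: the hvparams/beparams overrides are only validated here against
    # the fully filled parameter set; Exec() later passes just the raw
    # overrides (self.op.hvparams / self.op.beparams) to the start RPC.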
    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, ht.TBool),
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PIgnoreOfflineNodes,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
    ("ip_check", False, ht.TBool),
    ("name_check", True, ht.TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Reinstall the instance.
5539

5540
    """
5541
    inst = self.instance
5542
    old_name = inst.name
5543

    
5544
    rename_file_storage = False
5545
    if (inst.disk_template == constants.DT_FILE and
5546
        self.op.new_name != inst.name):
5547
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5548
      rename_file_storage = True
5549

    
5550
    self.cfg.RenameInstance(inst.name, self.op.new_name)
5551
    # Change the instance lock. This is definitely safe while we hold the BGL
5552
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5553
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5554

    
5555
    # re-read the instance from the configuration after rename
5556
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
5557

    
5558
    if rename_file_storage:
5559
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5560
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5561
                                                     old_file_storage_dir,
5562
                                                     new_file_storage_dir)
5563
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
5564
                   " (but the instance has been renamed in Ganeti)" %
5565
                   (inst.primary_node, old_file_storage_dir,
5566
                    new_file_storage_dir))
5567

    
5568
    _StartInstanceDisks(self, inst, None)
5569
    try:
5570
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5571
                                                 old_name, self.op.debug_level)
5572
      msg = result.fail_msg
5573
      if msg:
5574
        msg = ("Could not run OS rename script for instance %s on node %s"
5575
               " (but the instance has been renamed in Ganeti): %s" %
5576
               (inst.name, inst.primary_node, msg))
5577
        self.proc.LogWarning(msg)
5578
    finally:
5579
      _ShutdownInstanceDisks(self, inst)
5580

    
5581
    return inst.name
5582

    
5583

    
5584
class LURemoveInstance(LogicalUnit):
5585
  """Remove an instance.
5586

5587
  """
5588
  HPATH = "instance-remove"
5589
  HTYPE = constants.HTYPE_INSTANCE
5590
  _OP_PARAMS = [
5591
    _PInstanceName,
5592
    ("ignore_failures", False, ht.TBool),
5593
    _PShutdownTimeout,
5594
    ]
5595
  REQ_BGL = False
5596

    
5597
  def ExpandNames(self):
5598
    self._ExpandAndLockInstance()
5599
    self.needed_locks[locking.LEVEL_NODE] = []
5600
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5601

    
5602
  def DeclareLocks(self, level):
5603
    if level == locking.LEVEL_NODE:
5604
      self._LockInstancesNodes()
5605

    
5606
  def BuildHooksEnv(self):
5607
    """Build hooks env.
5608

5609
    This runs on master, primary and secondary nodes of the instance.
5610

5611
    """
5612
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5613
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5614
    nl = [self.cfg.GetMasterNode()]
5615
    nl_post = list(self.instance.all_nodes) + nl
5616
    return env, nl, nl_post
5617

    
5618
  def CheckPrereq(self):
5619
    """Check prerequisites.
5620

5621
    This checks that the instance is in the cluster.
5622

5623
    """
5624
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5625
    assert self.instance is not None, \
5626
      "Cannot retrieve locked instance %s" % self.op.instance_name
5627

    
5628
  def Exec(self, feedback_fn):
5629
    """Remove the instance.
5630

5631
    """
5632
    instance = self.instance
5633
    logging.info("Shutting down instance %s on node %s",
5634
                 instance.name, instance.primary_node)
5635

    
5636
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5637
                                             self.op.shutdown_timeout)
5638
    msg = result.fail_msg
5639
    if msg:
5640
      if self.op.ignore_failures:
5641
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5642
      else:
5643
        raise errors.OpExecError("Could not shutdown instance %s on"
5644
                                 " node %s: %s" %
5645
                                 (instance.name, instance.primary_node, msg))
5646

    
5647
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5648

    
5649

    
5650
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5651
  """Utility function to remove an instance.
5652

5653
  """
5654
  logging.info("Removing block devices for instance %s", instance.name)
5655

    
5656
  if not _RemoveDisks(lu, instance):
5657
    if not ignore_failures:
5658
      raise errors.OpExecError("Can't remove instance's disks")
5659
    feedback_fn("Warning: can't remove instance's disks")
5660

    
5661
  logging.info("Removing instance %s out of cluster config", instance.name)
5662

    
5663
  lu.cfg.RemoveInstance(instance.name)
5664

    
5665
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5666
    "Instance lock removal conflict"
5667

    
5668
  # Remove lock for the instance
5669
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5670

    
5671

    
5672
class LUQueryInstances(NoHooksLU):
5673
  """Logical unit for querying instances.
5674

5675
  """
5676
  # pylint: disable-msg=W0142
5677
  _OP_PARAMS = [
5678
    _POutputFields,
5679
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5680
    ("use_locking", False, ht.TBool),
5681
    ]
5682
  REQ_BGL = False
5683

    
5684
  def CheckArguments(self):
5685
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5686
                             self.op.use_locking)
5687

    
5688
  def ExpandNames(self):
5689
    self.iq.ExpandNames(self)
5690

    
5691
  def DeclareLocks(self, level):
5692
    self.iq.DeclareLocks(self, level)
5693

    
5694
  def Exec(self, feedback_fn):
5695
    return self.iq.OldStyleQuery(self)
5696

    
5697

    
5698
class LUFailoverInstance(LogicalUnit):
5699
  """Failover an instance.
5700

5701
  """
5702
  HPATH = "instance-failover"
5703
  HTYPE = constants.HTYPE_INSTANCE
5704
  _OP_PARAMS = [
5705
    _PInstanceName,
5706
    ("ignore_consistency", False, ht.TBool),
5707
    _PShutdownTimeout,
5708
    ]
5709
  REQ_BGL = False
5710

    
5711
  def ExpandNames(self):
5712
    self._ExpandAndLockInstance()
5713
    self.needed_locks[locking.LEVEL_NODE] = []
5714
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5715

    
5716
  def DeclareLocks(self, level):
5717
    if level == locking.LEVEL_NODE:
5718
      self._LockInstancesNodes()
5719

    
5720
  def BuildHooksEnv(self):
5721
    """Build hooks env.
5722

5723
    This runs on master, primary and secondary nodes of the instance.
5724

5725
    """
5726
    instance = self.instance
5727
    source_node = instance.primary_node
5728
    target_node = instance.secondary_nodes[0]
5729
    env = {
5730
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5731
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5732
      "OLD_PRIMARY": source_node,
5733
      "OLD_SECONDARY": target_node,
5734
      "NEW_PRIMARY": target_node,
5735
      "NEW_SECONDARY": source_node,
5736
      }
5737
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5738
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5739
    nl_post = list(nl)
5740
    nl_post.append(source_node)
5741
    return env, nl, nl_post
5742

    
5743
  def CheckPrereq(self):
5744
    """Check prerequisites.
5745

5746
    This checks that the instance is in the cluster.
5747

5748
    """
5749
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5750
    assert self.instance is not None, \
5751
      "Cannot retrieve locked instance %s" % self.op.instance_name
5752

    
5753
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5754
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5755
      raise errors.OpPrereqError("Instance's disk layout is not"
5756
                                 " network mirrored, cannot failover.",
5757
                                 errors.ECODE_STATE)
5758

    
5759
    secondary_nodes = instance.secondary_nodes
5760
    if not secondary_nodes:
5761
      raise errors.ProgrammerError("no secondary node but using "
5762
                                   "a mirrored disk template")
5763

    
5764
    target_node = secondary_nodes[0]
5765
    _CheckNodeOnline(self, target_node)
5766
    _CheckNodeNotDrained(self, target_node)
5767
    if instance.admin_up:
5768
      # check memory requirements on the secondary node
5769
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5770
                           instance.name, bep[constants.BE_MEMORY],
5771
                           instance.hypervisor)
5772
    else:
5773
      self.LogInfo("Not checking memory on the secondary node as"
5774
                   " instance will not be started")
5775

    
5776
    # check bridge existence
5777
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5778

    
5779
  def Exec(self, feedback_fn):
5780
    """Failover an instance.
5781

5782
    The failover is done by shutting it down on its present node and
5783
    starting it on the secondary.
5784

5785
    """
5786
    instance = self.instance
5787
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5788

    
5789
    source_node = instance.primary_node
5790
    target_node = instance.secondary_nodes[0]
5791

    
5792
    if instance.admin_up:
5793
      feedback_fn("* checking disk consistency between source and target")
5794
      for dev in instance.disks:
5795
        # for drbd, these are drbd over lvm
5796
        if not _CheckDiskConsistency(self, dev, target_node, False):
5797
          if not self.op.ignore_consistency:
5798
            raise errors.OpExecError("Disk %s is degraded on target node,"
5799
                                     " aborting failover." % dev.iv_name)
5800
    else:
5801
      feedback_fn("* not checking disk consistency as instance is not running")
5802

    
5803
    feedback_fn("* shutting down instance on source node")
5804
    logging.info("Shutting down instance %s on node %s",
5805
                 instance.name, source_node)
5806

    
5807
    result = self.rpc.call_instance_shutdown(source_node, instance,
5808
                                             self.op.shutdown_timeout)
5809
    msg = result.fail_msg
5810
    if msg:
5811
      if self.op.ignore_consistency or primary_node.offline:
5812
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5813
                             " Proceeding anyway. Please make sure node"
5814
                             " %s is down. Error details: %s",
5815
                             instance.name, source_node, source_node, msg)
5816
      else:
5817
        raise errors.OpExecError("Could not shutdown instance %s on"
5818
                                 " node %s: %s" %
5819
                                 (instance.name, source_node, msg))
5820

    
5821
    feedback_fn("* deactivating the instance's disks on source node")
5822
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5823
      raise errors.OpExecError("Can't shut down the instance's disks.")
5824

    
5825
    instance.primary_node = target_node
5826
    # distribute new instance config to the other nodes
5827
    self.cfg.Update(instance, feedback_fn)
5828

    
5829
    # Only start the instance if it's marked as up
5830
    if instance.admin_up:
5831
      feedback_fn("* activating the instance's disks on target node")
5832
      logging.info("Starting instance %s on node %s",
5833
                   instance.name, target_node)
5834

    
5835
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5836
                                           ignore_secondaries=True)
5837
      if not disks_ok:
5838
        _ShutdownInstanceDisks(self, instance)
5839
        raise errors.OpExecError("Can't activate the instance's disks")
5840

    
5841
      feedback_fn("* starting the instance on the target node")
5842
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5843
      msg = result.fail_msg
5844
      if msg:
5845
        _ShutdownInstanceDisks(self, instance)
5846
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5847
                                 (instance.name, target_node, msg))
5848

    
5849

    
5850
class LUMigrateInstance(LogicalUnit):
5851
  """Migrate an instance.
5852

5853
  This is migration without shutting down, compared to the failover,
5854
  which is done with shutdown.
5855

5856
  """
5857
  HPATH = "instance-migrate"
5858
  HTYPE = constants.HTYPE_INSTANCE
5859
  _OP_PARAMS = [
5860
    _PInstanceName,
5861
    _PMigrationMode,
5862
    _PMigrationLive,
5863
    ("cleanup", False, ht.TBool),
5864
    ]
5865

    
5866
  REQ_BGL = False
5867

    
5868
  def ExpandNames(self):
5869
    self._ExpandAndLockInstance()
5870

    
5871
    self.needed_locks[locking.LEVEL_NODE] = []
5872
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5873

    
5874
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5875
                                       self.op.cleanup)
5876
    self.tasklets = [self._migrater]
5877

    
5878
  def DeclareLocks(self, level):
5879
    if level == locking.LEVEL_NODE:
5880
      self._LockInstancesNodes()
5881

    
5882
  def BuildHooksEnv(self):
5883
    """Build hooks env.
5884

5885
    This runs on master, primary and secondary nodes of the instance.
5886

5887
    """
5888
    instance = self._migrater.instance
5889
    source_node = instance.primary_node
5890
    target_node = instance.secondary_nodes[0]
5891
    env = _BuildInstanceHookEnvByObject(self, instance)
5892
    env["MIGRATE_LIVE"] = self._migrater.live
5893
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5894
    env.update({
5895
        "OLD_PRIMARY": source_node,
5896
        "OLD_SECONDARY": target_node,
5897
        "NEW_PRIMARY": target_node,
5898
        "NEW_SECONDARY": source_node,
5899
        })
5900
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5901
    nl_post = list(nl)
5902
    nl_post.append(source_node)
5903
    return env, nl, nl_post
5904

    
5905

    
5906
class LUMoveInstance(LogicalUnit):
5907
  """Move an instance by data-copying.
5908

5909
  """
5910
  HPATH = "instance-move"
5911
  HTYPE = constants.HTYPE_INSTANCE
5912
  _OP_PARAMS = [
5913
    _PInstanceName,
5914
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
5915
    _PShutdownTimeout,
5916
    ]
5917
  REQ_BGL = False
5918

    
5919
  def ExpandNames(self):
5920
    self._ExpandAndLockInstance()
5921
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5922
    self.op.target_node = target_node
5923
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5924
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5925

    
5926
  def DeclareLocks(self, level):
5927
    if level == locking.LEVEL_NODE:
5928
      self._LockInstancesNodes(primary_only=True)
5929

    
5930
  def BuildHooksEnv(self):
5931
    """Build hooks env.
5932

5933
    This runs on master, primary and secondary nodes of the instance.
5934

5935
    """
5936
    env = {
5937
      "TARGET_NODE": self.op.target_node,
5938
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5939
      }
5940
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5941
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5942
                                       self.op.target_node]
5943
    return env, nl, nl
5944

    
5945
  def CheckPrereq(self):
5946
    """Check prerequisites.
5947

5948
    This checks that the instance is in the cluster.
5949

5950
    """
5951
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5952
    assert self.instance is not None, \
5953
      "Cannot retrieve locked instance %s" % self.op.instance_name
5954

    
5955
    node = self.cfg.GetNodeInfo(self.op.target_node)
5956
    assert node is not None, \
5957
      "Cannot retrieve locked node %s" % self.op.target_node
5958

    
5959
    self.target_node = target_node = node.name
5960

    
5961
    if target_node == instance.primary_node:
5962
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
5963
                                 (instance.name, target_node),
5964
                                 errors.ECODE_STATE)
5965

    
5966
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5967

    
5968
    for idx, dsk in enumerate(instance.disks):
5969
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5970
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5971
                                   " cannot copy" % idx, errors.ECODE_STATE)
5972

    
5973
    _CheckNodeOnline(self, target_node)
5974
    _CheckNodeNotDrained(self, target_node)
5975
    _CheckNodeVmCapable(self, target_node)
5976

    
5977
    if instance.admin_up:
5978
      # check memory requirements on the target node
5979
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5980
                           instance.name, bep[constants.BE_MEMORY],
5981
                           instance.hypervisor)
5982
    else:
5983
      self.LogInfo("Not checking memory on the secondary node as"
5984
                   " instance will not be started")
5985

    
5986
    # check bridge existence
5987
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5988

    
5989
  def Exec(self, feedback_fn):
5990
    """Move an instance.
5991

5992
    The move is done by shutting it down on its present node, copying
5993
    the data over (slow) and starting it on the new node.
5994

5995
    """
5996
    instance = self.instance
5997

    
5998
    source_node = instance.primary_node
5999
    target_node = self.target_node
6000

    
6001
    self.LogInfo("Shutting down instance %s on source node %s",
6002
                 instance.name, source_node)
6003

    
6004
    result = self.rpc.call_instance_shutdown(source_node, instance,
6005
                                             self.op.shutdown_timeout)
6006
    msg = result.fail_msg
6007
    if msg:
6008
      if self.op.ignore_consistency:
6009
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6010
                             " Proceeding anyway. Please make sure node"
6011
                             " %s is down. Error details: %s",
6012
                             instance.name, source_node, source_node, msg)
6013
      else:
6014
        raise errors.OpExecError("Could not shutdown instance %s on"
6015
                                 " node %s: %s" %
6016
                                 (instance.name, source_node, msg))
6017

    
6018
    # create the target disks
6019
    try:
6020
      _CreateDisks(self, instance, target_node=target_node)
6021
    except errors.OpExecError:
6022
      self.LogWarning("Device creation failed, reverting...")
6023
      try:
6024
        _RemoveDisks(self, instance, target_node=target_node)
6025
      finally:
6026
        self.cfg.ReleaseDRBDMinors(instance.name)
6027
        raise
6028

    
6029
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6030

    
6031
    errs = []
6032
    # activate, get path, copy the data over
6033
    for idx, disk in enumerate(instance.disks):
6034
      self.LogInfo("Copying data for disk %d", idx)
6035
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6036
                                               instance.name, True)
6037
      if result.fail_msg:
6038
        self.LogWarning("Can't assemble newly created disk %d: %s",
6039
                        idx, result.fail_msg)
6040
        errs.append(result.fail_msg)
6041
        break
6042
      dev_path = result.payload
6043
      result = self.rpc.call_blockdev_export(source_node, disk,
6044
                                             target_node, dev_path,
6045
                                             cluster_name)
6046
      if result.fail_msg:
6047
        self.LogWarning("Can't copy data over for disk %d: %s",
6048
                        idx, result.fail_msg)
6049
        errs.append(result.fail_msg)
6050
        break
6051

    
6052
    if errs:
6053
      self.LogWarning("Some disks failed to copy, aborting")
6054
      try:
6055
        _RemoveDisks(self, instance, target_node=target_node)
6056
      finally:
6057
        self.cfg.ReleaseDRBDMinors(instance.name)
6058
        raise errors.OpExecError("Errors during disk copy: %s" %
6059
                                 (",".join(errs),))
6060

    
6061
    instance.primary_node = target_node
6062
    self.cfg.Update(instance, feedback_fn)
6063

    
6064
    self.LogInfo("Removing the disks on the original node")
6065
    _RemoveDisks(self, instance, target_node=source_node)
6066

    
6067
    # Only start the instance if it's marked as up
6068
    if instance.admin_up:
6069
      self.LogInfo("Starting instance %s on node %s",
6070
                   instance.name, target_node)
6071

    
6072
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6073
                                           ignore_secondaries=True)
6074
      if not disks_ok:
6075
        _ShutdownInstanceDisks(self, instance)
6076
        raise errors.OpExecError("Can't activate the instance's disks")
6077

    
6078
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6079
      msg = result.fail_msg
6080
      if msg:
6081
        _ShutdownInstanceDisks(self, instance)
6082
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6083
                                 (instance.name, target_node, msg))
6084

    
6085

    
6086
class LUMigrateNode(LogicalUnit):
6087
  """Migrate all instances from a node.
6088

6089
  """
6090
  HPATH = "node-migrate"
6091
  HTYPE = constants.HTYPE_NODE
6092
  _OP_PARAMS = [
6093
    _PNodeName,
6094
    _PMigrationMode,
6095
    _PMigrationLive,
6096
    ]
6097
  REQ_BGL = False
6098

    
6099
  def ExpandNames(self):
6100
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6101

    
6102
    self.needed_locks = {
6103
      locking.LEVEL_NODE: [self.op.node_name],
6104
      }
6105

    
6106
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6107

    
6108
    # Create tasklets for migrating instances for all instances on this node
6109
    names = []
6110
    tasklets = []
6111

    
6112
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6113
      logging.debug("Migrating instance %s", inst.name)
6114
      names.append(inst.name)
6115

    
6116
      tasklets.append(TLMigrateInstance(self, inst.name, False))
6117

    
6118
    self.tasklets = tasklets
6119

    
6120
    # Declare instance locks
6121
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6122

    
6123
  def DeclareLocks(self, level):
6124
    if level == locking.LEVEL_NODE:
6125
      self._LockInstancesNodes()
6126

    
6127
  def BuildHooksEnv(self):
6128
    """Build hooks env.
6129

6130
    This runs on the master, the primary and all the secondaries.
6131

6132
    """
6133
    env = {
6134
      "NODE_NAME": self.op.node_name,
6135
      }
6136

    
6137
    nl = [self.cfg.GetMasterNode()]
6138

    
6139
    return (env, nl, nl)
6140

    
6141

    
6142
class TLMigrateInstance(Tasklet):
6143
  """Tasklet class for instance migration.
6144

6145
  @type live: boolean
6146
  @ivar live: whether the migration will be done live or non-live;
6147
      this variable is initialized only after CheckPrereq has run
6148

6149
  """
6150
  def __init__(self, lu, instance_name, cleanup):
6151
    """Initializes this class.
6152

6153
    """
6154
    Tasklet.__init__(self, lu)
6155

    
6156
    # Parameters
6157
    self.instance_name = instance_name
6158
    self.cleanup = cleanup
6159
    self.live = False # will be overridden later
6160

    
6161
  def CheckPrereq(self):
6162
    """Check prerequisites.
6163

6164
    This checks that the instance is in the cluster.
6165

6166
    """
6167
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6168
    instance = self.cfg.GetInstanceInfo(instance_name)
6169
    assert instance is not None
6170

    
6171
    if instance.disk_template != constants.DT_DRBD8:
6172
      raise errors.OpPrereqError("Instance's disk layout is not"
6173
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6174

    
6175
    secondary_nodes = instance.secondary_nodes
6176
    if not secondary_nodes:
6177
      raise errors.ConfigurationError("No secondary node but using"
6178
                                      " drbd8 disk template")
6179

    
6180
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6181

    
6182
    target_node = secondary_nodes[0]
6183
    # check memory requirements on the secondary node
6184
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6185
                         instance.name, i_be[constants.BE_MEMORY],
6186
                         instance.hypervisor)
6187

    
6188
    # check bridge existence
6189
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6190

    
6191
    if not self.cleanup:
6192
      _CheckNodeNotDrained(self.lu, target_node)
6193
      result = self.rpc.call_instance_migratable(instance.primary_node,
6194
                                                 instance)
6195
      result.Raise("Can't migrate, please use failover",
6196
                   prereq=True, ecode=errors.ECODE_STATE)
6197

    
6198
    self.instance = instance
6199

    
6200
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6201
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6202
                                 " parameters are accepted",
6203
                                 errors.ECODE_INVAL)
6204
    if self.lu.op.live is not None:
6205
      if self.lu.op.live:
6206
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6207
      else:
6208
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6209
      # reset the 'live' parameter to None so that repeated
6210
      # invocations of CheckPrereq do not raise an exception
6211
      self.lu.op.live = None
6212
    elif self.lu.op.mode is None:
6213
      # read the default value from the hypervisor
6214
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6215
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6216

    
6217
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
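    # Note (added for clarity; not in the original module): the net effect is
    # that 'live=True' maps to mode 'live', 'live=False' to 'non-live', and
    # when neither 'live' nor 'mode' is given the hypervisor's
    # HV_MIGRATION_MODE default is used; passing both is rejected above.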
6218

    
6219
  def _WaitUntilSync(self):
6220
    """Poll with custom rpc for disk sync.
6221

6222
    This uses our own step-based rpc call.
6223

6224
    """
6225
    self.feedback_fn("* wait until resync is done")
6226
    all_done = False
6227
    while not all_done:
6228
      all_done = True
6229
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6230
                                            self.nodes_ip,
6231
                                            self.instance.disks)
6232
      min_percent = 100
6233
      for node, nres in result.items():
6234
        nres.Raise("Cannot resync disks on node %s" % node)
6235
        node_done, node_percent = nres.payload
6236
        all_done = all_done and node_done
6237
        if node_percent is not None:
6238
          min_percent = min(min_percent, node_percent)
6239
      if not all_done:
6240
        if min_percent < 100:
6241
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6242
        time.sleep(2)
6243

    
6244
  def _EnsureSecondary(self, node):
6245
    """Demote a node to secondary.
6246

6247
    """
6248
    self.feedback_fn("* switching node %s to secondary mode" % node)
6249

    
6250
    for dev in self.instance.disks:
6251
      self.cfg.SetDiskID(dev, node)
6252

    
6253
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6254
                                          self.instance.disks)
6255
    result.Raise("Cannot change disk to secondary on node %s" % node)
6256

    
6257
  def _GoStandalone(self):
6258
    """Disconnect from the network.
6259

6260
    """
6261
    self.feedback_fn("* changing into standalone mode")
6262
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6263
                                               self.instance.disks)
6264
    for node, nres in result.items():
6265
      nres.Raise("Cannot disconnect disks node %s" % node)
6266

    
6267
  def _GoReconnect(self, multimaster):
6268
    """Reconnect to the network.
6269

6270
    """
6271
    if multimaster:
6272
      msg = "dual-master"
6273
    else:
6274
      msg = "single-master"
6275
    self.feedback_fn("* changing disks into %s mode" % msg)
6276
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6277
                                           self.instance.disks,
6278
                                           self.instance.name, multimaster)
6279
    for node, nres in result.items():
6280
      nres.Raise("Cannot change disks config on node %s" % node)
6281

    
6282
  def _ExecCleanup(self):
6283
    """Try to cleanup after a failed migration.
6284

6285
    The cleanup is done by:
6286
      - check that the instance is running only on one node
6287
        (and update the config if needed)
6288
      - change disks on its secondary node to secondary
6289
      - wait until disks are fully synchronized
6290
      - disconnect from the network
6291
      - change disks into single-master mode
6292
      - wait again until disks are fully synchronized
6293

6294
    """
6295
    instance = self.instance
6296
    target_node = self.target_node
6297
    source_node = self.source_node
6298

    
6299
    # check running on only one node
6300
    self.feedback_fn("* checking where the instance actually runs"
6301
                     " (if this hangs, the hypervisor might be in"
6302
                     " a bad state)")
6303
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6304
    for node, result in ins_l.items():
6305
      result.Raise("Can't contact node %s" % node)
6306

    
6307
    runningon_source = instance.name in ins_l[source_node].payload
6308
    runningon_target = instance.name in ins_l[target_node].payload
6309

    
6310
    if runningon_source and runningon_target:
6311
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6312
                               " or the hypervisor is confused. You will have"
6313
                               " to ensure manually that it runs only on one"
6314
                               " and restart this operation.")
6315

    
6316
    if not (runningon_source or runningon_target):
6317
      raise errors.OpExecError("Instance does not seem to be running at all."
6318
                               " In this case, it's safer to repair by"
6319
                               " running 'gnt-instance stop' to ensure disk"
6320
                               " shutdown, and then restarting it.")
6321

    
6322
    if runningon_target:
6323
      # the migration has actually succeeded, we need to update the config
6324
      self.feedback_fn("* instance running on secondary node (%s),"
6325
                       " updating config" % target_node)
6326
      instance.primary_node = target_node
6327
      self.cfg.Update(instance, self.feedback_fn)
6328
      demoted_node = source_node
6329
    else:
6330
      self.feedback_fn("* instance confirmed to be running on its"
6331
                       " primary node (%s)" % source_node)
6332
      demoted_node = target_node
6333

    
6334
    self._EnsureSecondary(demoted_node)
6335
    try:
6336
      self._WaitUntilSync()
6337
    except errors.OpExecError:
6338
      # we ignore errors here, since if the device is standalone, it
6339
      # won't be able to sync
6340
      pass
6341
    self._GoStandalone()
6342
    self._GoReconnect(False)
6343
    self._WaitUntilSync()
6344

    
6345
    self.feedback_fn("* done")
6346

    
6347
  def _RevertDiskStatus(self):
6348
    """Try to revert the disk status after a failed migration.
6349

6350
    """
6351
    target_node = self.target_node
6352
    try:
6353
      self._EnsureSecondary(target_node)
6354
      self._GoStandalone()
6355
      self._GoReconnect(False)
6356
      self._WaitUntilSync()
6357
    except errors.OpExecError, err:
6358
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6359
                         " drives: error '%s'\n"
6360
                         "Please look and recover the instance status" %
6361
                         str(err))
6362

    
6363
  def _AbortMigration(self):
6364
    """Call the hypervisor code to abort a started migration.
6365

6366
    """
6367
    instance = self.instance
6368
    target_node = self.target_node
6369
    migration_info = self.migration_info
6370

    
6371
    abort_result = self.rpc.call_finalize_migration(target_node,
6372
                                                    instance,
6373
                                                    migration_info,
6374
                                                    False)
6375
    abort_msg = abort_result.fail_msg
6376
    if abort_msg:
6377
      logging.error("Aborting migration failed on target node %s: %s",
6378
                    target_node, abort_msg)
6379
      # Don't raise an exception here, as we still have to try to revert the
6380
      # disk status, even if this step failed.
6381

    
6382
  def _ExecMigration(self):
6383
    """Migrate an instance.
6384

6385
    The migrate is done by:
6386
      - change the disks into dual-master mode
6387
      - wait until disks are fully synchronized again
6388
      - migrate the instance
6389
      - change disks on the new secondary node (the old primary) to secondary
6390
      - wait until disks are fully synchronized
6391
      - change disks into single-master mode
6392

6393
    """
6394
    instance = self.instance
6395
    target_node = self.target_node
6396
    source_node = self.source_node
6397

    
6398
    self.feedback_fn("* checking disk consistency between source and target")
6399
    for dev in instance.disks:
6400
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6401
        raise errors.OpExecError("Disk %s is degraded or not fully"
6402
                                 " synchronized on target node,"
6403
                                 " aborting migrate." % dev.iv_name)
6404

    
6405
    # First get the migration information from the remote node
6406
    result = self.rpc.call_migration_info(source_node, instance)
6407
    msg = result.fail_msg
6408
    if msg:
6409
      log_err = ("Failed fetching source migration information from %s: %s" %
6410
                 (source_node, msg))
6411
      logging.error(log_err)
6412
      raise errors.OpExecError(log_err)
6413

    
6414
    self.migration_info = migration_info = result.payload
6415

    
6416
    # Then switch the disks to master/master mode
6417
    self._EnsureSecondary(target_node)
6418
    self._GoStandalone()
6419
    self._GoReconnect(True)
6420
    self._WaitUntilSync()
6421

    
6422
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6423
    result = self.rpc.call_accept_instance(target_node,
6424
                                           instance,
6425
                                           migration_info,
6426
                                           self.nodes_ip[target_node])
6427

    
6428
    msg = result.fail_msg
6429
    if msg:
6430
      logging.error("Instance pre-migration failed, trying to revert"
6431
                    " disk status: %s", msg)
6432
      self.feedback_fn("Pre-migration failed, aborting")
6433
      self._AbortMigration()
6434
      self._RevertDiskStatus()
6435
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6436
                               (instance.name, msg))
6437

    
6438
    self.feedback_fn("* migrating instance to %s" % target_node)
6439
    time.sleep(10)
6440
    result = self.rpc.call_instance_migrate(source_node, instance,
6441
                                            self.nodes_ip[target_node],
6442
                                            self.live)
6443
    msg = result.fail_msg
6444
    if msg:
6445
      logging.error("Instance migration failed, trying to revert"
6446
                    " disk status: %s", msg)
6447
      self.feedback_fn("Migration failed, aborting")
6448
      self._AbortMigration()
6449
      self._RevertDiskStatus()
6450
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6451
                               (instance.name, msg))
6452
    time.sleep(10)
6453

    
6454
    instance.primary_node = target_node
6455
    # distribute new instance config to the other nodes
6456
    self.cfg.Update(instance, self.feedback_fn)
6457

    
6458
    result = self.rpc.call_finalize_migration(target_node,
6459
                                              instance,
6460
                                              migration_info,
6461
                                              True)
6462
    msg = result.fail_msg
6463
    if msg:
6464
      logging.error("Instance migration succeeded, but finalization failed:"
6465
                    " %s", msg)
6466
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6467
                               msg)
6468

    
6469
    self._EnsureSecondary(source_node)
6470
    self._WaitUntilSync()
6471
    self._GoStandalone()
6472
    self._GoReconnect(False)
6473
    self._WaitUntilSync()
6474

    
6475
    self.feedback_fn("* done")
6476

    
6477
  def Exec(self, feedback_fn):
6478
    """Perform the migration.
6479

6480
    """
6481
    feedback_fn("Migrating instance %s" % self.instance.name)
6482

    
6483
    self.feedback_fn = feedback_fn
6484

    
6485
    self.source_node = self.instance.primary_node
6486
    self.target_node = self.instance.secondary_nodes[0]
6487
    self.all_nodes = [self.source_node, self.target_node]
6488
    self.nodes_ip = {
6489
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6490
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6491
      }
6492

    
6493
    if self.cleanup:
6494
      return self._ExecCleanup()
6495
    else:
6496
      return self._ExecMigration()
6497

    
6498

    
6499
def _CreateBlockDev(lu, node, instance, device, force_create,
6500
                    info, force_open):
6501
  """Create a tree of block devices on a given node.
6502

6503
  If this device type has to be created on secondaries, create it and
6504
  all its children.
6505

6506
  If not, just recurse to children keeping the same 'force' value.
6507

6508
  @param lu: the lu on whose behalf we execute
6509
  @param node: the node on which to create the device
6510
  @type instance: L{objects.Instance}
6511
  @param instance: the instance which owns the device
6512
  @type device: L{objects.Disk}
6513
  @param device: the device to create
6514
  @type force_create: boolean
6515
  @param force_create: whether to force creation of this device; this
6516
      will be changed to True whenever we find a device which has
6517
      CreateOnSecondary() attribute
6518
  @param info: the extra 'metadata' we should attach to the device
6519
      (this will be represented as a LVM tag)
6520
  @type force_open: boolean
6521
  @param force_open: this parameter will be passed to the
6522
      L{backend.BlockdevCreate} function where it specifies
6523
      whether we run on primary or not, and it affects both
6524
      the child assembly and the device's own Open() execution
6525

6526
  """
6527
  if device.CreateOnSecondary():
6528
    force_create = True
6529

    
6530
  if device.children:
6531
    for child in device.children:
6532
      _CreateBlockDev(lu, node, instance, child, force_create,
6533
                      info, force_open)
6534

    
6535
  if not force_create:
6536
    return
6537

    
6538
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6539

    
6540

    
6541
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6542
  """Create a single block device on a given node.
6543

6544
  This will not recurse over children of the device, so they must be
6545
  created in advance.
6546

6547
  @param lu: the lu on whose behalf we execute
6548
  @param node: the node on which to create the device
6549
  @type instance: L{objects.Instance}
6550
  @param instance: the instance which owns the device
6551
  @type device: L{objects.Disk}
6552
  @param device: the device to create
6553
  @param info: the extra 'metadata' we should attach to the device
6554
      (this will be represented as a LVM tag)
6555
  @type force_open: boolean
6556
  @param force_open: this parameter will be passed to the
6557
      L{backend.BlockdevCreate} function where it specifies
6558
      whether we run on primary or not, and it affects both
6559
      the child assembly and the device own Open() execution
6560

6561
  """
6562
  lu.cfg.SetDiskID(device, node)
6563
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6564
                                       instance.name, force_open, info)
6565
  result.Raise("Can't create block device %s on"
6566
               " node %s for instance %s" % (device, node, instance.name))
6567
  if device.physical_id is None:
6568
    device.physical_id = result.payload
6569

    
6570

    
6571
def _GenerateUniqueNames(lu, exts):
6572
  """Generate a suitable LV name.
6573

6574
  This will generate a logical volume name for the given instance.
6575

6576
  """
6577
  results = []
6578
  for val in exts:
6579
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6580
    results.append("%s%s" % (new_id, val))
6581
  return results
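# Illustrative example (added for clarity): _GenerateUniqueNames(lu,
# [".disk0", ".disk1"]) returns ["<id0>.disk0", "<id1>.disk1"], where each
# idN is a freshly generated unique ID reserved against the current
# execution context via GenerateUniqueID/GetECId.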
6582

    
6583

    
6584
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6585
                         p_minor, s_minor):
6586
  """Generate a drbd8 device complete with its children.
6587

6588
  """
6589
  port = lu.cfg.AllocatePort()
6590
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6591
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6592
                          logical_id=(vgname, names[0]))
6593
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6594
                          logical_id=(vgname, names[1]))
6595
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6596
                          logical_id=(primary, secondary, port,
6597
                                      p_minor, s_minor,
6598
                                      shared_secret),
6599
                          children=[dev_data, dev_meta],
6600
                          iv_name=iv_name)
6601
  return drbd_dev
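# Illustrative note (added for clarity): the resulting DRBD8 device carries a
# logical_id of (primary, secondary, port, p_minor, s_minor, shared_secret)
# and has two LV children: a data LV of the requested size and a 128 MB
# metadata LV, named after names[0] and names[1] respectively.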
6602

    
6603

    
6604
def _GenerateDiskTemplate(lu, template_name,
6605
                          instance_name, primary_node,
6606
                          secondary_nodes, disk_info,
6607
                          file_storage_dir, file_driver,
6608
                          base_index, feedback_fn):
6609
  """Generate the entire disk layout for a given template type.
6610

6611
  """
6612
  #TODO: compute space requirements
6613

    
6614
  vgname = lu.cfg.GetVGName()
6615
  disk_count = len(disk_info)
6616
  disks = []
6617
  if template_name == constants.DT_DISKLESS:
6618
    pass
6619
  elif template_name == constants.DT_PLAIN:
6620
    if len(secondary_nodes) != 0:
6621
      raise errors.ProgrammerError("Wrong template configuration")
6622

    
6623
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6624
                                      for i in range(disk_count)])
6625
    for idx, disk in enumerate(disk_info):
6626
      disk_index = idx + base_index
6627
      vg = disk.get("vg", vgname)
6628
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6629
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6630
                              logical_id=(vg, names[idx]),
6631
                              iv_name="disk/%d" % disk_index,
6632
                              mode=disk["mode"])
6633
      disks.append(disk_dev)
6634
  elif template_name == constants.DT_DRBD8:
6635
    if len(secondary_nodes) != 1:
6636
      raise errors.ProgrammerError("Wrong template configuration")
6637
    remote_node = secondary_nodes[0]
6638
    minors = lu.cfg.AllocateDRBDMinor(
6639
      [primary_node, remote_node] * len(disk_info), instance_name)
6640

    
6641
    names = []
6642
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6643
                                               for i in range(disk_count)]):
6644
      names.append(lv_prefix + "_data")
6645
      names.append(lv_prefix + "_meta")
6646
    for idx, disk in enumerate(disk_info):
6647
      disk_index = idx + base_index
6648
      vg = disk.get("vg", vgname)
6649
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6650
                                      disk["size"], vg, names[idx*2:idx*2+2],
6651
                                      "disk/%d" % disk_index,
6652
                                      minors[idx*2], minors[idx*2+1])
6653
      disk_dev.mode = disk["mode"]
6654
      disks.append(disk_dev)
6655
  elif template_name == constants.DT_FILE:
6656
    if len(secondary_nodes) != 0:
6657
      raise errors.ProgrammerError("Wrong template configuration")
6658

    
6659
    _RequireFileStorage()
6660

    
6661
    for idx, disk in enumerate(disk_info):
6662
      disk_index = idx + base_index
6663
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6664
                              iv_name="disk/%d" % disk_index,
6665
                              logical_id=(file_driver,
6666
                                          "%s/disk%d" % (file_storage_dir,
6667
                                                         disk_index)),
6668
                              mode=disk["mode"])
6669
      disks.append(disk_dev)
6670
  else:
6671
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6672
  return disks
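# Illustrative note (added for clarity): for constants.DT_DRBD8 every disk
# gets a "<unique-id>.diskN_data" and "<unique-id>.diskN_meta" LV name plus a
# (primary, secondary) DRBD minor pair, i.e. minors[idx * 2] and
# minors[idx * 2 + 1] belong to disk number idx.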
6673

    
6674

    
6675
def _GetInstanceInfoText(instance):
6676
  """Compute that text that should be added to the disk's metadata.
6677

6678
  """
6679
  return "originstname+%s" % instance.name
6680

    
6681

    
6682
def _CalcEta(time_taken, written, total_size):
6683
  """Calculates the ETA based on size written and total size.
6684

6685
  @param time_taken: The time taken so far
6686
  @param written: amount written so far
6687
  @param total_size: The total size of data to be written
6688
  @return: The remaining time in seconds
6689

6690
  """
6691
  avg_time = time_taken / float(written)
6692
  return (total_size - written) * avg_time
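# Illustrative example (values assumed, added for clarity): if writing the
# first 1024 MB of an 8192 MB disk took 30 seconds, then
# _CalcEta(30.0, 1024, 8192) == (8192 - 1024) * (30.0 / 1024) == 210.0
# seconds remaining.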
6693

    
6694

    
6695
def _WipeDisks(lu, instance):
6696
  """Wipes instance disks.
6697

6698
  @type lu: L{LogicalUnit}
6699
  @param lu: the logical unit on whose behalf we execute
6700
  @type instance: L{objects.Instance}
6701
  @param instance: the instance whose disks we should create
6702
  @return: the success of the wipe
6703

6704
  """
6705
  node = instance.primary_node
6706
  for idx, device in enumerate(instance.disks):
6707
    lu.LogInfo("* Wiping disk %d", idx)
6708
    logging.info("Wiping disk %d for instance %s", idx, instance.name)
6709

    
6710
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6711
    # MAX_WIPE_CHUNK at max
6712
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6713
                          constants.MIN_WIPE_CHUNK_PERCENT)
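    # Illustrative example (constant values assumed, not taken from this
    # module): with MAX_WIPE_CHUNK = 1024 MB and MIN_WIPE_CHUNK_PERCENT = 10,
    # a 4096 MB disk is wiped in chunks of min(1024, 4096 / 100.0 * 10),
    # i.e. 409.6 MB per call_blockdev_wipe request.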
6714

    
6715
    offset = 0
6716
    size = device.size
6717
    last_output = 0
6718
    start_time = time.time()
6719

    
6720
    while offset < size:
6721
      wipe_size = min(wipe_chunk_size, size - offset)
6722
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6723
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
6724
                   (idx, offset, wipe_size))
6725
      now = time.time()
6726
      offset += wipe_size
6727
      if now - last_output >= 60:
6728
        eta = _CalcEta(now - start_time, offset, size)
6729
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
6730
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
6731
        last_output = now
6732

    
6733

    
6734
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6735
  """Create all disks for an instance.
6736

6737
  This abstracts away some work from AddInstance.
6738

6739
  @type lu: L{LogicalUnit}
6740
  @param lu: the logical unit on whose behalf we execute
6741
  @type instance: L{objects.Instance}
6742
  @param instance: the instance whose disks we should create
6743
  @type to_skip: list
6744
  @param to_skip: list of indices to skip
6745
  @type target_node: string
6746
  @param target_node: if passed, overrides the target node for creation
6747
  @rtype: boolean
6748
  @return: the success of the creation
6749

6750
  """
6751
  info = _GetInstanceInfoText(instance)
6752
  if target_node is None:
6753
    pnode = instance.primary_node
6754
    all_nodes = instance.all_nodes
6755
  else:
6756
    pnode = target_node
6757
    all_nodes = [pnode]
6758

    
6759
  if instance.disk_template == constants.DT_FILE:
6760
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6761
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6762

    
6763
    result.Raise("Failed to create directory '%s' on"
6764
                 " node %s" % (file_storage_dir, pnode))
6765

    
6766
  # Note: this needs to be kept in sync with adding of disks in
6767
  # LUSetInstanceParams
6768
  for idx, device in enumerate(instance.disks):
6769
    if to_skip and idx in to_skip:
6770
      continue
6771
    logging.info("Creating volume %s for instance %s",
6772
                 device.iv_name, instance.name)
6773
    #HARDCODE
6774
    for node in all_nodes:
6775
      f_create = node == pnode
6776
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6777

    
6778

    
6779
def _RemoveDisks(lu, instance, target_node=None):
6780
  """Remove all disks for an instance.
6781

6782
  This abstracts away some work from `AddInstance()` and
6783
  `RemoveInstance()`. Note that in case some of the devices couldn't
6784
  be removed, the removal will continue with the other ones (compare
6785
  with `_CreateDisks()`).
6786

6787
  @type lu: L{LogicalUnit}
6788
  @param lu: the logical unit on whose behalf we execute
6789
  @type instance: L{objects.Instance}
6790
  @param instance: the instance whose disks we should remove
6791
  @type target_node: string
6792
  @param target_node: used to override the node on which to remove the disks
6793
  @rtype: boolean
6794
  @return: the success of the removal
6795

6796
  """
6797
  logging.info("Removing block devices for instance %s", instance.name)
6798

    
6799
  all_result = True
6800
  for device in instance.disks:
6801
    if target_node:
6802
      edata = [(target_node, device)]
6803
    else:
6804
      edata = device.ComputeNodeTree(instance.primary_node)
6805
    for node, disk in edata:
6806
      lu.cfg.SetDiskID(disk, node)
6807
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6808
      if msg:
6809
        lu.LogWarning("Could not remove block device %s on node %s,"
6810
                      " continuing anyway: %s", device.iv_name, node, msg)
6811
        all_result = False
6812

    
6813
  if instance.disk_template == constants.DT_FILE:
6814
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6815
    if target_node:
6816
      tgt = target_node
6817
    else:
6818
      tgt = instance.primary_node
6819
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6820
    if result.fail_msg:
6821
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6822
                    file_storage_dir, instance.primary_node, result.fail_msg)
6823
      all_result = False
6824

    
6825
  return all_result
6826

    
6827

    
6828
def _ComputeDiskSizePerVG(disk_template, disks):
6829
  """Compute disk size requirements in the volume group
6830

6831
  """
6832
  def _compute(disks, payload):
6833
    """Universal algorithm
6834

6835
    """
6836
    vgs = {}
6837
    for disk in disks:
6838
      vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6839

    
6840
    return vgs
6841

    
6842
  # Required free disk space as a function of disk template and disk sizes
6843
  req_size_dict = {
6844
    constants.DT_DISKLESS: None,
6845
    constants.DT_PLAIN: _compute(disks, 0),
6846
    # 128 MB are added for drbd metadata for each disk
6847
    constants.DT_DRBD8: _compute(disks, 128),
6848
    constants.DT_FILE: None,
6849
  }
6850

    
6851
  if disk_template not in req_size_dict:
6852
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6853
                                 " is unknown" %  disk_template)
6854

    
6855
  return req_size_dict[disk_template]
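# Illustrative example (values assumed, added for clarity): for two DRBD8
# disks in the same volume group,
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{"vg": "xenvg", "size": 1024},
#                          {"vg": "xenvg", "size": 2048}])
# returns {"xenvg": 3328}, i.e. the disk sizes plus 128 MB of DRBD metadata
# per disk.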
6856

    
6857

    
6858
def _ComputeDiskSize(disk_template, disks):
6859
  """Compute disk size requirements in the volume group
6860

6861
  """
6862
  # Required free disk space as a function of disk template and disk sizes
6863
  req_size_dict = {
6864
    constants.DT_DISKLESS: None,
6865
    constants.DT_PLAIN: sum(d["size"] for d in disks),
6866
    # 128 MB are added for drbd metadata for each disk
6867
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6868
    constants.DT_FILE: None,
6869
  }
6870

    
6871
  if disk_template not in req_size_dict:
6872
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6873
                                 " is unknown" %  disk_template)
6874

    
6875
  return req_size_dict[disk_template]
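# Illustrative example (added for clarity): _ComputeDiskSize(
# constants.DT_PLAIN, [{"size": 1024}, {"size": 2048}]) returns 3072, while
# constants.DT_DRBD8 for the same disks returns 3328 (128 MB of metadata per
# disk); DT_DISKLESS and DT_FILE return None (no volume group space needed).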
6876

    
6877

    
6878
def _CheckHVParams(lu, nodenames, hvname, hvparams):
6879
  """Hypervisor parameter validation.
6880

6881
  This function abstracts the hypervisor parameter validation to be
6882
  used in both instance create and instance modify.
6883

6884
  @type lu: L{LogicalUnit}
6885
  @param lu: the logical unit for which we check
6886
  @type nodenames: list
6887
  @param nodenames: the list of nodes on which we should check
6888
  @type hvname: string
6889
  @param hvname: the name of the hypervisor we should use
6890
  @type hvparams: dict
6891
  @param hvparams: the parameters which we need to check
6892
  @raise errors.OpPrereqError: if the parameters are not valid
6893

6894
  """
6895
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6896
                                                  hvname,
6897
                                                  hvparams)
6898
  for node in nodenames:
6899
    info = hvinfo[node]
6900
    if info.offline:
6901
      continue
6902
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6903

    
6904

    
6905
def _CheckOSParams(lu, required, nodenames, osname, osparams):
6906
  """OS parameters validation.
6907

6908
  @type lu: L{LogicalUnit}
6909
  @param lu: the logical unit for which we check
6910
  @type required: boolean
6911
  @param required: whether the validation should fail if the OS is not
6912
      found
6913
  @type nodenames: list
6914
  @param nodenames: the list of nodes on which we should check
6915
  @type osname: string
6916
  @param osname: the name of the hypervisor we should use
6917
  @type osparams: dict
6918
  @param osparams: the parameters which we need to check
6919
  @raise errors.OpPrereqError: if the parameters are not valid
6920

6921
  """
6922
  result = lu.rpc.call_os_validate(required, nodenames, osname,
6923
                                   [constants.OS_VALIDATE_PARAMETERS],
6924
                                   osparams)
6925
  for node, nres in result.items():
6926
    # we don't check for offline cases since this should be run only
6927
    # against the master node and/or an instance's nodes
6928
    nres.Raise("OS Parameters validation failed on node %s" % node)
6929
    if not nres.payload:
6930
      lu.LogInfo("OS %s not found on node %s, validation skipped",
6931
                 osname, node)
6932

    
6933

    
6934
class LUCreateInstance(LogicalUnit):
6935
  """Create an instance.
6936

6937
  """
6938
  HPATH = "instance-add"
6939
  HTYPE = constants.HTYPE_INSTANCE
6940
  _OP_PARAMS = [
6941
    _PInstanceName,
6942
    ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6943
    ("start", True, ht.TBool),
6944
    ("wait_for_sync", True, ht.TBool),
6945
    ("ip_check", True, ht.TBool),
6946
    ("name_check", True, ht.TBool),
6947
    ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6948
    ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6949
    ("hvparams", ht.EmptyDict, ht.TDict),
6950
    ("beparams", ht.EmptyDict, ht.TDict),
6951
    ("osparams", ht.EmptyDict, ht.TDict),
6952
    ("no_install", None, ht.TMaybeBool),
6953
    ("os_type", None, ht.TMaybeString),
6954
    ("force_variant", False, ht.TBool),
6955
    ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6956
    ("source_x509_ca", None, ht.TMaybeString),
6957
    ("source_instance_name", None, ht.TMaybeString),
6958
    ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
6959
     ht.TPositiveInt),
6960
    ("src_node", None, ht.TMaybeString),
6961
    ("src_path", None, ht.TMaybeString),
6962
    ("pnode", None, ht.TMaybeString),
6963
    ("snode", None, ht.TMaybeString),
6964
    ("iallocator", None, ht.TMaybeString),
6965
    ("hypervisor", None, ht.TMaybeString),
6966
    ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6967
    ("identify_defaults", False, ht.TBool),
6968
    ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6969
    ("file_storage_dir", None, ht.TMaybeString),
6970
    ]
6971
  REQ_BGL = False
6972

    
6973
  def CheckArguments(self):
6974
    """Check arguments.
6975

6976
    """
6977
    # do not require name_check to ease forward/backward compatibility
6978
    # for tools
6979
    if self.op.no_install and self.op.start:
6980
      self.LogInfo("No-installation mode selected, disabling startup")
6981
      self.op.start = False
6982
    # validate/normalize the instance name
6983
    self.op.instance_name = \
6984
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6985

    
6986
    if self.op.ip_check and not self.op.name_check:
6987
      # TODO: make the ip check more flexible and not depend on the name check
6988
      raise errors.OpPrereqError("Cannot do ip check without a name check",
6989
                                 errors.ECODE_INVAL)
6990

    
6991
    # check nics' parameter names
6992
    for nic in self.op.nics:
6993
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6994

    
6995
    # check disks. parameter names and consistent adopt/no-adopt strategy
6996
    has_adopt = has_no_adopt = False
6997
    for disk in self.op.disks:
6998
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6999
      if "adopt" in disk:
7000
        has_adopt = True
7001
      else:
7002
        has_no_adopt = True
7003
    if has_adopt and has_no_adopt:
7004
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7005
                                 errors.ECODE_INVAL)
7006
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
7122
    """ExpandNames for CreateInstance.
7123

7124
    Figure out the right locks for instance creation.
7125

7126
    """
7127
    self.needed_locks = {}
7128

    
7129
    instance_name = self.op.instance_name
7130
    # this is just a preventive check, but someone might still add this
7131
    # instance in the meantime, and creation will fail at lock-add time
7132
    if instance_name in self.cfg.GetInstanceList():
7133
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7134
                                 instance_name, errors.ECODE_EXISTS)
7135

    
7136
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7137

    
7138
    if self.op.iallocator:
7139
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7140
    else:
7141
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7142
      nodelist = [self.op.pnode]
7143
      if self.op.snode is not None:
7144
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7145
        nodelist.append(self.op.snode)
7146
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7147

    
7148
    # in case of import lock the source node too
7149
    if self.op.mode == constants.INSTANCE_IMPORT:
7150
      src_node = self.op.src_node
7151
      src_path = self.op.src_path
7152

    
7153
      if src_path is None:
7154
        self.op.src_path = src_path = self.op.instance_name
7155

    
7156
      if src_node is None:
7157
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7158
        self.op.src_node = None
7159
        if os.path.isabs(src_path):
7160
          raise errors.OpPrereqError("Importing an instance from an absolute"
7161
                                     " path requires a source node option.",
7162
                                     errors.ECODE_INVAL)
7163
      else:
7164
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7165
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7166
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7167
        if not os.path.isabs(src_path):
7168
          self.op.src_path = src_path = \
7169
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7170

    
7171
  def _RunAllocator(self):
7172
    """Run the allocator based on input opcode.
7173

7174
    """
7175
    nics = [n.ToDict() for n in self.nics]
7176
    ial = IAllocator(self.cfg, self.rpc,
7177
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7178
                     name=self.op.instance_name,
7179
                     disk_template=self.op.disk_template,
7180
                     tags=[],
7181
                     os=self.op.os_type,
7182
                     vcpus=self.be_full[constants.BE_VCPUS],
7183
                     mem_size=self.be_full[constants.BE_MEMORY],
7184
                     disks=self.disks,
7185
                     nics=nics,
7186
                     hypervisor=self.op.hypervisor,
7187
                     )
7188

    
7189
    ial.Run(self.op.iallocator)
7190

    
7191
    if not ial.success:
7192
      raise errors.OpPrereqError("Can't compute nodes using"
7193
                                 " iallocator '%s': %s" %
7194
                                 (self.op.iallocator, ial.info),
7195
                                 errors.ECODE_NORES)
7196
    if len(ial.result) != ial.required_nodes:
7197
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7198
                                 " of nodes (%s), required %s" %
7199
                                 (self.op.iallocator, len(ial.result),
7200
                                  ial.required_nodes), errors.ECODE_FAULT)
7201
    self.op.pnode = ial.result[0]
7202
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7203
                 self.op.instance_name, self.op.iallocator,
7204
                 utils.CommaJoin(ial.result))
7205
    if ial.required_nodes == 2:
7206
      self.op.snode = ial.result[1]
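    # Editor's note, an illustrative sketch that is not part of the original
    # code: for a mirrored (DRBD) request the allocator must return two node
    # names, e.g. a hypothetical
    #
    #   ial.result = ["node1.example.com", "node2.example.com"]
    #
    # ends up as pnode="node1.example.com" and snode="node2.example.com";
    # for non-mirrored templates required_nodes is 1 and only the primary
    # node is taken from the result.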
7207

    
7208
  def BuildHooksEnv(self):
7209
    """Build hooks env.
7210

7211
    This runs on master, primary and secondary nodes of the instance.
7212

7213
    """
7214
    env = {
7215
      "ADD_MODE": self.op.mode,
7216
      }
7217
    if self.op.mode == constants.INSTANCE_IMPORT:
7218
      env["SRC_NODE"] = self.op.src_node
7219
      env["SRC_PATH"] = self.op.src_path
7220
      env["SRC_IMAGES"] = self.src_images
7221

    
7222
    env.update(_BuildInstanceHookEnv(
7223
      name=self.op.instance_name,
7224
      primary_node=self.op.pnode,
7225
      secondary_nodes=self.secondaries,
7226
      status=self.op.start,
7227
      os_type=self.op.os_type,
7228
      memory=self.be_full[constants.BE_MEMORY],
7229
      vcpus=self.be_full[constants.BE_VCPUS],
7230
      nics=_NICListToTuple(self, self.nics),
7231
      disk_template=self.op.disk_template,
7232
      disks=[(d["size"], d["mode"]) for d in self.disks],
7233
      bep=self.be_full,
7234
      hvp=self.hv_full,
7235
      hypervisor_name=self.op.hypervisor,
7236
    ))
7237

    
7238
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7239
          self.secondaries)
7240
    return env, nl, nl
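    # Editor's note, an illustrative sketch that is not part of the original
    # code: hook scripts see these values as GANETI_-prefixed environment
    # variables (the exact key names come from _BuildInstanceHookEnv), e.g.
    # roughly:
    #
    #   GANETI_ADD_MODE=create
    #   GANETI_INSTANCE_NAME=inst1.example.com
    #   GANETI_INSTANCE_PRIMARY=node1.example.com
    #   GANETI_INSTANCE_DISK_TEMPLATE=drbd
    #
    # and the same node list (master, primary, secondaries) is used for both
    # the pre- and post-hooks.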
7241

    
7242
  def _ReadExportInfo(self):
7243
    """Reads the export information from disk.
7244

7245
    It will override the opcode source node and path with the actual
7246
    information, if these two were not specified before.
7247

7248
    @return: the export information
7249

7250
    """
7251
    assert self.op.mode == constants.INSTANCE_IMPORT
7252

    
7253
    src_node = self.op.src_node
7254
    src_path = self.op.src_path
7255

    
7256
    if src_node is None:
7257
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7258
      exp_list = self.rpc.call_export_list(locked_nodes)
7259
      found = False
7260
      for node in exp_list:
7261
        if exp_list[node].fail_msg:
7262
          continue
7263
        if src_path in exp_list[node].payload:
7264
          found = True
7265
          self.op.src_node = src_node = node
7266
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7267
                                                       src_path)
7268
          break
7269
      if not found:
7270
        raise errors.OpPrereqError("No export found for relative path %s" %
7271
                                    src_path, errors.ECODE_INVAL)
7272

    
7273
    _CheckNodeOnline(self, src_node)
7274
    result = self.rpc.call_export_info(src_node, src_path)
7275
    result.Raise("No export or invalid export found in dir %s" % src_path)
7276

    
7277
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7278
    if not export_info.has_section(constants.INISECT_EXP):
7279
      raise errors.ProgrammerError("Corrupted export config",
7280
                                   errors.ECODE_ENVIRON)
7281

    
7282
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7283
    if (int(ei_version) != constants.EXPORT_VERSION):
7284
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7285
                                 (ei_version, constants.EXPORT_VERSION),
7286
                                 errors.ECODE_ENVIRON)
7287
    return export_info
7288

    
7289
  def _ReadExportParams(self, einfo):
7290
    """Use export parameters as defaults.
7291

7292
    In case the opcode doesn't specify (as in override) some instance
7293
    parameters, then try to use them from the export information, if
7294
    that declares them.
7295

7296
    """
7297
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7298

    
7299
    if self.op.disk_template is None:
7300
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7301
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7302
                                          "disk_template")
7303
      else:
7304
        raise errors.OpPrereqError("No disk template specified and the export"
7305
                                   " is missing the disk_template information",
7306
                                   errors.ECODE_INVAL)
7307

    
7308
    if not self.op.disks:
7309
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7310
        disks = []
7311
        # TODO: import the disk iv_name too
7312
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7313
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7314
          disks.append({"size": disk_sz})
7315
        self.op.disks = disks
7316
      else:
7317
        raise errors.OpPrereqError("No disk info specified and the export"
7318
                                   " is missing the disk information",
7319
                                   errors.ECODE_INVAL)
7320

    
7321
    if (not self.op.nics and
7322
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7323
      nics = []
7324
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7325
        ndict = {}
7326
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7327
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7328
          ndict[name] = v
7329
        nics.append(ndict)
7330
      self.op.nics = nics
7331

    
7332
    if (self.op.hypervisor is None and
7333
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7334
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7335
    if einfo.has_section(constants.INISECT_HYP):
7336
      # use the export parameters but do not override the ones
7337
      # specified by the user
7338
      for name, value in einfo.items(constants.INISECT_HYP):
7339
        if name not in self.op.hvparams:
7340
          self.op.hvparams[name] = value
7341

    
7342
    if einfo.has_section(constants.INISECT_BEP):
7343
      # use the parameters, without overriding
7344
      for name, value in einfo.items(constants.INISECT_BEP):
7345
        if name not in self.op.beparams:
7346
          self.op.beparams[name] = value
7347
    else:
7348
      # try to read the parameters old style, from the main section
7349
      for name in constants.BES_PARAMETERS:
7350
        if (name not in self.op.beparams and
7351
            einfo.has_option(constants.INISECT_INS, name)):
7352
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7353

    
7354
    if einfo.has_section(constants.INISECT_OSP):
7355
      # use the parameters, without overriding
7356
      for name, value in einfo.items(constants.INISECT_OSP):
7357
        if name not in self.op.osparams:
7358
          self.op.osparams[name] = value
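    # Editor's note, an illustrative sketch that is not part of the original
    # code: the export info parsed here is a ConfigParser-style file; its
    # instance section (constants.INISECT_INS) could hypothetically contain
    # entries such as
    #
    #   disk_template = plain
    #   disk_count = 1
    #   disk0_size = 10240
    #   nic_count = 1
    #   nic0_mac = aa:00:00:12:34:56
    #   hypervisor = xen-pvm
    #
    # each of which is only used when the opcode itself did not specify the
    # corresponding parameter.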
7359

    
7360
  def _RevertToDefaults(self, cluster):
7361
    """Revert the instance parameters to the default values.
7362

7363
    """
7364
    # hvparams
7365
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7366
    for name in self.op.hvparams.keys():
7367
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7368
        del self.op.hvparams[name]
7369
    # beparams
7370
    be_defs = cluster.SimpleFillBE({})
7371
    for name in self.op.beparams.keys():
7372
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7373
        del self.op.beparams[name]
7374
    # nic params
7375
    nic_defs = cluster.SimpleFillNIC({})
7376
    for nic in self.op.nics:
7377
      for name in constants.NICS_PARAMETERS:
7378
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7379
          del nic[name]
7380
    # osparams
7381
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7382
    for name in self.op.osparams.keys():
7383
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7384
        del self.op.osparams[name]
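    # Editor's note, an illustrative example that is not part of the original
    # code: if, say, the cluster default for the "memory" beparam is 128 and
    # the (filled) opcode also carries 128, the explicit value is dropped
    # here, so the new instance keeps following the cluster default if that
    # default is changed later; values that differ from the defaults are left
    # untouched.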
7385

    
7386
  def CheckPrereq(self):
7387
    """Check prerequisites.
7388

7389
    """
7390
    if self.op.mode == constants.INSTANCE_IMPORT:
7391
      export_info = self._ReadExportInfo()
7392
      self._ReadExportParams(export_info)
7393

    
7394
    _CheckDiskTemplate(self.op.disk_template)
7395

    
7396
    if (not self.cfg.GetVGName() and
7397
        self.op.disk_template not in constants.DTS_NOT_LVM):
7398
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7399
                                 " instances", errors.ECODE_STATE)
7400

    
7401
    if self.op.hypervisor is None:
7402
      self.op.hypervisor = self.cfg.GetHypervisorType()
7403

    
7404
    cluster = self.cfg.GetClusterInfo()
7405
    enabled_hvs = cluster.enabled_hypervisors
7406
    if self.op.hypervisor not in enabled_hvs:
7407
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7408
                                 " cluster (%s)" % (self.op.hypervisor,
7409
                                  ",".join(enabled_hvs)),
7410
                                 errors.ECODE_STATE)
7411

    
7412
    # check hypervisor parameter syntax (locally)
7413
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7414
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7415
                                      self.op.hvparams)
7416
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7417
    hv_type.CheckParameterSyntax(filled_hvp)
7418
    self.hv_full = filled_hvp
7419
    # check that we don't specify global parameters on an instance
7420
    _CheckGlobalHvParams(self.op.hvparams)
7421

    
7422
    # fill and remember the beparams dict
7423
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7424
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7425

    
7426
    # build os parameters
7427
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7428

    
7429
    # now that hvp/bep are in final format, let's reset to defaults,
7430
    # if told to do so
7431
    if self.op.identify_defaults:
7432
      self._RevertToDefaults(cluster)
7433

    
7434
    # NIC buildup
7435
    self.nics = []
7436
    for idx, nic in enumerate(self.op.nics):
7437
      nic_mode_req = nic.get("mode", None)
7438
      nic_mode = nic_mode_req
7439
      if nic_mode is None:
7440
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7441

    
7442
      # in routed mode, for the first nic, the default ip is 'auto'
7443
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7444
        default_ip_mode = constants.VALUE_AUTO
7445
      else:
7446
        default_ip_mode = constants.VALUE_NONE
7447

    
7448
      # ip validity checks
7449
      ip = nic.get("ip", default_ip_mode)
7450
      if ip is None or ip.lower() == constants.VALUE_NONE:
7451
        nic_ip = None
7452
      elif ip.lower() == constants.VALUE_AUTO:
7453
        if not self.op.name_check:
7454
          raise errors.OpPrereqError("IP address set to auto but name checks"
7455
                                     " have been skipped",
7456
                                     errors.ECODE_INVAL)
7457
        nic_ip = self.hostname1.ip
7458
      else:
7459
        if not netutils.IPAddress.IsValid(ip):
7460
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7461
                                     errors.ECODE_INVAL)
7462
        nic_ip = ip
7463

    
7464
      # TODO: check the ip address for uniqueness
7465
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7466
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7467
                                   errors.ECODE_INVAL)
7468

    
7469
      # MAC address verification
7470
      mac = nic.get("mac", constants.VALUE_AUTO)
7471
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7472
        mac = utils.NormalizeAndValidateMac(mac)
7473

    
7474
        try:
7475
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7476
        except errors.ReservationError:
7477
          raise errors.OpPrereqError("MAC address %s already in use"
7478
                                     " in cluster" % mac,
7479
                                     errors.ECODE_NOTUNIQUE)
7480

    
7481
      # bridge verification
7482
      bridge = nic.get("bridge", None)
7483
      link = nic.get("link", None)
7484
      if bridge and link:
7485
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7486
                                   " at the same time", errors.ECODE_INVAL)
7487
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7488
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7489
                                   errors.ECODE_INVAL)
7490
      elif bridge:
7491
        link = bridge
7492

    
7493
      nicparams = {}
7494
      if nic_mode_req:
7495
        nicparams[constants.NIC_MODE] = nic_mode_req
7496
      if link:
7497
        nicparams[constants.NIC_LINK] = link
7498

    
7499
      check_params = cluster.SimpleFillNIC(nicparams)
7500
      objects.NIC.CheckParameterSyntax(check_params)
7501
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7502

    
7503
    # disk checks/pre-build
7504
    self.disks = []
7505
    for disk in self.op.disks:
7506
      mode = disk.get("mode", constants.DISK_RDWR)
7507
      if mode not in constants.DISK_ACCESS_SET:
7508
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7509
                                   mode, errors.ECODE_INVAL)
7510
      size = disk.get("size", None)
7511
      if size is None:
7512
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7513
      try:
7514
        size = int(size)
7515
      except (TypeError, ValueError):
7516
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7517
                                   errors.ECODE_INVAL)
7518
      vg = disk.get("vg", self.cfg.GetVGName())
7519
      new_disk = {"size": size, "mode": mode, "vg": vg}
7520
      if "adopt" in disk:
7521
        new_disk["adopt"] = disk["adopt"]
7522
      self.disks.append(new_disk)
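    # Editor's note, an illustrative sketch that is not part of the original
    # code: each entry of self.disks is now a normalized dict along the lines
    # of
    #
    #   {"size": 10240, "mode": constants.DISK_RDWR, "vg": "xenvg"}
    #
    # plus an "adopt" key when LV adoption was requested (the values above
    # are hypothetical).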
7523

    
7524
    if self.op.mode == constants.INSTANCE_IMPORT:
7525

    
7526
      # Check that the new instance doesn't have less disks than the export
7527
      instance_disks = len(self.disks)
7528
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7529
      if instance_disks < export_disks:
7530
        raise errors.OpPrereqError("Not enough disks to import."
7531
                                   " (instance: %d, export: %d)" %
7532
                                   (instance_disks, export_disks),
7533
                                   errors.ECODE_INVAL)
7534

    
7535
      disk_images = []
7536
      for idx in range(export_disks):
7537
        option = 'disk%d_dump' % idx
7538
        if export_info.has_option(constants.INISECT_INS, option):
7539
          # FIXME: are the old os-es, disk sizes, etc. useful?
7540
          export_name = export_info.get(constants.INISECT_INS, option)
7541
          image = utils.PathJoin(self.op.src_path, export_name)
7542
          disk_images.append(image)
7543
        else:
7544
          disk_images.append(False)
7545

    
7546
      self.src_images = disk_images
7547

    
7548
      old_name = export_info.get(constants.INISECT_INS, 'name')
7549
      try:
7550
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7551
      except (TypeError, ValueError), err:
7552
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7553
                                   " an integer: %s" % str(err),
7554
                                   errors.ECODE_STATE)
7555
      if self.op.instance_name == old_name:
7556
        for idx, nic in enumerate(self.nics):
7557
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7558
            nic_mac_ini = 'nic%d_mac' % idx
7559
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7560

    
7561
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7562

    
7563
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7564
    if self.op.ip_check:
7565
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7566
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7567
                                   (self.check_ip, self.op.instance_name),
7568
                                   errors.ECODE_NOTUNIQUE)
7569

    
7570
    #### mac address generation
7571
    # By generating here the mac address both the allocator and the hooks get
7572
    # the real final mac address rather than the 'auto' or 'generate' value.
7573
    # There is a race condition between the generation and the instance object
7574
    # creation, which means that we know the mac is valid now, but we're not
7575
    # sure it will be when we actually add the instance. If things go bad
7576
    # adding the instance will abort because of a duplicate mac, and the
7577
    # creation job will fail.
7578
    for nic in self.nics:
7579
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7580
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
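    # Editor's note, an illustrative remark that is not part of the original
    # code: after this loop every NIC carries a concrete address (e.g.
    # something like "aa:00:00:d1:9f:42", the prefix depending on the
    # cluster's MAC prefix) instead of the "auto"/"generate" placeholders, so
    # the allocator run and the hooks below already see the final value.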
7581

    
7582
    #### allocator run
7583

    
7584
    if self.op.iallocator is not None:
7585
      self._RunAllocator()
7586

    
7587
    #### node related checks
7588

    
7589
    # check primary node
7590
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7591
    assert self.pnode is not None, \
7592
      "Cannot retrieve locked node %s" % self.op.pnode
7593
    if pnode.offline:
7594
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7595
                                 pnode.name, errors.ECODE_STATE)
7596
    if pnode.drained:
7597
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7598
                                 pnode.name, errors.ECODE_STATE)
7599
    if not pnode.vm_capable:
7600
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7601
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7602

    
7603
    self.secondaries = []
7604

    
7605
    # mirror node verification
7606
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7607
      if self.op.snode == pnode.name:
7608
        raise errors.OpPrereqError("The secondary node cannot be the"
7609
                                   " primary node.", errors.ECODE_INVAL)
7610
      _CheckNodeOnline(self, self.op.snode)
7611
      _CheckNodeNotDrained(self, self.op.snode)
7612
      _CheckNodeVmCapable(self, self.op.snode)
7613
      self.secondaries.append(self.op.snode)
7614

    
7615
    nodenames = [pnode.name] + self.secondaries
7616

    
7617
    if not self.adopt_disks:
7618
      # Check lv size requirements, if not adopting
7619
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7620
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7621

    
7622
    else: # instead, we must check the adoption data
7623
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7624
      if len(all_lvs) != len(self.disks):
7625
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7626
                                   errors.ECODE_INVAL)
7627
      for lv_name in all_lvs:
7628
        try:
7629
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7630
          # to ReserveLV uses the same syntax
7631
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7632
        except errors.ReservationError:
7633
          raise errors.OpPrereqError("LV named %s used by another instance" %
7634
                                     lv_name, errors.ECODE_NOTUNIQUE)
7635

    
7636
      vg_names = self.rpc.call_vg_list([pnode.name])
7637
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7638

    
7639
      node_lvs = self.rpc.call_lv_list([pnode.name],
7640
                                       vg_names[pnode.name].payload.keys()
7641
                                      )[pnode.name]
7642
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7643
      node_lvs = node_lvs.payload
7644

    
7645
      delta = all_lvs.difference(node_lvs.keys())
7646
      if delta:
7647
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7648
                                   utils.CommaJoin(delta),
7649
                                   errors.ECODE_INVAL)
7650
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7651
      if online_lvs:
7652
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7653
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7654
                                   errors.ECODE_STATE)
7655
      # update the size of disk based on what is found
7656
      for dsk in self.disks:
7657
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7658

    
7659
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7660

    
7661
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7662
    # check OS parameters (remotely)
7663
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7664

    
7665
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7666

    
7667
    # memory check on primary node
7668
    if self.op.start:
7669
      _CheckNodeFreeMemory(self, self.pnode.name,
7670
                           "creating instance %s" % self.op.instance_name,
7671
                           self.be_full[constants.BE_MEMORY],
7672
                           self.op.hypervisor)
7673

    
7674
    self.dry_run_result = list(nodenames)
7675

    
7676
  def Exec(self, feedback_fn):
7677
    """Create and add the instance to the cluster.
7678

7679
    """
7680
    instance = self.op.instance_name
7681
    pnode_name = self.pnode.name
7682

    
7683
    ht_kind = self.op.hypervisor
7684
    if ht_kind in constants.HTS_REQ_PORT:
7685
      network_port = self.cfg.AllocatePort()
7686
    else:
7687
      network_port = None
7688

    
7689
    if constants.ENABLE_FILE_STORAGE:
7690
      # this is needed because os.path.join does not accept None arguments
7691
      if self.op.file_storage_dir is None:
7692
        string_file_storage_dir = ""
7693
      else:
7694
        string_file_storage_dir = self.op.file_storage_dir
7695

    
7696
      # build the full file storage dir path
7697
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7698
                                        string_file_storage_dir, instance)
7699
    else:
7700
      file_storage_dir = ""
7701

    
7702
    disks = _GenerateDiskTemplate(self,
7703
                                  self.op.disk_template,
7704
                                  instance, pnode_name,
7705
                                  self.secondaries,
7706
                                  self.disks,
7707
                                  file_storage_dir,
7708
                                  self.op.file_driver,
7709
                                  0,
7710
                                  feedback_fn)
7711

    
7712
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7713
                            primary_node=pnode_name,
7714
                            nics=self.nics, disks=disks,
7715
                            disk_template=self.op.disk_template,
7716
                            admin_up=False,
7717
                            network_port=network_port,
7718
                            beparams=self.op.beparams,
7719
                            hvparams=self.op.hvparams,
7720
                            hypervisor=self.op.hypervisor,
7721
                            osparams=self.op.osparams,
7722
                            )
7723

    
7724
    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
7737
      feedback_fn("* creating instance disks...")
7738
      try:
7739
        _CreateDisks(self, iobj)
7740
      except errors.OpExecError:
7741
        self.LogWarning("Device creation failed, reverting...")
7742
        try:
7743
          _RemoveDisks(self, iobj)
7744
        finally:
7745
          self.cfg.ReleaseDRBDMinors(instance)
7746
          raise
7747

    
7748
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7749
        feedback_fn("* wiping instance disks...")
7750
        try:
7751
          _WipeDisks(self, iobj)
7752
        except errors.OpExecError:
7753
          self.LogWarning("Device wiping failed, reverting...")
7754
          try:
7755
            _RemoveDisks(self, iobj)
7756
          finally:
7757
            self.cfg.ReleaseDRBDMinors(instance)
7758
            raise
7759

    
7760
    feedback_fn("adding instance %s to cluster config" % instance)
7761

    
7762
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7763

    
7764
    # Declare that we don't want to remove the instance lock anymore, as we've
7765
    # added the instance to the config
7766
    del self.remove_locks[locking.LEVEL_INSTANCE]
7767
    # Unlock all the nodes
7768
    if self.op.mode == constants.INSTANCE_IMPORT:
7769
      nodes_keep = [self.op.src_node]
7770
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7771
                       if node != self.op.src_node]
7772
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7773
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7774
    else:
7775
      self.context.glm.release(locking.LEVEL_NODE)
7776
      del self.acquired_locks[locking.LEVEL_NODE]
7777

    
7778
    if self.op.wait_for_sync:
7779
      disk_abort = not _WaitForSync(self, iobj)
7780
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7781
      # make sure the disks are not degraded (still sync-ing is ok)
7782
      time.sleep(15)
7783
      feedback_fn("* checking mirrors status")
7784
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7785
    else:
7786
      disk_abort = False
7787

    
7788
    if disk_abort:
7789
      _RemoveDisks(self, iobj)
7790
      self.cfg.RemoveInstance(iobj.name)
7791
      # Make sure the instance lock gets removed
7792
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7793
      raise errors.OpExecError("There are some degraded disks for"
7794
                               " this instance")
7795

    
7796
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7797
      if self.op.mode == constants.INSTANCE_CREATE:
7798
        if not self.op.no_install:
7799
          feedback_fn("* running the instance OS create scripts...")
7800
          # FIXME: pass debug option from opcode to backend
7801
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7802
                                                 self.op.debug_level)
7803
          result.Raise("Could not add os for instance %s"
7804
                       " on node %s" % (instance, pnode_name))
7805

    
7806
      elif self.op.mode == constants.INSTANCE_IMPORT:
7807
        feedback_fn("* running the instance OS import scripts...")
7808

    
7809
        transfers = []
7810

    
7811
        for idx, image in enumerate(self.src_images):
7812
          if not image:
7813
            continue
7814

    
7815
          # FIXME: pass debug option from opcode to backend
7816
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7817
                                             constants.IEIO_FILE, (image, ),
7818
                                             constants.IEIO_SCRIPT,
7819
                                             (iobj.disks[idx], idx),
7820
                                             None)
7821
          transfers.append(dt)
7822

    
7823
        import_result = \
7824
          masterd.instance.TransferInstanceData(self, feedback_fn,
7825
                                                self.op.src_node, pnode_name,
7826
                                                self.pnode.secondary_ip,
7827
                                                iobj, transfers)
7828
        if not compat.all(import_result):
7829
          self.LogWarning("Some disks for instance %s on node %s were not"
7830
                          " imported successfully" % (instance, pnode_name))
7831

    
7832
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7833
        feedback_fn("* preparing remote import...")
7834
        # The source cluster will stop the instance before attempting to make a
7835
        # connection. In some cases stopping an instance can take a long time,
7836
        # hence the shutdown timeout is added to the connection timeout.
7837
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7838
                           self.op.source_shutdown_timeout)
7839
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7840

    
7841
        assert iobj.primary_node == self.pnode.name
7842
        disk_results = \
7843
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7844
                                        self.source_x509_ca,
7845
                                        self._cds, timeouts)
7846
        if not compat.all(disk_results):
7847
          # TODO: Should the instance still be started, even if some disks
7848
          # failed to import (valid for local imports, too)?
7849
          self.LogWarning("Some disks for instance %s on node %s were not"
7850
                          " imported successfully" % (instance, pnode_name))
7851

    
7852
        # Run rename script on newly imported instance
7853
        assert iobj.name == instance
7854
        feedback_fn("Running rename script for %s" % instance)
7855
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7856
                                                   self.source_instance_name,
7857
                                                   self.op.debug_level)
7858
        if result.fail_msg:
7859
          self.LogWarning("Failed to run rename script for %s on node"
7860
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7861

    
7862
      else:
7863
        # also checked in the prereq part
7864
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7865
                                     % self.op.mode)
7866

    
7867
    if self.op.start:
7868
      iobj.admin_up = True
7869
      self.cfg.Update(iobj, feedback_fn)
7870
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7871
      feedback_fn("* starting instance...")
7872
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7873
      result.Raise("Could not start instance")
7874

    
7875
    return list(iobj.all_nodes)
7876

    
7877

    
7878
class LUConnectConsole(NoHooksLU):
7879
  """Connect to an instance's console.
7880

7881
  This is somewhat special in that it returns the command line that
7882
  you need to run on the master node in order to connect to the
7883
  console.
7884

7885
  """
7886
  _OP_PARAMS = [
7887
    _PInstanceName
7888
    ]
7889
  REQ_BGL = False
7890

    
7891
  def ExpandNames(self):
7892
    self._ExpandAndLockInstance()
7893

    
7894
  def CheckPrereq(self):
7895
    """Check prerequisites.
7896

7897
    This checks that the instance is in the cluster.
7898

7899
    """
7900
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7901
    assert self.instance is not None, \
7902
      "Cannot retrieve locked instance %s" % self.op.instance_name
7903
    _CheckNodeOnline(self, self.instance.primary_node)
7904

    
7905
  def Exec(self, feedback_fn):
7906
    """Connect to the console of an instance
7907

7908
    """
7909
    instance = self.instance
7910
    node = instance.primary_node
7911

    
7912
    node_insts = self.rpc.call_instance_list([node],
7913
                                             [instance.hypervisor])[node]
7914
    node_insts.Raise("Can't get node information from %s" % node)
7915

    
7916
    if instance.name not in node_insts.payload:
7917
      if instance.admin_up:
7918
        state = "ERROR_down"
7919
      else:
7920
        state = "ADMIN_down"
7921
      raise errors.OpExecError("Instance %s is not running (state %s)" %
7922
                               (instance.name, state))
7923

    
7924
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7925

    
7926
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
7927
    cluster = self.cfg.GetClusterInfo()
7928
    # beparams and hvparams are passed separately, to avoid editing the
7929
    # instance and then saving the defaults in the instance itself.
7930
    hvparams = cluster.FillHV(instance)
7931
    beparams = cluster.FillBE(instance)
7932
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7933

    
7934
    # build ssh cmdline
7935
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
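    # Editor's note, an illustrative remark that is not part of the original
    # code: the caller receives a ready-to-run SSH command for an interactive
    # session on the primary node, wrapping the hypervisor-specific console
    # command (for Xen this is an "xm console <instance>"-style invocation);
    # this LU does not execute anything on the master itself.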
7936

    
7937

    
7938
class LUReplaceDisks(LogicalUnit):
7939
  """Replace the disks of an instance.
7940

7941
  """
7942
  HPATH = "mirrors-replace"
7943
  HTYPE = constants.HTYPE_INSTANCE
7944
  _OP_PARAMS = [
7945
    _PInstanceName,
7946
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7947
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7948
    ("remote_node", None, ht.TMaybeString),
7949
    ("iallocator", None, ht.TMaybeString),
7950
    ("early_release", False, ht.TBool),
7951
    ]
7952
  REQ_BGL = False
7953

    
7954
  def CheckArguments(self):
7955
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7956
                                  self.op.iallocator)
7957

    
7958
  def ExpandNames(self):
7959
    self._ExpandAndLockInstance()
7960

    
7961
    if self.op.iallocator is not None:
7962
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7963

    
7964
    elif self.op.remote_node is not None:
7965
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7966
      self.op.remote_node = remote_node
7967

    
7968
      # Warning: do not remove the locking of the new secondary here
7969
      # unless DRBD8.AddChildren is changed to work in parallel;
7970
      # currently it doesn't since parallel invocations of
7971
      # FindUnusedMinor will conflict
7972
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7973
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7974

    
7975
    else:
7976
      self.needed_locks[locking.LEVEL_NODE] = []
7977
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7978

    
7979
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7980
                                   self.op.iallocator, self.op.remote_node,
7981
                                   self.op.disks, False, self.op.early_release)
7982

    
7983
    self.tasklets = [self.replacer]
7984

    
7985
  def DeclareLocks(self, level):
7986
    # If we're not already locking all nodes in the set we have to declare the
7987
    # instance's primary/secondary nodes.
7988
    if (level == locking.LEVEL_NODE and
7989
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7990
      self._LockInstancesNodes()
7991

    
7992
  def BuildHooksEnv(self):
7993
    """Build hooks env.
7994

7995
    This runs on the master, the primary and all the secondaries.
7996

7997
    """
7998
    instance = self.replacer.instance
7999
    env = {
8000
      "MODE": self.op.mode,
8001
      "NEW_SECONDARY": self.op.remote_node,
8002
      "OLD_SECONDARY": instance.secondary_nodes[0],
8003
      }
8004
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8005
    nl = [
8006
      self.cfg.GetMasterNode(),
8007
      instance.primary_node,
8008
      ]
8009
    if self.op.remote_node is not None:
8010
      nl.append(self.op.remote_node)
8011
    return env, nl, nl
8012

    
8013

    
8014
class TLReplaceDisks(Tasklet):
8015
  """Replaces disks for an instance.
8016

8017
  Note: Locking is not within the scope of this class.
8018

8019
  """
8020
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8021
               disks, delay_iallocator, early_release):
8022
    """Initializes this class.
8023

8024
    """
8025
    Tasklet.__init__(self, lu)
8026

    
8027
    # Parameters
8028
    self.instance_name = instance_name
8029
    self.mode = mode
8030
    self.iallocator_name = iallocator_name
8031
    self.remote_node = remote_node
8032
    self.disks = disks
8033
    self.delay_iallocator = delay_iallocator
8034
    self.early_release = early_release
8035

    
8036
    # Runtime data
8037
    self.instance = None
8038
    self.new_node = None
8039
    self.target_node = None
8040
    self.other_node = None
8041
    self.remote_node_info = None
8042
    self.node_secondary_ip = None
8043

    
8044
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

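  # Editor's note, an illustrative summary that is not part of the original
  # code; in terms of the check above, the accepted combinations are roughly:
  #
  #   mode                            remote_node / iallocator
  #   REPLACE_DISK_PRI / SEC / AUTO   both must be unset
  #   REPLACE_DISK_CHG                exactly one of the two must be given
  #
  # i.e. naming a new secondary (or an allocator to pick one) only makes
  # sense when the secondary node is being changed.
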
  @staticmethod
8067
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8068
    """Compute a new secondary node using an IAllocator.
8069

8070
    """
8071
    ial = IAllocator(lu.cfg, lu.rpc,
8072
                     mode=constants.IALLOCATOR_MODE_RELOC,
8073
                     name=instance_name,
8074
                     relocate_from=relocate_from)
8075

    
8076
    ial.Run(iallocator_name)
8077

    
8078
    if not ial.success:
8079
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8080
                                 " %s" % (iallocator_name, ial.info),
8081
                                 errors.ECODE_NORES)
8082

    
8083
    if len(ial.result) != ial.required_nodes:
8084
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8085
                                 " of nodes (%s), required %s" %
8086
                                 (iallocator_name,
8087
                                  len(ial.result), ial.required_nodes),
8088
                                 errors.ECODE_FAULT)
8089

    
8090
    remote_node_name = ial.result[0]
8091

    
8092
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8093
               instance_name, remote_node_name)
8094

    
8095
    return remote_node_name
8096

    
8097
  def _FindFaultyDisks(self, node_name):
8098
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8099
                                    node_name, True)
8100

    
8101
  def CheckPrereq(self):
8102
    """Check prerequisites.
8103

8104
    This checks that the instance is in the cluster.
8105

8106
    """
8107
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8108
    assert instance is not None, \
8109
      "Cannot retrieve locked instance %s" % self.instance_name
8110

    
8111
    if instance.disk_template != constants.DT_DRBD8:
8112
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8113
                                 " instances", errors.ECODE_INVAL)
8114

    
8115
    if len(instance.secondary_nodes) != 1:
8116
      raise errors.OpPrereqError("The instance has a strange layout,"
8117
                                 " expected one secondary but found %d" %
8118
                                 len(instance.secondary_nodes),
8119
                                 errors.ECODE_FAULT)
8120

    
8121
    if not self.delay_iallocator:
8122
      self._CheckPrereq2()
8123

    
8124
  def _CheckPrereq2(self):
8125
    """Check prerequisites, second part.
8126

8127
    This function should always be part of CheckPrereq. It was separated and is
8128
    now called from Exec because during node evacuation iallocator was only
8129
    called with an unmodified cluster model, not taking planned changes into
8130
    account.
8131

8132
    """
8133
    instance = self.instance
8134
    secondary_node = instance.secondary_nodes[0]
8135

    
8136
    if self.iallocator_name is None:
8137
      remote_node = self.remote_node
8138
    else:
8139
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8140
                                       instance.name, instance.secondary_nodes)
8141

    
8142
    if remote_node is not None:
8143
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8144
      assert self.remote_node_info is not None, \
8145
        "Cannot retrieve locked node %s" % remote_node
8146
    else:
8147
      self.remote_node_info = None
8148

    
8149
    if remote_node == self.instance.primary_node:
8150
      raise errors.OpPrereqError("The specified node is the primary node of"
8151
                                 " the instance.", errors.ECODE_INVAL)
8152

    
8153
    if remote_node == secondary_node:
8154
      raise errors.OpPrereqError("The specified node is already the"
8155
                                 " secondary node of the instance.",
8156
                                 errors.ECODE_INVAL)
8157

    
8158
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8159
                                    constants.REPLACE_DISK_CHG):
8160
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8161
                                 errors.ECODE_INVAL)
8162

    
8163
    if self.mode == constants.REPLACE_DISK_AUTO:
8164
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8165
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8166

    
8167
      if faulty_primary and faulty_secondary:
8168
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8169
                                   " one node and can not be repaired"
8170
                                   " automatically" % self.instance_name,
8171
                                   errors.ECODE_STATE)
8172

    
8173
      if faulty_primary:
8174
        self.disks = faulty_primary
8175
        self.target_node = instance.primary_node
8176
        self.other_node = secondary_node
8177
        check_nodes = [self.target_node, self.other_node]
8178
      elif faulty_secondary:
8179
        self.disks = faulty_secondary
8180
        self.target_node = secondary_node
8181
        self.other_node = instance.primary_node
8182
        check_nodes = [self.target_node, self.other_node]
8183
      else:
8184
        self.disks = []
8185
        check_nodes = []
8186

    
8187
    else:
8188
      # Non-automatic modes
8189
      if self.mode == constants.REPLACE_DISK_PRI:
8190
        self.target_node = instance.primary_node
8191
        self.other_node = secondary_node
8192
        check_nodes = [self.target_node, self.other_node]
8193

    
8194
      elif self.mode == constants.REPLACE_DISK_SEC:
8195
        self.target_node = secondary_node
8196
        self.other_node = instance.primary_node
8197
        check_nodes = [self.target_node, self.other_node]
8198

    
8199
      elif self.mode == constants.REPLACE_DISK_CHG:
8200
        self.new_node = remote_node
8201
        self.other_node = instance.primary_node
8202
        self.target_node = secondary_node
8203
        check_nodes = [self.new_node, self.other_node]
8204

    
8205
        _CheckNodeNotDrained(self.lu, remote_node)
8206
        _CheckNodeVmCapable(self.lu, remote_node)
8207

    
8208
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8209
        assert old_node_info is not None
8210
        if old_node_info.offline and not self.early_release:
8211
          # doesn't make sense to delay the release
8212
          self.early_release = True
8213
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8214
                          " early-release mode", secondary_node)
8215

    
8216
      else:
8217
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8218
                                     self.mode)
8219

    
8220
      # If not specified all disks should be replaced
8221
      if not self.disks:
8222
        self.disks = range(len(self.instance.disks))
8223

    
8224
    for node in check_nodes:
8225
      _CheckNodeOnline(self.lu, node)
8226

    
8227
    # Check whether disks are valid
8228
    for disk_idx in self.disks:
8229
      instance.FindDisk(disk_idx)
8230

    
8231
    # Get secondary node IP addresses
8232
    node_2nd_ip = {}
8233

    
8234
    for node_name in [self.target_node, self.other_node, self.new_node]:
8235
      if node_name is not None:
8236
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8237

    
8238
    self.node_secondary_ip = node_2nd_ip
8239

    
8240
  def Exec(self, feedback_fn):
8241
    """Execute disk replacement.
8242

8243
    This dispatches the disk replacement to the appropriate handler.
8244

8245
    """
8246
    if self.delay_iallocator:
8247
      self._CheckPrereq2()
8248

    
8249
    if not self.disks:
8250
      feedback_fn("No disks need replacement")
8251
      return
8252

    
8253
    feedback_fn("Replacing disk(s) %s for %s" %
8254
                (utils.CommaJoin(self.disks), self.instance.name))
8255

    
8256
    activate_disks = (not self.instance.admin_up)
8257

    
8258
    # Activate the instance disks if we're replacing them on a down instance
8259
    if activate_disks:
8260
      _StartInstanceDisks(self.lu, self.instance, True)
8261

    
8262
    try:
8263
      # Should we replace the secondary node?
8264
      if self.new_node is not None:
8265
        fn = self._ExecDrbd8Secondary
8266
      else:
8267
        fn = self._ExecDrbd8DiskOnly
8268

    
8269
      return fn(feedback_fn)
8270

    
8271
    finally:
8272
      # Deactivate the instance disks if we're replacing them on a
8273
      # down instance
8274
      if activate_disks:
8275
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8276

    
8277
  def _CheckVolumeGroup(self, nodes):
8278
    self.lu.LogInfo("Checking volume groups")
8279

    
8280
    vgname = self.cfg.GetVGName()
8281

    
8282
    # Make sure volume group exists on all involved nodes
8283
    results = self.rpc.call_vg_list(nodes)
8284
    if not results:
8285
      raise errors.OpExecError("Can't list volume groups on the nodes")
8286

    
8287
    for node in nodes:
8288
      res = results[node]
8289
      res.Raise("Error checking node %s" % node)
8290
      if vgname not in res.payload:
8291
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8292
                                 (vgname, node))
8293

    
8294
  def _CheckDisksExistence(self, nodes):
8295
    # Check disk existence
8296
    for idx, dev in enumerate(self.instance.disks):
8297
      if idx not in self.disks:
8298
        continue
8299

    
8300
      for node in nodes:
8301
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8302
        self.cfg.SetDiskID(dev, node)
8303

    
8304
        result = self.rpc.call_blockdev_find(node, dev)
8305

    
8306
        msg = result.fail_msg
8307
        if msg or not result.payload:
8308
          if not msg:
8309
            msg = "disk not found"
8310
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8311
                                   (idx, node, msg))
8312

    
8313
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8314
    for idx, dev in enumerate(self.instance.disks):
8315
      if idx not in self.disks:
8316
        continue
8317

    
8318
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8319
                      (idx, node_name))
8320

    
8321
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8322
                                   ldisk=ldisk):
8323
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8324
                                 " replace disks for instance %s" %
8325
                                 (node_name, self.instance.name))
8326

    
8327
  def _CreateNewStorage(self, node_name):
8328
    vgname = self.cfg.GetVGName()
8329
    iv_names = {}
8330

    
8331
    for idx, dev in enumerate(self.instance.disks):
8332
      if idx not in self.disks:
8333
        continue
8334

    
8335
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8336

    
8337
      self.cfg.SetDiskID(dev, node_name)
8338

    
8339
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8340
      names = _GenerateUniqueNames(self.lu, lv_names)
8341

    
8342
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8343
                             logical_id=(vgname, names[0]))
8344
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8345
                             logical_id=(vgname, names[1]))
8346

    
8347
      new_lvs = [lv_data, lv_meta]
8348
      old_lvs = dev.children
8349
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8350

    
8351
      # we pass force_create=True to force the LVM creation
8352
      for new_lv in new_lvs:
8353
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8354
                        _GetInstanceInfoText(self.instance), False)
8355

    
8356
    return iv_names
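  # Illustrative sketch, not called by the code above: the shape of the
  # per-disk LV name pairs that _CreateNewStorage allocates.  The real code
  # obtains cluster-unique prefixes from the configuration via
  # _GenerateUniqueNames; the stdlib uuid module stands in for that here,
  # purely as an assumption for the example.
  def _SketchNewLvNames(disk_indices):
    """Return {disk_index: (data_lv_name, meta_lv_name)} (illustrative)."""
    import uuid
    names = {}
    for idx in disk_indices:
      # mirrors the ".disk<N>_data" / ".disk<N>_meta" extension scheme above
      names[idx] = tuple("%s.disk%d_%s" % (uuid.uuid4(), idx, suffix)
                         for suffix in ("data", "meta"))
    return names
  # e.g. _SketchNewLvNames([0, 2]) returns one (data, meta) name pair per index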
8357

    
8358
  def _CheckDevices(self, node_name, iv_names):
8359
    for name, (dev, _, _) in iv_names.iteritems():
8360
      self.cfg.SetDiskID(dev, node_name)
8361

    
8362
      result = self.rpc.call_blockdev_find(node_name, dev)
8363

    
8364
      msg = result.fail_msg
8365
      if msg or not result.payload:
8366
        if not msg:
8367
          msg = "disk not found"
8368
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8369
                                 (name, msg))
8370

    
8371
      if result.payload.is_degraded:
8372
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8373

    
8374
  def _RemoveOldStorage(self, node_name, iv_names):
8375
    for name, (_, old_lvs, _) in iv_names.iteritems():
8376
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8377

    
8378
      for lv in old_lvs:
8379
        self.cfg.SetDiskID(lv, node_name)
8380

    
8381
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8382
        if msg:
8383
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8384
                             hint="remove unused LVs manually")
8385

    
8386
  def _ReleaseNodeLock(self, node_name):
8387
    """Releases the lock for a given node."""
8388
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8389

    
8390
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8391
    """Replace a disk on the primary or secondary for DRBD 8.
8392

8393
    The algorithm for replace is quite complicated:
8394

8395
      1. for each disk to be replaced:
8396

8397
        1. create new LVs on the target node with unique names
8398
        1. detach old LVs from the drbd device
8399
        1. rename old LVs to name_replaced.<time_t>
8400
        1. rename new LVs to old LVs
8401
        1. attach the new LVs (with the old names now) to the drbd device
8402

8403
      1. wait for sync across all devices
8404

8405
      1. for each modified disk:
8406

8407
        1. remove old LVs (which have the name name_replaced.<time_t>)
8408

8409
    Failures are not very well handled.
8410

8411
    """
8412
    steps_total = 6
8413

    
8414
    # Step: check device activation
8415
    self.lu.LogStep(1, steps_total, "Check device existence")
8416
    self._CheckDisksExistence([self.other_node, self.target_node])
8417
    self._CheckVolumeGroup([self.target_node, self.other_node])
8418

    
8419
    # Step: check other node consistency
8420
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8421
    self._CheckDisksConsistency(self.other_node,
8422
                                self.other_node == self.instance.primary_node,
8423
                                False)
8424

    
8425
    # Step: create new storage
8426
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8427
    iv_names = self._CreateNewStorage(self.target_node)
8428

    
8429
    # Step: for each lv, detach+rename*2+attach
8430
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8431
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8432
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8433

    
8434
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8435
                                                     old_lvs)
8436
      result.Raise("Can't detach drbd from local storage on node"
8437
                   " %s for device %s" % (self.target_node, dev.iv_name))
8438
      #dev.children = []
8439
      #cfg.Update(instance)
8440

    
8441
      # ok, we created the new LVs, so now we know we have the needed
8442
      # storage; as such, we proceed on the target node to rename
8443
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8444
      # using the assumption that logical_id == physical_id (which in
8445
      # turn is the unique_id on that node)
8446

    
8447
      # FIXME(iustin): use a better name for the replaced LVs
8448
      temp_suffix = int(time.time())
8449
      ren_fn = lambda d, suff: (d.physical_id[0],
8450
                                d.physical_id[1] + "_replaced-%s" % suff)
8451

    
8452
      # Build the rename list based on what LVs exist on the node
8453
      rename_old_to_new = []
8454
      for to_ren in old_lvs:
8455
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8456
        if not result.fail_msg and result.payload:
8457
          # device exists
8458
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8459

    
8460
      self.lu.LogInfo("Renaming the old LVs on the target node")
8461
      result = self.rpc.call_blockdev_rename(self.target_node,
8462
                                             rename_old_to_new)
8463
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8464

    
8465
      # Now we rename the new LVs to the old LVs
8466
      self.lu.LogInfo("Renaming the new LVs on the target node")
8467
      rename_new_to_old = [(new, old.physical_id)
8468
                           for old, new in zip(old_lvs, new_lvs)]
8469
      result = self.rpc.call_blockdev_rename(self.target_node,
8470
                                             rename_new_to_old)
8471
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8472

    
8473
      for old, new in zip(old_lvs, new_lvs):
8474
        new.logical_id = old.logical_id
8475
        self.cfg.SetDiskID(new, self.target_node)
8476

    
8477
      for disk in old_lvs:
8478
        disk.logical_id = ren_fn(disk, temp_suffix)
8479
        self.cfg.SetDiskID(disk, self.target_node)
8480

    
8481
      # Now that the new lvs have the old name, we can add them to the device
8482
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8483
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8484
                                                  new_lvs)
8485
      msg = result.fail_msg
8486
      if msg:
8487
        for new_lv in new_lvs:
8488
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8489
                                               new_lv).fail_msg
8490
          if msg2:
8491
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8492
                               hint=("cleanup manually the unused logical"
8493
                                     "volumes"))
8494
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8495

    
8496
      dev.children = new_lvs
8497

    
8498
      self.cfg.Update(self.instance, feedback_fn)
8499

    
8500
    cstep = 5
8501
    if self.early_release:
8502
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8503
      cstep += 1
8504
      self._RemoveOldStorage(self.target_node, iv_names)
8505
      # WARNING: we release both node locks here, do not do other RPCs
8506
      # than WaitForSync to the primary node
8507
      self._ReleaseNodeLock([self.target_node, self.other_node])
8508

    
8509
    # Wait for sync
8510
    # This can fail as the old devices are degraded and _WaitForSync
8511
    # does a combined result over all disks, so we don't check its return value
8512
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8513
    cstep += 1
8514
    _WaitForSync(self.lu, self.instance)
8515

    
8516
    # Check all devices manually
8517
    self._CheckDevices(self.instance.primary_node, iv_names)
8518

    
8519
    # Step: remove old storage
8520
    if not self.early_release:
8521
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8522
      cstep += 1
8523
      self._RemoveOldStorage(self.target_node, iv_names)
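  # Illustrative sketch, not used by the code above: the per-disk rename dance
  # of _ExecDrbd8DiskOnly, modelled on a plain dict that maps LV names to
  # their contents.  All names and structures here are invented for the
  # example; the real code renames actual logical volumes via RPC.
  def _SketchLvSwap(volumes, old_names, new_names, timestamp):
    """Park the old LVs under *_replaced-<ts> names and promote the new ones."""
    parked = []
    # rename old_lv -> old_lv_replaced-<timestamp>, keeping the names so the
    # "remove old storage" step can delete them later
    for name in old_names:
      parked.append("%s_replaced-%s" % (name, timestamp))
      volumes[parked[-1]] = volumes.pop(name)
    # rename new_lv -> old_lv, so the drbd device sees its usual names again
    for old, new in zip(old_names, new_names):
      volumes[old] = volumes.pop(new)
    return parked
  # e.g. with volumes = {"lv0": "old", "lv0.new": "fresh"}, calling
  # _SketchLvSwap(volumes, ["lv0"], ["lv0.new"], 1234) parks the old data as
  # "lv0_replaced-1234" and makes "lv0" point at the fresh mirror.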
8524

    
8525
  def _ExecDrbd8Secondary(self, feedback_fn):
8526
    """Replace the secondary node for DRBD 8.
8527

8528
    The algorithm for replace is quite complicated:
8529
      - for all disks of the instance:
8530
        - create new LVs on the new node with same names
8531
        - shutdown the drbd device on the old secondary
8532
        - disconnect the drbd network on the primary
8533
        - create the drbd device on the new secondary
8534
        - network attach the drbd on the primary, using an artifice:
8535
          the drbd code for Attach() will connect to the network if it
8536
          finds a device which is connected to the good local disks but
8537
          not network enabled
8538
      - wait for sync across all devices
8539
      - remove all disks from the old secondary
8540

8541
    Failures are not very well handled.
8542

8543
    """
8544
    steps_total = 6
8545

    
8546
    # Step: check device activation
8547
    self.lu.LogStep(1, steps_total, "Check device existence")
8548
    self._CheckDisksExistence([self.instance.primary_node])
8549
    self._CheckVolumeGroup([self.instance.primary_node])
8550

    
8551
    # Step: check other node consistency
8552
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8553
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8554

    
8555
    # Step: create new storage
8556
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8557
    for idx, dev in enumerate(self.instance.disks):
8558
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8559
                      (self.new_node, idx))
8560
      # we pass force_create=True to force LVM creation
8561
      for new_lv in dev.children:
8562
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8563
                        _GetInstanceInfoText(self.instance), False)
8564

    
8565
    # Step 4: drbd minors and drbd setup changes
8566
    # after this, we must manually remove the drbd minors on both the
8567
    # error and the success paths
8568
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8569
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8570
                                         for dev in self.instance.disks],
8571
                                        self.instance.name)
8572
    logging.debug("Allocated minors %r", minors)
8573

    
8574
    iv_names = {}
8575
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8576
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8577
                      (self.new_node, idx))
8578
      # create new devices on new_node; note that we create two IDs:
8579
      # one without port, so the drbd will be activated without
8580
      # networking information on the new node at this stage, and one
8581
      # with network, for the latter activation in step 4
8582
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8583
      if self.instance.primary_node == o_node1:
8584
        p_minor = o_minor1
8585
      else:
8586
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8587
        p_minor = o_minor2
8588

    
8589
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8590
                      p_minor, new_minor, o_secret)
8591
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8592
                    p_minor, new_minor, o_secret)
8593

    
8594
      iv_names[idx] = (dev, dev.children, new_net_id)
8595
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8596
                    new_net_id)
8597
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8598
                              logical_id=new_alone_id,
8599
                              children=dev.children,
8600
                              size=dev.size)
8601
      try:
8602
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8603
                              _GetInstanceInfoText(self.instance), False)
8604
      except errors.GenericError:
8605
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8606
        raise
8607

    
8608
    # We have new devices, shutdown the drbd on the old secondary
8609
    for idx, dev in enumerate(self.instance.disks):
8610
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8611
      self.cfg.SetDiskID(dev, self.target_node)
8612
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8613
      if msg:
8614
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8615
                           "node: %s" % (idx, msg),
8616
                           hint=("Please cleanup this device manually as"
8617
                                 " soon as possible"))
8618

    
8619
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8620
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8621
                                               self.node_secondary_ip,
8622
                                               self.instance.disks)\
8623
                                              [self.instance.primary_node]
8624

    
8625
    msg = result.fail_msg
8626
    if msg:
8627
      # detaches didn't succeed (unlikely)
8628
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8629
      raise errors.OpExecError("Can't detach the disks from the network on"
8630
                               " old node: %s" % (msg,))
8631

    
8632
    # if we managed to detach at least one, we update all the disks of
8633
    # the instance to point to the new secondary
8634
    self.lu.LogInfo("Updating instance configuration")
8635
    for dev, _, new_logical_id in iv_names.itervalues():
8636
      dev.logical_id = new_logical_id
8637
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8638

    
8639
    self.cfg.Update(self.instance, feedback_fn)
8640

    
8641
    # and now perform the drbd attach
8642
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8643
                    " (standalone => connected)")
8644
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8645
                                            self.new_node],
8646
                                           self.node_secondary_ip,
8647
                                           self.instance.disks,
8648
                                           self.instance.name,
8649
                                           False)
8650
    for to_node, to_result in result.items():
8651
      msg = to_result.fail_msg
8652
      if msg:
8653
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8654
                           to_node, msg,
8655
                           hint=("please do a gnt-instance info to see the"
8656
                                 " status of disks"))
8657
    cstep = 5
8658
    if self.early_release:
8659
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8660
      cstep += 1
8661
      self._RemoveOldStorage(self.target_node, iv_names)
8662
      # WARNING: we release all node locks here, do not do other RPCs
8663
      # than WaitForSync to the primary node
8664
      self._ReleaseNodeLock([self.instance.primary_node,
8665
                             self.target_node,
8666
                             self.new_node])
8667

    
8668
    # Wait for sync
8669
    # This can fail as the old devices are degraded and _WaitForSync
8670
    # does a combined result over all disks, so we don't check its return value
8671
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8672
    cstep += 1
8673
    _WaitForSync(self.lu, self.instance)
8674

    
8675
    # Check all devices manually
8676
    self._CheckDevices(self.instance.primary_node, iv_names)
8677

    
8678
    # Step: remove old storage
8679
    if not self.early_release:
8680
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8681
      self._RemoveOldStorage(self.target_node, iv_names)
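# Illustrative helper, not used by the tasklet above: how the DRBD logical_id
# tuple is rewritten by _ExecDrbd8Secondary when the secondary node changes.
# The field order (node_a, node_b, port, minor_a, minor_b, secret) follows the
# unpacking done in that method; everything else is a simplified stand-in.
def _SketchNewDrbdIds(old_logical_id, primary_node, new_node, new_minor):
  """Return (alone_id, net_id) for a disk moved to a new secondary node."""
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    assert primary_node == o_node2, "unexpected primary node"
    p_minor = o_minor2
  # the "alone" id has no port, so the device is brought up without networking
  alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  # the "net" id keeps the old port and is used for the final network attach
  net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return alone_id, net_id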
8682

    
8683

    
8684
class LURepairNodeStorage(NoHooksLU):
8685
  """Repairs the volume group on a node.
8686

8687
  """
8688
  _OP_PARAMS = [
8689
    _PNodeName,
8690
    ("storage_type", ht.NoDefault, _CheckStorageType),
8691
    ("name", ht.NoDefault, ht.TNonEmptyString),
8692
    ("ignore_consistency", False, ht.TBool),
8693
    ]
8694
  REQ_BGL = False
8695

    
8696
  def CheckArguments(self):
8697
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8698

    
8699
    storage_type = self.op.storage_type
8700

    
8701
    if (constants.SO_FIX_CONSISTENCY not in
8702
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8703
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8704
                                 " repaired" % storage_type,
8705
                                 errors.ECODE_INVAL)
8706

    
8707
  def ExpandNames(self):
8708
    self.needed_locks = {
8709
      locking.LEVEL_NODE: [self.op.node_name],
8710
      }
8711

    
8712
  def _CheckFaultyDisks(self, instance, node_name):
8713
    """Ensure faulty disks abort the opcode or at least warn."""
8714
    try:
8715
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8716
                                  node_name, True):
8717
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8718
                                   " node '%s'" % (instance.name, node_name),
8719
                                   errors.ECODE_STATE)
8720
    except errors.OpPrereqError, err:
8721
      if self.op.ignore_consistency:
8722
        self.proc.LogWarning(str(err.args[0]))
8723
      else:
8724
        raise
8725

    
8726
  def CheckPrereq(self):
8727
    """Check prerequisites.
8728

8729
    """
8730
    # Check whether any instance on this node has faulty disks
8731
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8732
      if not inst.admin_up:
8733
        continue
8734
      check_nodes = set(inst.all_nodes)
8735
      check_nodes.discard(self.op.node_name)
8736
      for inst_node_name in check_nodes:
8737
        self._CheckFaultyDisks(inst, inst_node_name)
8738

    
8739
  def Exec(self, feedback_fn):
8740
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8741
                (self.op.name, self.op.node_name))
8742

    
8743
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8744
    result = self.rpc.call_storage_execute(self.op.node_name,
8745
                                           self.op.storage_type, st_args,
8746
                                           self.op.name,
8747
                                           constants.SO_FIX_CONSISTENCY)
8748
    result.Raise("Failed to repair storage unit '%s' on %s" %
8749
                 (self.op.name, self.op.node_name))
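# Minimal sketch of the RPC result convention used throughout this module:
# "result.Raise(msg)" aborts the operation on failure, while "result.fail_msg"
# is inspected directly when the caller only wants to log a warning.  This is
# a simplified stand-in written for illustration, not the real result objects
# returned by the RPC layer (which raise errors.OpExecError).
class _SketchRpcResult(object):
  def __init__(self, payload=None, fail_msg=None):
    self.payload = payload
    self.fail_msg = fail_msg

  def Raise(self, msg):
    """Abort with a combined message if the remote call failed."""
    if self.fail_msg:
      raise RuntimeError("%s: %s" % (msg, self.fail_msg))

# e.g. _SketchRpcResult(fail_msg="vg not found").Raise("Repair failed") raises,
# while _SketchRpcResult(payload=["xenvg"]).Raise("Repair failed") is a no-op.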
8750

    
8751

    
8752
class LUNodeEvacuationStrategy(NoHooksLU):
8753
  """Computes the node evacuation strategy.
8754

8755
  """
8756
  _OP_PARAMS = [
8757
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8758
    ("remote_node", None, ht.TMaybeString),
8759
    ("iallocator", None, ht.TMaybeString),
8760
    ]
8761
  REQ_BGL = False
8762

    
8763
  def CheckArguments(self):
8764
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8765

    
8766
  def ExpandNames(self):
8767
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8768
    self.needed_locks = locks = {}
8769
    if self.op.remote_node is None:
8770
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8771
    else:
8772
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8773
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8774

    
8775
  def Exec(self, feedback_fn):
8776
    if self.op.remote_node is not None:
8777
      instances = []
8778
      for node in self.op.nodes:
8779
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8780
      result = []
8781
      for i in instances:
8782
        if i.primary_node == self.op.remote_node:
8783
          raise errors.OpPrereqError("Node %s is the primary node of"
8784
                                     " instance %s, cannot use it as"
8785
                                     " secondary" %
8786
                                     (self.op.remote_node, i.name),
8787
                                     errors.ECODE_INVAL)
8788
        result.append([i.name, self.op.remote_node])
8789
    else:
8790
      ial = IAllocator(self.cfg, self.rpc,
8791
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8792
                       evac_nodes=self.op.nodes)
8793
      ial.Run(self.op.iallocator, validate=True)
8794
      if not ial.success:
8795
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8796
                                 errors.ECODE_NORES)
8797
      result = ial.result
8798
    return result
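# Illustrative, simplified version of the "explicit remote node" branch of
# LUNodeEvacuationStrategy.Exec above, on plain dicts instead of the cluster
# configuration: every secondary instance of the evacuated nodes is mapped to
# the given remote node, and instances that already use that node as primary
# are rejected.  All data structures here are invented for the example.
def _SketchEvacToRemoteNode(secondaries_by_node, primary_by_instance,
                            evac_nodes, remote_node):
  """Return [[instance_name, remote_node], ...] or raise ValueError."""
  result = []
  for node in evac_nodes:
    for inst in secondaries_by_node.get(node, []):
      if primary_by_instance.get(inst) == remote_node:
        # the LU raises OpPrereqError(..., errors.ECODE_INVAL) for this case
        raise ValueError("%s is the primary node of %s, cannot use it as"
                         " secondary" % (remote_node, inst))
      result.append([inst, remote_node])
  return result

# e.g. _SketchEvacToRemoteNode({"node1": ["inst1"]}, {"inst1": "node3"},
#                              ["node1"], "node2") == [["inst1", "node2"]]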
8799

    
8800

    
8801
class LUGrowDisk(LogicalUnit):
8802
  """Grow a disk of an instance.
8803

8804
  """
8805
  HPATH = "disk-grow"
8806
  HTYPE = constants.HTYPE_INSTANCE
8807
  _OP_PARAMS = [
8808
    _PInstanceName,
8809
    ("disk", ht.NoDefault, ht.TInt),
8810
    ("amount", ht.NoDefault, ht.TInt),
8811
    ("wait_for_sync", True, ht.TBool),
8812
    ]
8813
  REQ_BGL = False
8814

    
8815
  def ExpandNames(self):
8816
    self._ExpandAndLockInstance()
8817
    self.needed_locks[locking.LEVEL_NODE] = []
8818
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8819

    
8820
  def DeclareLocks(self, level):
8821
    if level == locking.LEVEL_NODE:
8822
      self._LockInstancesNodes()
8823

    
8824
  def BuildHooksEnv(self):
8825
    """Build hooks env.
8826

8827
    This runs on the master, the primary and all the secondaries.
8828

8829
    """
8830
    env = {
8831
      "DISK": self.op.disk,
8832
      "AMOUNT": self.op.amount,
8833
      }
8834
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8835
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8836
    return env, nl, nl
8837

    
8838
  def CheckPrereq(self):
8839
    """Check prerequisites.
8840

8841
    This checks that the instance is in the cluster.
8842

8843
    """
8844
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8845
    assert instance is not None, \
8846
      "Cannot retrieve locked instance %s" % self.op.instance_name
8847
    nodenames = list(instance.all_nodes)
8848
    for node in nodenames:
8849
      _CheckNodeOnline(self, node)
8850

    
8851
    self.instance = instance
8852

    
8853
    if instance.disk_template not in constants.DTS_GROWABLE:
8854
      raise errors.OpPrereqError("Instance's disk layout does not support"
8855
                                 " growing.", errors.ECODE_INVAL)
8856

    
8857
    self.disk = instance.FindDisk(self.op.disk)
8858

    
8859
    if instance.disk_template != constants.DT_FILE:
8860
      # TODO: check the free disk space for file, when that feature
8861
      # will be supported
8862
      _CheckNodesFreeDiskPerVG(self, nodenames,
8863
                               {self.disk.physical_id[0]: self.op.amount})
8864

    
8865
  def Exec(self, feedback_fn):
8866
    """Execute disk grow.
8867

8868
    """
8869
    instance = self.instance
8870
    disk = self.disk
8871

    
8872
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8873
    if not disks_ok:
8874
      raise errors.OpExecError("Cannot activate block device to grow")
8875

    
8876
    for node in instance.all_nodes:
8877
      self.cfg.SetDiskID(disk, node)
8878
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8879
      result.Raise("Grow request failed to node %s" % node)
8880

    
8881
      # TODO: Rewrite code to work properly
8882
      # DRBD goes into sync mode for a short amount of time after executing the
8883
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8884
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8885
      # time is a work-around.
8886
      time.sleep(5)
8887

    
8888
    disk.RecordGrow(self.op.amount)
8889
    self.cfg.Update(instance, feedback_fn)
8890
    if self.op.wait_for_sync:
8891
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8892
      if disk_abort:
8893
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8894
                             " status.\nPlease check the instance.")
8895
      if not instance.admin_up:
8896
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8897
    elif not instance.admin_up:
8898
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
8899
                           " not supposed to be running because no wait for"
8900
                           " sync mode was requested.")
8901

    
8902

    
8903
class LUQueryInstanceData(NoHooksLU):
8904
  """Query runtime instance data.
8905

8906
  """
8907
  _OP_PARAMS = [
8908
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8909
    ("static", False, ht.TBool),
8910
    ]
8911
  REQ_BGL = False
8912

    
8913
  def ExpandNames(self):
8914
    self.needed_locks = {}
8915
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8916

    
8917
    if self.op.instances:
8918
      self.wanted_names = []
8919
      for name in self.op.instances:
8920
        full_name = _ExpandInstanceName(self.cfg, name)
8921
        self.wanted_names.append(full_name)
8922
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8923
    else:
8924
      self.wanted_names = None
8925
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8926

    
8927
    self.needed_locks[locking.LEVEL_NODE] = []
8928
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8929

    
8930
  def DeclareLocks(self, level):
8931
    if level == locking.LEVEL_NODE:
8932
      self._LockInstancesNodes()
8933

    
8934
  def CheckPrereq(self):
8935
    """Check prerequisites.
8936

8937
    This only checks the optional instance list against the existing names.
8938

8939
    """
8940
    if self.wanted_names is None:
8941
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8942

    
8943
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8944
                             in self.wanted_names]
8945

    
8946
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
8947
    """Returns the status of a block device
8948

8949
    """
8950
    if self.op.static or not node:
8951
      return None
8952

    
8953
    self.cfg.SetDiskID(dev, node)
8954

    
8955
    result = self.rpc.call_blockdev_find(node, dev)
8956
    if result.offline:
8957
      return None
8958

    
8959
    result.Raise("Can't compute disk status for %s" % instance_name)
8960

    
8961
    status = result.payload
8962
    if status is None:
8963
      return None
8964

    
8965
    return (status.dev_path, status.major, status.minor,
8966
            status.sync_percent, status.estimated_time,
8967
            status.is_degraded, status.ldisk_status)
8968

    
8969
  def _ComputeDiskStatus(self, instance, snode, dev):
8970
    """Compute block device status.
8971

8972
    """
8973
    if dev.dev_type in constants.LDS_DRBD:
8974
      # we change the snode then (otherwise we use the one passed in)
8975
      if dev.logical_id[0] == instance.primary_node:
8976
        snode = dev.logical_id[1]
8977
      else:
8978
        snode = dev.logical_id[0]
8979

    
8980
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8981
                                              instance.name, dev)
8982
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8983

    
8984
    if dev.children:
8985
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
8986
                      for child in dev.children]
8987
    else:
8988
      dev_children = []
8989

    
8990
    data = {
8991
      "iv_name": dev.iv_name,
8992
      "dev_type": dev.dev_type,
8993
      "logical_id": dev.logical_id,
8994
      "physical_id": dev.physical_id,
8995
      "pstatus": dev_pstatus,
8996
      "sstatus": dev_sstatus,
8997
      "children": dev_children,
8998
      "mode": dev.mode,
8999
      "size": dev.size,
9000
      }
9001

    
9002
    return data
9003

    
9004
  def Exec(self, feedback_fn):
9005
    """Gather and return data"""
9006
    result = {}
9007

    
9008
    cluster = self.cfg.GetClusterInfo()
9009

    
9010
    for instance in self.wanted_instances:
9011
      if not self.op.static:
9012
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9013
                                                  instance.name,
9014
                                                  instance.hypervisor)
9015
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9016
        remote_info = remote_info.payload
9017
        if remote_info and "state" in remote_info:
9018
          remote_state = "up"
9019
        else:
9020
          remote_state = "down"
9021
      else:
9022
        remote_state = None
9023
      if instance.admin_up:
9024
        config_state = "up"
9025
      else:
9026
        config_state = "down"
9027

    
9028
      disks = [self._ComputeDiskStatus(instance, None, device)
9029
               for device in instance.disks]
9030

    
9031
      idict = {
9032
        "name": instance.name,
9033
        "config_state": config_state,
9034
        "run_state": remote_state,
9035
        "pnode": instance.primary_node,
9036
        "snodes": instance.secondary_nodes,
9037
        "os": instance.os,
9038
        # this happens to be the same format used for hooks
9039
        "nics": _NICListToTuple(self, instance.nics),
9040
        "disk_template": instance.disk_template,
9041
        "disks": disks,
9042
        "hypervisor": instance.hypervisor,
9043
        "network_port": instance.network_port,
9044
        "hv_instance": instance.hvparams,
9045
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9046
        "be_instance": instance.beparams,
9047
        "be_actual": cluster.FillBE(instance),
9048
        "os_instance": instance.osparams,
9049
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9050
        "serial_no": instance.serial_no,
9051
        "mtime": instance.mtime,
9052
        "ctime": instance.ctime,
9053
        "uuid": instance.uuid,
9054
        }
9055

    
9056
      result[instance.name] = idict
9057

    
9058
    return result
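# For reference, an abridged example of the per-instance dictionary returned
# by LUQueryInstanceData.Exec above.  All concrete values are made up; the
# "pstatus"/"sstatus" tuples follow _ComputeBlockdevStatus: (dev_path, major,
# minor, sync_percent, estimated_time, is_degraded, ldisk_status).
_EXAMPLE_INSTANCE_DATA = {
  "instance1.example.com": {
    "name": "instance1.example.com",
    "config_state": "up",
    "run_state": "up",
    "pnode": "node1.example.com",
    "snodes": ["node2.example.com"],
    "disk_template": "drbd8",
    "disks": [{
      "iv_name": "disk/0",
      "pstatus": ("/dev/drbd0", 147, 0, None, None, False, None),
      "sstatus": ("/dev/drbd0", 147, 0, None, None, False, None),
      "children": [],
      "mode": "rw",
      "size": 10240,
      }],
    },
  }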
9059

    
9060

    
9061
class LUSetInstanceParams(LogicalUnit):
9062
  """Modifies an instances's parameters.
9063

9064
  """
9065
  HPATH = "instance-modify"
9066
  HTYPE = constants.HTYPE_INSTANCE
9067
  _OP_PARAMS = [
9068
    _PInstanceName,
9069
    ("nics", ht.EmptyList, ht.TList),
9070
    ("disks", ht.EmptyList, ht.TList),
9071
    ("beparams", ht.EmptyDict, ht.TDict),
9072
    ("hvparams", ht.EmptyDict, ht.TDict),
9073
    ("disk_template", None, ht.TMaybeString),
9074
    ("remote_node", None, ht.TMaybeString),
9075
    ("os_name", None, ht.TMaybeString),
9076
    ("force_variant", False, ht.TBool),
9077
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9078
    _PForce,
9079
    ]
9080
  REQ_BGL = False
9081

    
9082
  def CheckArguments(self):
9083
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9084
            self.op.hvparams or self.op.beparams or self.op.os_name):
9085
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9086

    
9087
    if self.op.hvparams:
9088
      _CheckGlobalHvParams(self.op.hvparams)
9089

    
9090
    # Disk validation
9091
    disk_addremove = 0
9092
    for disk_op, disk_dict in self.op.disks:
9093
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9094
      if disk_op == constants.DDM_REMOVE:
9095
        disk_addremove += 1
9096
        continue
9097
      elif disk_op == constants.DDM_ADD:
9098
        disk_addremove += 1
9099
      else:
9100
        if not isinstance(disk_op, int):
9101
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9102
        if not isinstance(disk_dict, dict):
9103
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9104
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9105

    
9106
      if disk_op == constants.DDM_ADD:
9107
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9108
        if mode not in constants.DISK_ACCESS_SET:
9109
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9110
                                     errors.ECODE_INVAL)
9111
        size = disk_dict.get('size', None)
9112
        if size is None:
9113
          raise errors.OpPrereqError("Required disk parameter size missing",
9114
                                     errors.ECODE_INVAL)
9115
        try:
9116
          size = int(size)
9117
        except (TypeError, ValueError), err:
9118
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9119
                                     str(err), errors.ECODE_INVAL)
9120
        disk_dict['size'] = size
9121
      else:
9122
        # modification of disk
9123
        if 'size' in disk_dict:
9124
          raise errors.OpPrereqError("Disk size change not possible, use"
9125
                                     " grow-disk", errors.ECODE_INVAL)
9126

    
9127
    if disk_addremove > 1:
9128
      raise errors.OpPrereqError("Only one disk add or remove operation"
9129
                                 " supported at a time", errors.ECODE_INVAL)
9130

    
9131
    if self.op.disks and self.op.disk_template is not None:
9132
      raise errors.OpPrereqError("Disk template conversion and other disk"
9133
                                 " changes not supported at the same time",
9134
                                 errors.ECODE_INVAL)
9135

    
9136
    if self.op.disk_template:
9137
      _CheckDiskTemplate(self.op.disk_template)
9138
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
9139
          self.op.remote_node is None):
9140
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
9141
                                   " one requires specifying a secondary node",
9142
                                   errors.ECODE_INVAL)
9143

    
9144
    # NIC validation
9145
    nic_addremove = 0
9146
    for nic_op, nic_dict in self.op.nics:
9147
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9148
      if nic_op == constants.DDM_REMOVE:
9149
        nic_addremove += 1
9150
        continue
9151
      elif nic_op == constants.DDM_ADD:
9152
        nic_addremove += 1
9153
      else:
9154
        if not isinstance(nic_op, int):
9155
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9156
        if not isinstance(nic_dict, dict):
9157
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9158
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9159

    
9160
      # nic_dict should be a dict
9161
      nic_ip = nic_dict.get('ip', None)
9162
      if nic_ip is not None:
9163
        if nic_ip.lower() == constants.VALUE_NONE:
9164
          nic_dict['ip'] = None
9165
        else:
9166
          if not netutils.IPAddress.IsValid(nic_ip):
9167
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9168
                                       errors.ECODE_INVAL)
9169

    
9170
      nic_bridge = nic_dict.get('bridge', None)
9171
      nic_link = nic_dict.get('link', None)
9172
      if nic_bridge and nic_link:
9173
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9174
                                   " at the same time", errors.ECODE_INVAL)
9175
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9176
        nic_dict['bridge'] = None
9177
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9178
        nic_dict['link'] = None
9179

    
9180
      if nic_op == constants.DDM_ADD:
9181
        nic_mac = nic_dict.get('mac', None)
9182
        if nic_mac is None:
9183
          nic_dict['mac'] = constants.VALUE_AUTO
9184

    
9185
      if 'mac' in nic_dict:
9186
        nic_mac = nic_dict['mac']
9187
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9188
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9189

    
9190
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9191
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9192
                                     " modifying an existing nic",
9193
                                     errors.ECODE_INVAL)
9194

    
9195
    if nic_addremove > 1:
9196
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9197
                                 " supported at a time", errors.ECODE_INVAL)
9198

    
9199
  def ExpandNames(self):
9200
    self._ExpandAndLockInstance()
9201
    self.needed_locks[locking.LEVEL_NODE] = []
9202
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9203

    
9204
  def DeclareLocks(self, level):
9205
    if level == locking.LEVEL_NODE:
9206
      self._LockInstancesNodes()
9207
      if self.op.disk_template and self.op.remote_node:
9208
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9209
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9210

    
9211
  def BuildHooksEnv(self):
9212
    """Build hooks env.
9213

9214
    This runs on the master, primary and secondaries.
9215

9216
    """
9217
    args = dict()
9218
    if constants.BE_MEMORY in self.be_new:
9219
      args['memory'] = self.be_new[constants.BE_MEMORY]
9220
    if constants.BE_VCPUS in self.be_new:
9221
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9222
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9223
    # information at all.
9224
    if self.op.nics:
9225
      args['nics'] = []
9226
      nic_override = dict(self.op.nics)
9227
      for idx, nic in enumerate(self.instance.nics):
9228
        if idx in nic_override:
9229
          this_nic_override = nic_override[idx]
9230
        else:
9231
          this_nic_override = {}
9232
        if 'ip' in this_nic_override:
9233
          ip = this_nic_override['ip']
9234
        else:
9235
          ip = nic.ip
9236
        if 'mac' in this_nic_override:
9237
          mac = this_nic_override['mac']
9238
        else:
9239
          mac = nic.mac
9240
        if idx in self.nic_pnew:
9241
          nicparams = self.nic_pnew[idx]
9242
        else:
9243
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9244
        mode = nicparams[constants.NIC_MODE]
9245
        link = nicparams[constants.NIC_LINK]
9246
        args['nics'].append((ip, mac, mode, link))
9247
      if constants.DDM_ADD in nic_override:
9248
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9249
        mac = nic_override[constants.DDM_ADD]['mac']
9250
        nicparams = self.nic_pnew[constants.DDM_ADD]
9251
        mode = nicparams[constants.NIC_MODE]
9252
        link = nicparams[constants.NIC_LINK]
9253
        args['nics'].append((ip, mac, mode, link))
9254
      elif constants.DDM_REMOVE in nic_override:
9255
        del args['nics'][-1]
9256

    
9257
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9258
    if self.op.disk_template:
9259
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9260
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9261
    return env, nl, nl
9262

    
9263
  def CheckPrereq(self):
9264
    """Check prerequisites.
9265

9266
    This only checks the instance list against the existing names.
9267

9268
    """
9269
    # checking the new params on the primary/secondary nodes
9270

    
9271
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9272
    cluster = self.cluster = self.cfg.GetClusterInfo()
9273
    assert self.instance is not None, \
9274
      "Cannot retrieve locked instance %s" % self.op.instance_name
9275
    pnode = instance.primary_node
9276
    nodelist = list(instance.all_nodes)
9277

    
9278
    # OS change
9279
    if self.op.os_name and not self.op.force:
9280
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9281
                      self.op.force_variant)
9282
      instance_os = self.op.os_name
9283
    else:
9284
      instance_os = instance.os
9285

    
9286
    if self.op.disk_template:
9287
      if instance.disk_template == self.op.disk_template:
9288
        raise errors.OpPrereqError("Instance already has disk template %s" %
9289
                                   instance.disk_template, errors.ECODE_INVAL)
9290

    
9291
      if (instance.disk_template,
9292
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9293
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9294
                                   " %s to %s" % (instance.disk_template,
9295
                                                  self.op.disk_template),
9296
                                   errors.ECODE_INVAL)
9297
      _CheckInstanceDown(self, instance, "cannot change disk template")
9298
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9299
        if self.op.remote_node == pnode:
9300
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9301
                                     " as the primary node of the instance" %
9302
                                     self.op.remote_node, errors.ECODE_STATE)
9303
        _CheckNodeOnline(self, self.op.remote_node)
9304
        _CheckNodeNotDrained(self, self.op.remote_node)
9305
        # FIXME: here we assume that the old instance type is DT_PLAIN
9306
        assert instance.disk_template == constants.DT_PLAIN
9307
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9308
                 for d in instance.disks]
9309
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9310
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9311

    
9312
    # hvparams processing
9313
    if self.op.hvparams:
9314
      hv_type = instance.hypervisor
9315
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9316
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9317
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9318

    
9319
      # local check
9320
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9321
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9322
      self.hv_new = hv_new # the new actual values
9323
      self.hv_inst = i_hvdict # the new dict (without defaults)
9324
    else:
9325
      self.hv_new = self.hv_inst = {}
9326

    
9327
    # beparams processing
9328
    if self.op.beparams:
9329
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9330
                                   use_none=True)
9331
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9332
      be_new = cluster.SimpleFillBE(i_bedict)
9333
      self.be_new = be_new # the new actual values
9334
      self.be_inst = i_bedict # the new dict (without defaults)
9335
    else:
9336
      self.be_new = self.be_inst = {}
9337

    
9338
    # osparams processing
9339
    if self.op.osparams:
9340
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9341
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9342
      self.os_inst = i_osdict # the new dict (without defaults)
9343
    else:
9344
      self.os_inst = {}
9345

    
9346
    self.warn = []
9347

    
9348
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9349
      mem_check_list = [pnode]
9350
      if be_new[constants.BE_AUTO_BALANCE]:
9351
        # either we changed auto_balance to yes or it was from before
9352
        mem_check_list.extend(instance.secondary_nodes)
9353
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9354
                                                  instance.hypervisor)
9355
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9356
                                         instance.hypervisor)
9357
      pninfo = nodeinfo[pnode]
9358
      msg = pninfo.fail_msg
9359
      if msg:
9360
        # Assume the primary node is unreachable and go ahead
9361
        self.warn.append("Can't get info from primary node %s: %s" %
9362
                         (pnode, msg))
9363
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9364
        self.warn.append("Node data from primary node %s doesn't contain"
9365
                         " free memory information" % pnode)
9366
      elif instance_info.fail_msg:
9367
        self.warn.append("Can't get instance runtime information: %s" %
9368
                        instance_info.fail_msg)
9369
      else:
9370
        if instance_info.payload:
9371
          current_mem = int(instance_info.payload['memory'])
9372
        else:
9373
          # Assume instance not running
9374
          # (there is a slight race condition here, but it's not very probable,
9375
          # and we have no other way to check)
9376
          current_mem = 0
9377
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9378
                    pninfo.payload['memory_free'])
9379
        if miss_mem > 0:
9380
          raise errors.OpPrereqError("This change will prevent the instance"
9381
                                     " from starting, due to %d MB of memory"
9382
                                     " missing on its primary node" % miss_mem,
9383
                                     errors.ECODE_NORES)
9384

    
9385
      if be_new[constants.BE_AUTO_BALANCE]:
9386
        for node, nres in nodeinfo.items():
9387
          if node not in instance.secondary_nodes:
9388
            continue
9389
          msg = nres.fail_msg
9390
          if msg:
9391
            self.warn.append("Can't get info from secondary node %s: %s" %
9392
                             (node, msg))
9393
          elif not isinstance(nres.payload.get('memory_free', None), int):
9394
            self.warn.append("Secondary node %s didn't return free"
9395
                             " memory information" % node)
9396
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9397
            self.warn.append("Not enough memory to failover instance to"
9398
                             " secondary node %s" % node)
9399

    
9400
    # NIC processing
9401
    self.nic_pnew = {}
9402
    self.nic_pinst = {}
9403
    for nic_op, nic_dict in self.op.nics:
9404
      if nic_op == constants.DDM_REMOVE:
9405
        if not instance.nics:
9406
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9407
                                     errors.ECODE_INVAL)
9408
        continue
9409
      if nic_op != constants.DDM_ADD:
9410
        # an existing nic
9411
        if not instance.nics:
9412
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9413
                                     " no NICs" % nic_op,
9414
                                     errors.ECODE_INVAL)
9415
        if nic_op < 0 or nic_op >= len(instance.nics):
9416
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9417
                                     " are 0 to %d" %
9418
                                     (nic_op, len(instance.nics) - 1),
9419
                                     errors.ECODE_INVAL)
9420
        old_nic_params = instance.nics[nic_op].nicparams
9421
        old_nic_ip = instance.nics[nic_op].ip
9422
      else:
9423
        old_nic_params = {}
9424
        old_nic_ip = None
9425

    
9426
      update_params_dict = dict([(key, nic_dict[key])
9427
                                 for key in constants.NICS_PARAMETERS
9428
                                 if key in nic_dict])
9429

    
9430
      if 'bridge' in nic_dict:
9431
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9432

    
9433
      new_nic_params = _GetUpdatedParams(old_nic_params,
9434
                                         update_params_dict)
9435
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9436
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9437
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9438
      self.nic_pinst[nic_op] = new_nic_params
9439
      self.nic_pnew[nic_op] = new_filled_nic_params
9440
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9441

    
9442
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9443
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9444
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9445
        if msg:
9446
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9447
          if self.op.force:
9448
            self.warn.append(msg)
9449
          else:
9450
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9451
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9452
        if 'ip' in nic_dict:
9453
          nic_ip = nic_dict['ip']
9454
        else:
9455
          nic_ip = old_nic_ip
9456
        if nic_ip is None:
9457
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9458
                                     ' on a routed nic', errors.ECODE_INVAL)
9459
      if 'mac' in nic_dict:
9460
        nic_mac = nic_dict['mac']
9461
        if nic_mac is None:
9462
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9463
                                     errors.ECODE_INVAL)
9464
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9465
          # otherwise generate the mac
9466
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9467
        else:
9468
          # or validate/reserve the current one
9469
          try:
9470
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9471
          except errors.ReservationError:
9472
            raise errors.OpPrereqError("MAC address %s already in use"
9473
                                       " in cluster" % nic_mac,
9474
                                       errors.ECODE_NOTUNIQUE)
9475

    
9476
    # DISK processing
9477
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9478
      raise errors.OpPrereqError("Disk operations not supported for"
9479
                                 " diskless instances",
9480
                                 errors.ECODE_INVAL)
9481
    for disk_op, _ in self.op.disks:
9482
      if disk_op == constants.DDM_REMOVE:
9483
        if len(instance.disks) == 1:
9484
          raise errors.OpPrereqError("Cannot remove the last disk of"
9485
                                     " an instance", errors.ECODE_INVAL)
9486
        _CheckInstanceDown(self, instance, "cannot remove disks")
9487

    
9488
      if (disk_op == constants.DDM_ADD and
9489
          len(instance.disks) >= constants.MAX_DISKS):
9490
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9491
                                   " add more" % constants.MAX_DISKS,
9492
                                   errors.ECODE_STATE)
9493
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9494
        # an existing disk
9495
        if disk_op < 0 or disk_op >= len(instance.disks):
9496
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9497
                                     " are 0 to %d" %
9498
                                     (disk_op, len(instance.disks)),
9499
                                     errors.ECODE_INVAL)
9500

    
9501
    return
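  # Small illustrative helper, not called by the code above: the free-memory
  # check performed by CheckPrereq when BE_MEMORY is increased.  The change is
  # refused when the additional memory the instance would need does not fit
  # into the free memory reported by its primary node.
  def _SketchMissingMemory(new_mem, current_mem, node_free_mem):
    """Return how many MB would be missing on the primary node (<= 0 is OK)."""
    return new_mem - current_mem - node_free_mem
  # e.g. _SketchMissingMemory(4096, 1024, 2048) == 1024, which would trigger
  # the ECODE_NORES error raised above.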
9502

    
9503
  def _ConvertPlainToDrbd(self, feedback_fn):
9504
    """Converts an instance from plain to drbd.
9505

9506
    """
9507
    feedback_fn("Converting template to drbd")
9508
    instance = self.instance
9509
    pnode = instance.primary_node
9510
    snode = self.op.remote_node
9511

    
9512
    # create a fake disk info for _GenerateDiskTemplate
9513
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9514
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9515
                                      instance.name, pnode, [snode],
9516
                                      disk_info, None, None, 0, feedback_fn)
9517
    info = _GetInstanceInfoText(instance)
9518
    feedback_fn("Creating aditional volumes...")
9519
    # first, create the missing data and meta devices
9520
    for disk in new_disks:
9521
      # unfortunately this is... not too nice
9522
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9523
                            info, True)
9524
      for child in disk.children:
9525
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9526
    # at this stage, all new LVs have been created, we can rename the
9527
    # old ones
9528
    feedback_fn("Renaming original volumes...")
9529
    rename_list = [(o, n.children[0].logical_id)
9530
                   for (o, n) in zip(instance.disks, new_disks)]
9531
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9532
    result.Raise("Failed to rename original LVs")
9533

    
9534
    feedback_fn("Initializing DRBD devices...")
9535
    # all child devices are in place, we can now create the DRBD devices
9536
    for disk in new_disks:
9537
      for node in [pnode, snode]:
9538
        f_create = node == pnode
9539
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9540

    
9541
    # at this point, the instance has been modified
9542
    instance.disk_template = constants.DT_DRBD8
9543
    instance.disks = new_disks
9544
    self.cfg.Update(instance, feedback_fn)
9545

    
9546
    # disks are created, waiting for sync
9547
    disk_abort = not _WaitForSync(self, instance)
9548
    if disk_abort:
9549
      raise errors.OpExecError("There are some degraded disks for"
9550
                               " this instance, please cleanup manually")
9551

    
9552
  def _ConvertDrbdToPlain(self, feedback_fn):
9553
    """Converts an instance from drbd to plain.
9554

9555
    """
9556
    instance = self.instance
9557
    assert len(instance.secondary_nodes) == 1
9558
    pnode = instance.primary_node
9559
    snode = instance.secondary_nodes[0]
9560
    feedback_fn("Converting template to plain")
9561

    
9562
    old_disks = instance.disks
9563
    new_disks = [d.children[0] for d in old_disks]
9564

    
9565
    # copy over size and mode
9566
    for parent, child in zip(old_disks, new_disks):
9567
      child.size = parent.size
9568
      child.mode = parent.mode
9569

    
9570
    # update instance structure
9571
    instance.disks = new_disks
9572
    instance.disk_template = constants.DT_PLAIN
9573
    self.cfg.Update(instance, feedback_fn)
9574

    
9575
    feedback_fn("Removing volumes on the secondary node...")
9576
    for disk in old_disks:
9577
      self.cfg.SetDiskID(disk, snode)
9578
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9579
      if msg:
9580
        self.LogWarning("Could not remove block device %s on node %s,"
9581
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9582

    
9583
    feedback_fn("Removing unneeded volumes on the primary node...")
9584
    for idx, disk in enumerate(old_disks):
9585
      meta = disk.children[1]
9586
      self.cfg.SetDiskID(meta, pnode)
9587
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9588
      if msg:
9589
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9590
                        " continuing anyway: %s", idx, pnode, msg)
9591

    
9592
  def Exec(self, feedback_fn):
9593
    """Modifies an instance.
9594

9595
    All parameters take effect only at the next restart of the instance.
9596

9597
    """
9598
    # Process here the warnings from CheckPrereq, as we don't have a
9599
    # feedback_fn there.
9600
    for warn in self.warn:
9601
      feedback_fn("WARNING: %s" % warn)
9602

    
9603
    result = []
9604
    instance = self.instance
9605
    # disk changes
9606
    for disk_op, disk_dict in self.op.disks:
9607
      if disk_op == constants.DDM_REMOVE:
9608
        # remove the last disk
9609
        device = instance.disks.pop()
9610
        device_idx = len(instance.disks)
9611
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9612
          self.cfg.SetDiskID(disk, node)
9613
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9614
          if msg:
9615
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9616
                            " continuing anyway", device_idx, node, msg)
9617
        result.append(("disk/%d" % device_idx, "remove"))
9618
      elif disk_op == constants.DDM_ADD:
9619
        # add a new disk
9620
        if instance.disk_template == constants.DT_FILE:
9621
          file_driver, file_path = instance.disks[0].logical_id
9622
          file_path = os.path.dirname(file_path)
9623
        else:
9624
          file_driver = file_path = None
9625
        disk_idx_base = len(instance.disks)
9626
        new_disk = _GenerateDiskTemplate(self,
9627
                                         instance.disk_template,
9628
                                         instance.name, instance.primary_node,
9629
                                         instance.secondary_nodes,
9630
                                         [disk_dict],
9631
                                         file_path,
9632
                                         file_driver,
9633
                                         disk_idx_base, feedback_fn)[0]
9634
        instance.disks.append(new_disk)
9635
        info = _GetInstanceInfoText(instance)
9636

    
9637
        logging.info("Creating volume %s for instance %s",
9638
                     new_disk.iv_name, instance.name)
9639
        # Note: this needs to be kept in sync with _CreateDisks
9640
        #HARDCODE
9641
        for node in instance.all_nodes:
9642
          f_create = node == instance.primary_node
9643
          try:
9644
            _CreateBlockDev(self, node, instance, new_disk,
9645
                            f_create, info, f_create)
9646
          except errors.OpExecError, err:
9647
            self.LogWarning("Failed to create volume %s (%s) on"
9648
                            " node %s: %s",
9649
                            new_disk.iv_name, new_disk, node, err)
9650
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9651
                       (new_disk.size, new_disk.mode)))
9652
      else:
9653
        # change a given disk
9654
        instance.disks[disk_op].mode = disk_dict['mode']
9655
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9656

    
9657
    if self.op.disk_template:
9658
      r_shut = _ShutdownInstanceDisks(self, instance)
9659
      if not r_shut:
9660
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9661
                                 " proceed with disk template conversion")
9662
      mode = (instance.disk_template, self.op.disk_template)
9663
      try:
9664
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9665
      except:
9666
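        # the conversion failed; release any DRBD minors that may have been
        # reserved for the new disk layout before re-raising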
        self.cfg.ReleaseDRBDMinors(instance.name)
9667
        raise
9668
      result.append(("disk_template", self.op.disk_template))
9669

    
9670
    # NIC changes
9671
    for nic_op, nic_dict in self.op.nics:
9672
      if nic_op == constants.DDM_REMOVE:
9673
        # remove the last nic
9674
        del instance.nics[-1]
9675
        result.append(("nic.%d" % len(instance.nics), "remove"))
9676
      elif nic_op == constants.DDM_ADD:
9677
        # mac and bridge should be set by now
9678
        mac = nic_dict['mac']
9679
        ip = nic_dict.get('ip', None)
9680
        nicparams = self.nic_pinst[constants.DDM_ADD]
9681
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9682
        instance.nics.append(new_nic)
9683
        result.append(("nic.%d" % (len(instance.nics) - 1),
9684
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9685
                       (new_nic.mac, new_nic.ip,
9686
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9687
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9688
                       )))
9689
      else:
9690
        for key in 'mac', 'ip':
9691
          if key in nic_dict:
9692
            setattr(instance.nics[nic_op], key, nic_dict[key])
9693
        if nic_op in self.nic_pinst:
9694
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9695
        for key, val in nic_dict.iteritems():
9696
          result.append(("nic.%s/%d" % (key, nic_op), val))
9697

    
9698
    # hvparams changes
9699
    if self.op.hvparams:
9700
      instance.hvparams = self.hv_inst
9701
      for key, val in self.op.hvparams.iteritems():
9702
        result.append(("hv/%s" % key, val))
9703

    
9704
    # beparams changes
9705
    if self.op.beparams:
9706
      instance.beparams = self.be_inst
9707
      for key, val in self.op.beparams.iteritems():
9708
        result.append(("be/%s" % key, val))
9709

    
9710
    # OS change
9711
    if self.op.os_name:
9712
      instance.os = self.op.os_name
9713

    
9714
    # osparams changes
9715
    if self.op.osparams:
9716
      instance.osparams = self.os_inst
9717
      for key, val in self.op.osparams.iteritems():
9718
        result.append(("os/%s" % key, val))
9719

    
9720
    self.cfg.Update(instance, feedback_fn)
9721

    
9722
    return result
9723

    
9724
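  # Maps (current template, requested template) pairs to the conversion
  # methods above; Exec() looks up the requested conversion here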
  _DISK_CONVERSIONS = {
9725
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9726
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9727
    }
9728

    
9729

    
9730
class LUQueryExports(NoHooksLU):
9731
  """Query the exports list
9732

9733
  """
9734
  _OP_PARAMS = [
9735
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9736
    ("use_locking", False, ht.TBool),
9737
    ]
9738
  REQ_BGL = False
9739

    
9740
  def ExpandNames(self):
9741
    self.needed_locks = {}
9742
    self.share_locks[locking.LEVEL_NODE] = 1
9743
    if not self.op.nodes:
9744
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9745
    else:
9746
      self.needed_locks[locking.LEVEL_NODE] = \
9747
        _GetWantedNodes(self, self.op.nodes)
9748

    
9749
  def Exec(self, feedback_fn):
9750
    """Compute the list of all the exported system images.
9751

9752
    @rtype: dict
9753
    @return: a dictionary with the structure node->(export-list)
9754
        where export-list is a list of the instances exported on
9755
        that node.
9756

9757
    """
9758
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9759
    rpcresult = self.rpc.call_export_list(self.nodes)
9760
    result = {}
9761
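    # nodes whose export query failed are marked with False instead of a
    # (possibly empty) list of export names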
    for node in rpcresult:
9762
      if rpcresult[node].fail_msg:
9763
        result[node] = False
9764
      else:
9765
        result[node] = rpcresult[node].payload
9766

    
9767
    return result
9768

    
9769

    
9770
class LUPrepareExport(NoHooksLU):
9771
  """Prepares an instance for an export and returns useful information.
9772

9773
  """
9774
  _OP_PARAMS = [
9775
    _PInstanceName,
9776
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9777
    ]
9778
  REQ_BGL = False
9779

    
9780
  def ExpandNames(self):
9781
    self._ExpandAndLockInstance()
9782

    
9783
  def CheckPrereq(self):
9784
    """Check prerequisites.
9785

9786
    """
9787
    instance_name = self.op.instance_name
9788

    
9789
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9790
    assert self.instance is not None, \
9791
          "Cannot retrieve locked instance %s" % self.op.instance_name
9792
    _CheckNodeOnline(self, self.instance.primary_node)
9793

    
9794
    self._cds = _GetClusterDomainSecret()
9795

    
9796
  def Exec(self, feedback_fn):
9797
    """Prepares an instance for an export.
9798

9799
    """
9800
    instance = self.instance
9801

    
9802
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9803
      salt = utils.GenerateSecret(8)
9804

    
9805
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9806
      result = self.rpc.call_x509_cert_create(instance.primary_node,
9807
                                              constants.RIE_CERT_VALIDITY)
9808
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
9809

    
9810
      (name, cert_pem) = result.payload
9811

    
9812
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9813
                                             cert_pem)
9814

    
9815
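      # the key name and the CA are authenticated with the cluster domain
      # secret, so the subsequent export opcode can verify that they were
      # issued by this cluster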
      return {
9816
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9817
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9818
                          salt),
9819
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9820
        }
9821

    
9822
    return None
9823

    
9824

    
9825
class LUExportInstance(LogicalUnit):
9826
  """Export an instance to an image in the cluster.
9827

9828
  """
9829
  HPATH = "instance-export"
9830
  HTYPE = constants.HTYPE_INSTANCE
9831
  _OP_PARAMS = [
9832
    _PInstanceName,
9833
    ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9834
    ("shutdown", True, ht.TBool),
9835
    _PShutdownTimeout,
9836
    ("remove_instance", False, ht.TBool),
9837
    ("ignore_remove_failures", False, ht.TBool),
9838
    ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9839
    ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9840
    ("destination_x509_ca", None, ht.TMaybeString),
9841
    ]
9842
  REQ_BGL = False
9843

    
9844
  def CheckArguments(self):
9845
    """Check the arguments.
9846

9847
    """
9848
    self.x509_key_name = self.op.x509_key_name
9849
    self.dest_x509_ca_pem = self.op.destination_x509_ca
9850

    
9851
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
9852
      if not self.x509_key_name:
9853
        raise errors.OpPrereqError("Missing X509 key name for encryption",
9854
                                   errors.ECODE_INVAL)
9855

    
9856
      if not self.dest_x509_ca_pem:
9857
        raise errors.OpPrereqError("Missing destination X509 CA",
9858
                                   errors.ECODE_INVAL)
9859

    
9860
  def ExpandNames(self):
9861
    self._ExpandAndLockInstance()
9862

    
9863
    # Lock all nodes for local exports
9864
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9865
      # FIXME: lock only instance primary and destination node
9866
      #
9867
      # Sad but true, for now we have to lock all nodes, as we don't know where
9868
      # the previous export might be, and in this LU we search for it and
9869
      # remove it from its current node. In the future we could fix this by:
9870
      #  - making a tasklet to search (share-lock all), then create the
9871
      #    new one, then one to remove, after
9872
      #  - removing the removal operation altogether
9873
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9874

    
9875
  def DeclareLocks(self, level):
9876
    """Last minute lock declaration."""
9877
    # All nodes are locked anyway, so nothing to do here.
9878

    
9879
  def BuildHooksEnv(self):
9880
    """Build hooks env.
9881

9882
    This will run on the master, primary node and target node.
9883

9884
    """
9885
    env = {
9886
      "EXPORT_MODE": self.op.mode,
9887
      "EXPORT_NODE": self.op.target_node,
9888
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9889
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9890
      # TODO: Generic function for boolean env variables
9891
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9892
      }
9893

    
9894
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9895

    
9896
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9897

    
9898
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9899
      nl.append(self.op.target_node)
9900

    
9901
    return env, nl, nl
9902

    
9903
  def CheckPrereq(self):
9904
    """Check prerequisites.
9905

9906
    This checks that the instance and node names are valid.
9907

9908
    """
9909
    instance_name = self.op.instance_name
9910

    
9911
    self.instance = self.cfg.GetInstanceInfo(instance_name)
9912
    assert self.instance is not None, \
9913
          "Cannot retrieve locked instance %s" % self.op.instance_name
9914
    _CheckNodeOnline(self, self.instance.primary_node)
9915

    
9916
    if (self.op.remove_instance and self.instance.admin_up and
9917
        not self.op.shutdown):
9918
      raise errors.OpPrereqError("Can not remove instance without shutting it"
9919
                                 " down before")
9920

    
9921
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
9922
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9923
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9924
      assert self.dst_node is not None
9925

    
9926
      _CheckNodeOnline(self, self.dst_node.name)
9927
      _CheckNodeNotDrained(self, self.dst_node.name)
9928

    
9929
      self._cds = None
9930
      self.dest_disk_info = None
9931
      self.dest_x509_ca = None
9932

    
9933
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9934
      self.dst_node = None
9935

    
9936
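      # for remote exports, target_node holds one entry of signed destination
      # information (host, port, magic) per instance disk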
      if len(self.op.target_node) != len(self.instance.disks):
9937
        raise errors.OpPrereqError(("Received destination information for %s"
9938
                                    " disks, but instance %s has %s disks") %
9939
                                   (len(self.op.target_node), instance_name,
9940
                                    len(self.instance.disks)),
9941
                                   errors.ECODE_INVAL)
9942

    
9943
      cds = _GetClusterDomainSecret()
9944

    
9945
      # Check X509 key name
9946
      try:
9947
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9948
      except (TypeError, ValueError), err:
9949
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9950

    
9951
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9952
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9953
                                   errors.ECODE_INVAL)
9954

    
9955
      # Load and verify CA
9956
      try:
9957
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9958
      except OpenSSL.crypto.Error, err:
9959
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9960
                                   (err, ), errors.ECODE_INVAL)
9961

    
9962
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9963
      if errcode is not None:
9964
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9965
                                   (msg, ), errors.ECODE_INVAL)
9966

    
9967
      self.dest_x509_ca = cert
9968

    
9969
      # Verify target information
9970
      disk_info = []
9971
      for idx, disk_data in enumerate(self.op.target_node):
9972
        try:
9973
          (host, port, magic) = \
9974
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9975
        except errors.GenericError, err:
9976
          raise errors.OpPrereqError("Target info for disk %s: %s" %
9977
                                     (idx, err), errors.ECODE_INVAL)
9978

    
9979
        disk_info.append((host, port, magic))
9980

    
9981
      assert len(disk_info) == len(self.op.target_node)
9982
      self.dest_disk_info = disk_info
9983

    
9984
    else:
9985
      raise errors.ProgrammerError("Unhandled export mode %r" %
9986
                                   self.op.mode)
9987

    
9988
    # instance disk type verification
9989
    # TODO: Implement export support for file-based disks
9990
    for disk in self.instance.disks:
9991
      if disk.dev_type == constants.LD_FILE:
9992
        raise errors.OpPrereqError("Export not supported for instances with"
9993
                                   " file-based disks", errors.ECODE_INVAL)
9994

    
9995
  def _CleanupExports(self, feedback_fn):
9996
    """Removes exports of current instance from all other nodes.
9997

9998
    If an instance in a cluster with nodes A..D was exported to node C, its
9999
    exports will be removed from the nodes A, B and D.
10000

10001
    """
10002
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10003

    
10004
    nodelist = self.cfg.GetNodeList()
10005
    nodelist.remove(self.dst_node.name)
10006

    
10007
    # on one-node clusters nodelist will be empty after the removal
10008
    # if we proceed the backup would be removed because OpQueryExports
10009
    # substitutes an empty list with the full cluster node list.
10010
    iname = self.instance.name
10011
    if nodelist:
10012
      feedback_fn("Removing old exports for instance %s" % iname)
10013
      exportlist = self.rpc.call_export_list(nodelist)
10014
      for node in exportlist:
10015
        if exportlist[node].fail_msg:
10016
          continue
10017
        if iname in exportlist[node].payload:
10018
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10019
          if msg:
10020
            self.LogWarning("Could not remove older export for instance %s"
10021
                            " on node %s: %s", iname, node, msg)
10022

    
10023
  def Exec(self, feedback_fn):
10024
    """Export an instance to an image in the cluster.
10025

10026
    """
10027
    assert self.op.mode in constants.EXPORT_MODES
10028

    
10029
    instance = self.instance
10030
    src_node = instance.primary_node
10031

    
10032
    if self.op.shutdown:
10033
      # shutdown the instance, but not the disks
10034
      feedback_fn("Shutting down instance %s" % instance.name)
10035
      result = self.rpc.call_instance_shutdown(src_node, instance,
10036
                                               self.op.shutdown_timeout)
10037
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10038
      result.Raise("Could not shutdown instance %s on"
10039
                   " node %s" % (instance.name, src_node))
10040

    
10041
    # set the disks ID correctly since call_instance_start needs the
10042
    # correct drbd minor to create the symlinks
10043
    for disk in instance.disks:
10044
      self.cfg.SetDiskID(disk, src_node)
10045

    
10046
    activate_disks = (not instance.admin_up)
10047

    
10048
    if activate_disks:
10049
      # Activate the instance disks if we're exporting a stopped instance
10050
      feedback_fn("Activating disks for %s" % instance.name)
10051
      _StartInstanceDisks(self, instance, None)
10052

    
10053
    try:
10054
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10055
                                                     instance)
10056

    
10057
      helper.CreateSnapshots()
10058
      try:
10059
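        # once the snapshots have been taken, the instance can run again
        # while the export is going on, unless it is about to be removed
        # anyway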
        if (self.op.shutdown and instance.admin_up and
10060
            not self.op.remove_instance):
10061
          assert not activate_disks
10062
          feedback_fn("Starting instance %s" % instance.name)
10063
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10064
          msg = result.fail_msg
10065
          if msg:
10066
            feedback_fn("Failed to start instance: %s" % msg)
10067
            _ShutdownInstanceDisks(self, instance)
10068
            raise errors.OpExecError("Could not start instance: %s" % msg)
10069

    
10070
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10071
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10072
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10073
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10074
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10075

    
10076
          (key_name, _, _) = self.x509_key_name
10077

    
10078
          dest_ca_pem = \
10079
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10080
                                            self.dest_x509_ca)
10081

    
10082
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10083
                                                     key_name, dest_ca_pem,
10084
                                                     timeouts)
10085
      finally:
10086
        helper.Cleanup()
10087

    
10088
      # Check for backwards compatibility
10089
      assert len(dresults) == len(instance.disks)
10090
      assert compat.all(isinstance(i, bool) for i in dresults), \
10091
             "Not all results are boolean: %r" % dresults
10092

    
10093
    finally:
10094
      if activate_disks:
10095
        feedback_fn("Deactivating disks for %s" % instance.name)
10096
        _ShutdownInstanceDisks(self, instance)
10097

    
10098
    if not (compat.all(dresults) and fin_resu):
10099
      failures = []
10100
      if not fin_resu:
10101
        failures.append("export finalization")
10102
      if not compat.all(dresults):
10103
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10104
                               if not dsk)
10105
        failures.append("disk export: disk(s) %s" % fdsk)
10106

    
10107
      raise errors.OpExecError("Export failed, errors in %s" %
10108
                               utils.CommaJoin(failures))
10109

    
10110
    # At this point, the export was successful, we can cleanup/finish
10111

    
10112
    # Remove instance if requested
10113
    if self.op.remove_instance:
10114
      feedback_fn("Removing instance %s" % instance.name)
10115
      _RemoveInstance(self, feedback_fn, instance,
10116
                      self.op.ignore_remove_failures)
10117

    
10118
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10119
      self._CleanupExports(feedback_fn)
10120

    
10121
    return fin_resu, dresults
10122

    
10123

    
10124
class LURemoveExport(NoHooksLU):
10125
  """Remove exports related to the named instance.
10126

10127
  """
10128
  _OP_PARAMS = [
10129
    _PInstanceName,
10130
    ]
10131
  REQ_BGL = False
10132

    
10133
  def ExpandNames(self):
10134
    self.needed_locks = {}
10135
    # We need all nodes to be locked in order for RemoveExport to work, but we
10136
    # don't need to lock the instance itself, as nothing will happen to it (and
10137
    # we can remove exports also for a removed instance)
10138
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10139

    
10140
  def Exec(self, feedback_fn):
10141
    """Remove any export.
10142

10143
    """
10144
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10145
    # If the instance was not found we'll try with the name that was passed in.
10146
    # This will only work if it was an FQDN, though.
10147
    fqdn_warn = False
10148
    if not instance_name:
10149
      fqdn_warn = True
10150
      instance_name = self.op.instance_name
10151

    
10152
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10153
    exportlist = self.rpc.call_export_list(locked_nodes)
10154
    found = False
10155
    for node in exportlist:
10156
      msg = exportlist[node].fail_msg
10157
      if msg:
10158
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10159
        continue
10160
      if instance_name in exportlist[node].payload:
10161
        found = True
10162
        result = self.rpc.call_export_remove(node, instance_name)
10163
        msg = result.fail_msg
10164
        if msg:
10165
          logging.error("Could not remove export for instance %s"
10166
                        " on node %s: %s", instance_name, node, msg)
10167

    
10168
    if fqdn_warn and not found:
10169
      feedback_fn("Export not found. If trying to remove an export belonging"
10170
                  " to a deleted instance please use its Fully Qualified"
10171
                  " Domain Name.")
10172

    
10173

    
10174
class LUAddGroup(LogicalUnit):
10175
  """Logical unit for creating node groups.
10176

10177
  """
10178
  HPATH = "group-add"
10179
  HTYPE = constants.HTYPE_GROUP
10180

    
10181
  _OP_PARAMS = [
10182
    _PGroupName,
10183
    ]
10184

    
10185
  REQ_BGL = False
10186

    
10187
  def ExpandNames(self):
10188
    # We need the new group's UUID here so that we can create and acquire the
10189
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10190
    # that it should not check whether the UUID exists in the configuration.
10191
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10192
    self.needed_locks = {}
10193
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10194

    
10195
  def CheckPrereq(self):
10196
    """Check prerequisites.
10197

10198
    This checks that the given group name is not an existing node group
10199
    already.
10200

10201
    """
10202
    try:
10203
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10204
    except errors.OpPrereqError:
10205
      pass
10206
    else:
10207
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10208
                                 " node group (UUID: %s)" %
10209
                                 (self.op.group_name, existing_uuid),
10210
                                 errors.ECODE_EXISTS)
10211

    
10212
  def BuildHooksEnv(self):
10213
    """Build hooks env.
10214

10215
    """
10216
    env = {
10217
      "GROUP_NAME": self.op.group_name,
10218
      }
10219
    mn = self.cfg.GetMasterNode()
10220
    return env, [mn], [mn]
10221

    
10222
  def Exec(self, feedback_fn):
10223
    """Add the node group to the cluster.
10224

10225
    """
10226
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10227
                                  uuid=self.group_uuid)
10228

    
10229
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10230
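    # declare that we don't want to remove the group lock anymore, as we've
    # added the group to the configuration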
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10231

    
10232

    
10233
class LUQueryGroups(NoHooksLU):
10234
  """Logical unit for querying node groups.
10235

10236
  """
10237
  # pylint: disable-msg=W0142
10238
  _OP_PARAMS = [
10239
    _POutputFields,
10240
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10241
    ]
10242

    
10243
  REQ_BGL = False
10244

    
10245
  _FIELDS_DYNAMIC = utils.FieldSet()
10246

    
10247
  _SIMPLE_FIELDS = ["name", "uuid", "ctime", "mtime", "serial_no"]
10248

    
10249
  _FIELDS_STATIC = utils.FieldSet(
10250
      "node_cnt", "node_list", "pinst_cnt", "pinst_list", *_SIMPLE_FIELDS)
10251

    
10252
  def CheckArguments(self):
10253
    _CheckOutputFields(static=self._FIELDS_STATIC,
10254
                       dynamic=self._FIELDS_DYNAMIC,
10255
                       selected=self.op.output_fields)
10256

    
10257
  def ExpandNames(self):
10258
    self.needed_locks = {}
10259

    
10260
  def Exec(self, feedback_fn):
10261
    """Computes the list of groups and their attributes.
10262

10263
    """
10264
    all_groups = self.cfg.GetAllNodeGroupsInfo()
10265

    
10266
    if not self.op.names:
10267
      my_groups = utils.NiceSort(all_groups.keys())
10268
    else:
10269
      # Accept names to be either names or UUIDs.
10270
      all_uuid = frozenset(all_groups.keys())
10271
      name_to_uuid = dict((g.name, g.uuid) for g in all_groups.values())
10272
      my_groups = []
10273
      missing = []
10274

    
10275
      for name in self.op.names:
10276
        if name in all_uuid:
10277
          my_groups.append(name)
10278
        elif name in name_to_uuid:
10279
          my_groups.append(name_to_uuid[name])
10280
        else:
10281
          missing.append(name)
10282

    
10283
      if missing:
10284
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10285
                                   errors.ECODE_NOENT)
10286

    
10287
    do_nodes = bool(frozenset(["node_cnt", "node_list"]).
10288
                    intersection(self.op.output_fields))
10289

    
10290
    do_instances = bool(frozenset(["pinst_cnt", "pinst_list"]).
10291
                        intersection(self.op.output_fields))
10292

    
10293
    # We need to map group->[nodes], and group->[instances]. The former is
10294
    # directly attainable, but the latter we have to do through instance->node,
10295
    # hence we need to process nodes even if we only need instance information.
10296
    if do_nodes or do_instances:
10297
      all_nodes = self.cfg.GetAllNodesInfo()
10298
      group_to_nodes = dict((all_groups[name].uuid, []) for name in my_groups)
10299
      node_to_group = {}
10300

    
10301
      for node in all_nodes.values():
10302
        if node.group in group_to_nodes:
10303
          group_to_nodes[node.group].append(node.name)
10304
          node_to_group[node.name] = node.group
10305

    
10306
      if do_instances:
10307
        all_instances = self.cfg.GetAllInstancesInfo()
10308
        group_to_instances = dict((all_groups[name].uuid, [])
10309
                                  for name in my_groups)
10310
        for instance in all_instances.values():
10311
          node = instance.primary_node
10312
          if node in node_to_group:
10313
            group_to_instances[node_to_group[node]].append(instance.name)
10314

    
10315
    output = []
10316

    
10317
    for name in my_groups:
10318
      group = all_groups[name]
10319
      group_output = []
10320

    
10321
      for field in self.op.output_fields:
10322
        if field in self._SIMPLE_FIELDS:
10323
          val = getattr(group, field)
10324
        elif field == "node_list":
10325
          val = utils.NiceSort(group_to_nodes[group.uuid])
10326
        elif field == "node_cnt":
10327
          val = len(group_to_nodes[group.uuid])
10328
        elif field == "pinst_list":
10329
          val = utils.NiceSort(group_to_instances[group.uuid])
10330
        elif field == "pinst_cnt":
10331
          val = len(group_to_instances[group.uuid])
10332
        else:
10333
          raise errors.ParameterError(field)
10334
        group_output.append(val)
10335
      output.append(group_output)
10336

    
10337
    return output
10338

    
10339

    
10340
class LURemoveGroup(LogicalUnit):
10341
  HPATH = "group-remove"
10342
  HTYPE = constants.HTYPE_GROUP
10343

    
10344
  _OP_PARAMS = [
10345
    _PGroupName,
10346
    ]
10347

    
10348
  REQ_BGL = False
10349

    
10350
  def ExpandNames(self):
10351
    # This will raise errors.OpPrereqError on its own:
10352
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10353
    self.needed_locks = {
10354
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10355
      }
10356

    
10357
  def CheckPrereq(self):
10358
    """Check prerequisites.
10359

10360
    This checks that the given group name exists as a node group, that it is
10361
    empty (i.e., contains no nodes), and that it is not the last group of the
10362
    cluster.
10363

10364
    """
10365
    # Verify that the group is empty.
10366
    group_nodes = [node.name
10367
                   for node in self.cfg.GetAllNodesInfo().values()
10368
                   if node.group == self.group_uuid]
10369

    
10370
    if group_nodes:
10371
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10372
                                 " nodes: %s" %
10373
                                 (self.op.group_name,
10374
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10375
                                 errors.ECODE_STATE)
10376

    
10377
    # Verify the cluster would not be left group-less.
10378
    if len(self.cfg.GetNodeGroupList()) == 1:
10379
      raise errors.OpPrereqError("Group '%s' is the last group in the cluster,"
10380
                                 " which cannot be left without at least one"
10381
                                 " group" % self.op.group_name,
10382
                                 errors.ECODE_STATE)
10383

    
10384
  def BuildHooksEnv(self):
10385
    """Build hooks env.
10386

10387
    """
10388
    env = {
10389
      "GROUP_NAME": self.op.group_name,
10390
      }
10391
    mn = self.cfg.GetMasterNode()
10392
    return env, [mn], [mn]
10393

    
10394
  def Exec(self, feedback_fn):
10395
    """Remove the node group.
10396

10397
    """
10398
    try:
10399
      self.cfg.RemoveNodeGroup(self.group_uuid)
10400
    except errors.ConfigurationError:
10401
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10402
                               (self.op.group_name, self.group_uuid))
10403

    
10404
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10405

    
10406

    
10407
class LURenameGroup(LogicalUnit):
10408
  HPATH = "group-rename"
10409
  HTYPE = constants.HTYPE_GROUP
10410

    
10411
  _OP_PARAMS = [
10412
    ("old_name", ht.NoDefault, ht.TNonEmptyString),
10413
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
10414
    ]
10415

    
10416
  REQ_BGL = False
10417

    
10418
  def ExpandNames(self):
10419
    # This raises errors.OpPrereqError on its own:
10420
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)
10421

    
10422
    self.needed_locks = {
10423
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10424
      }
10425

    
10426
  def CheckPrereq(self):
10427
    """Check prerequisites.
10428

10429
    This checks that the given old_name exists as a node group, and that
10430
    new_name doesn't.
10431

10432
    """
10433
    try:
10434
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10435
    except errors.OpPrereqError:
10436
      pass
10437
    else:
10438
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10439
                                 " node group (UUID: %s)" %
10440
                                 (self.op.new_name, new_name_uuid),
10441
                                 errors.ECODE_EXISTS)
10442

    
10443
  def BuildHooksEnv(self):
10444
    """Build hooks env.
10445

10446
    """
10447
    env = {
10448
      "OLD_NAME": self.op.old_name,
10449
      "NEW_NAME": self.op.new_name,
10450
      }
10451

    
10452
    mn = self.cfg.GetMasterNode()
10453
    all_nodes = self.cfg.GetAllNodesInfo()
10454
    run_nodes = [mn]
10455
    all_nodes.pop(mn, None)
10456

    
10457
    for node in all_nodes.values():
10458
      if node.group == self.group_uuid:
10459
        run_nodes.append(node.name)
10460

    
10461
    return env, run_nodes, run_nodes
10462

    
10463
  def Exec(self, feedback_fn):
10464
    """Rename the node group.
10465

10466
    """
10467
    group = self.cfg.GetNodeGroup(self.group_uuid)
10468

    
10469
    if group is None:
10470
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10471
                               (self.op.old_name, self.group_uuid))
10472

    
10473
    group.name = self.op.new_name
10474
    self.cfg.Update(group, feedback_fn)
10475

    
10476
    return self.op.new_name
10477

    
10478

    
10479
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10480
  """Generic tags LU.
10481

10482
  This is an abstract class which is the parent of all the other tags LUs.
10483

10484
  """
10485

    
10486
  def ExpandNames(self):
10487
    self.needed_locks = {}
10488
    if self.op.kind == constants.TAG_NODE:
10489
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10490
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
10491
    elif self.op.kind == constants.TAG_INSTANCE:
10492
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10493
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10494

    
10495
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10496
    # not possible to acquire the BGL based on opcode parameters)
10497

    
10498
  def CheckPrereq(self):
10499
    """Check prerequisites.
10500

10501
    """
10502
    if self.op.kind == constants.TAG_CLUSTER:
10503
      self.target = self.cfg.GetClusterInfo()
10504
    elif self.op.kind == constants.TAG_NODE:
10505
      self.target = self.cfg.GetNodeInfo(self.op.name)
10506
    elif self.op.kind == constants.TAG_INSTANCE:
10507
      self.target = self.cfg.GetInstanceInfo(self.op.name)
10508
    else:
10509
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10510
                                 str(self.op.kind), errors.ECODE_INVAL)
10511

    
10512

    
10513
class LUGetTags(TagsLU):
10514
  """Returns the tags of a given object.
10515

10516
  """
10517
  _OP_PARAMS = [
10518
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10519
    # Name is only meaningful for nodes and instances
10520
    ("name", ht.NoDefault, ht.TMaybeString),
10521
    ]
10522
  REQ_BGL = False
10523

    
10524
  def ExpandNames(self):
10525
    TagsLU.ExpandNames(self)
10526

    
10527
    # Share locks as this is only a read operation
10528
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10529

    
10530
  def Exec(self, feedback_fn):
10531
    """Returns the tag list.
10532

10533
    """
10534
    return list(self.target.GetTags())
10535

    
10536

    
10537
class LUSearchTags(NoHooksLU):
10538
  """Searches the tags for a given pattern.
10539

10540
  """
10541
  _OP_PARAMS = [
10542
    ("pattern", ht.NoDefault, ht.TNonEmptyString),
10543
    ]
10544
  REQ_BGL = False
10545

    
10546
  def ExpandNames(self):
10547
    self.needed_locks = {}
10548

    
10549
  def CheckPrereq(self):
10550
    """Check prerequisites.
10551

10552
    This checks the pattern passed for validity by compiling it.
10553

10554
    """
10555
    try:
10556
      self.re = re.compile(self.op.pattern)
10557
    except re.error, err:
10558
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10559
                                 (self.op.pattern, err), errors.ECODE_INVAL)
10560

    
10561
  def Exec(self, feedback_fn):
10562
    """Returns the tag list.
10563

10564
    """
10565
    cfg = self.cfg
10566
    tgts = [("/cluster", cfg.GetClusterInfo())]
10567
    ilist = cfg.GetAllInstancesInfo().values()
10568
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10569
    nlist = cfg.GetAllNodesInfo().values()
10570
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10571
    results = []
10572
    for path, target in tgts:
10573
      for tag in target.GetTags():
10574
        if self.re.search(tag):
10575
          results.append((path, tag))
10576
    return results
10577

    
10578

    
10579
class LUAddTags(TagsLU):
10580
  """Sets a tag on a given object.
10581

10582
  """
10583
  _OP_PARAMS = [
10584
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10585
    # Name is only meaningful for nodes and instances
10586
    ("name", ht.NoDefault, ht.TMaybeString),
10587
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10588
    ]
10589
  REQ_BGL = False
10590

    
10591
  def CheckPrereq(self):
10592
    """Check prerequisites.
10593

10594
    This checks the type and length of the tag name and value.
10595

10596
    """
10597
    TagsLU.CheckPrereq(self)
10598
    for tag in self.op.tags:
10599
      objects.TaggableObject.ValidateTag(tag)
10600

    
10601
  def Exec(self, feedback_fn):
10602
    """Sets the tag.
10603

10604
    """
10605
    try:
10606
      for tag in self.op.tags:
10607
        self.target.AddTag(tag)
10608
    except errors.TagError, err:
10609
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10610
    self.cfg.Update(self.target, feedback_fn)
10611

    
10612

    
10613
class LUDelTags(TagsLU):
10614
  """Delete a list of tags from a given object.
10615

10616
  """
10617
  _OP_PARAMS = [
10618
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10619
    # Name is only meaningful for nodes and instances
10620
    ("name", ht.NoDefault, ht.TMaybeString),
10621
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10622
    ]
10623
  REQ_BGL = False
10624

    
10625
  def CheckPrereq(self):
10626
    """Check prerequisites.
10627

10628
    This checks that we have the given tag.
10629

10630
    """
10631
    TagsLU.CheckPrereq(self)
10632
    for tag in self.op.tags:
10633
      objects.TaggableObject.ValidateTag(tag)
10634
    del_tags = frozenset(self.op.tags)
10635
    cur_tags = self.target.GetTags()
10636

    
10637
    diff_tags = del_tags - cur_tags
10638
    if diff_tags:
10639
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
10640
      raise errors.OpPrereqError("Tag(s) %s not found" %
10641
                                 (utils.CommaJoin(diff_names), ),
10642
                                 errors.ECODE_NOENT)
10643

    
10644
  def Exec(self, feedback_fn):
10645
    """Remove the tag from the object.
10646

10647
    """
10648
    for tag in self.op.tags:
10649
      self.target.RemoveTag(tag)
10650
    self.cfg.Update(self.target, feedback_fn)
10651

    
10652

    
10653
class LUTestDelay(NoHooksLU):
10654
  """Sleep for a specified amount of time.
10655

10656
  This LU sleeps on the master and/or nodes for a specified amount of
10657
  time.
10658

10659
  """
10660
  _OP_PARAMS = [
10661
    ("duration", ht.NoDefault, ht.TFloat),
10662
    ("on_master", True, ht.TBool),
10663
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10664
    ("repeat", 0, ht.TPositiveInt)
10665
    ]
10666
  REQ_BGL = False
10667

    
10668
  def ExpandNames(self):
10669
    """Expand names and set required locks.
10670

10671
    This expands the node list, if any.
10672

10673
    """
10674
    self.needed_locks = {}
10675
    if self.op.on_nodes:
10676
      # _GetWantedNodes can be used here, but is not always appropriate to use
10677
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10678
      # more information.
10679
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10680
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10681

    
10682
  def _TestDelay(self):
10683
    """Do the actual sleep.
10684

10685
    """
10686
    if self.op.on_master:
10687
      if not utils.TestDelay(self.op.duration):
10688
        raise errors.OpExecError("Error during master delay test")
10689
    if self.op.on_nodes:
10690
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10691
      for node, node_result in result.items():
10692
        node_result.Raise("Failure during rpc call to node %s" % node)
10693

    
10694
  def Exec(self, feedback_fn):
10695
    """Execute the test delay opcode, with the wanted repetitions.
10696

10697
    """
10698
    if self.op.repeat == 0:
10699
      self._TestDelay()
10700
    else:
10701
      top_value = self.op.repeat - 1
10702
      for i in range(self.op.repeat):
10703
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10704
        self._TestDelay()
10705

    
10706

    
10707
class LUTestJobqueue(NoHooksLU):
10708
  """Utility LU to test some aspects of the job queue.
10709

10710
  """
10711
  _OP_PARAMS = [
10712
    ("notify_waitlock", False, ht.TBool),
10713
    ("notify_exec", False, ht.TBool),
10714
    ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10715
    ("fail", False, ht.TBool),
10716
    ]
10717
  REQ_BGL = False
10718

    
10719
  # Must be lower than default timeout for WaitForJobChange to see whether it
10720
  # notices changed jobs
10721
  _CLIENT_CONNECT_TIMEOUT = 20.0
10722
  _CLIENT_CONFIRM_TIMEOUT = 60.0
10723

    
10724
  @classmethod
10725
  def _NotifyUsingSocket(cls, cb, errcls):
10726
    """Opens a Unix socket and waits for another program to connect.
10727

10728
    @type cb: callable
10729
    @param cb: Callback to send socket name to client
10730
    @type errcls: class
10731
    @param errcls: Exception class to use for errors
10732

10733
    """
10734
    # Using a temporary directory as there's no easy way to create temporary
10735
    # sockets without writing a custom loop around tempfile.mktemp and
10736
    # socket.bind
10737
    tmpdir = tempfile.mkdtemp()
10738
    try:
10739
      tmpsock = utils.PathJoin(tmpdir, "sock")
10740

    
10741
      logging.debug("Creating temporary socket at %s", tmpsock)
10742
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10743
      try:
10744
        sock.bind(tmpsock)
10745
        sock.listen(1)
10746

    
10747
        # Send details to client
10748
        cb(tmpsock)
10749

    
10750
        # Wait for client to connect before continuing
10751
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10752
        try:
10753
          (conn, _) = sock.accept()
10754
        except socket.error, err:
10755
          raise errcls("Client didn't connect in time (%s)" % err)
10756
      finally:
10757
        sock.close()
10758
    finally:
10759
      # Remove as soon as client is connected
10760
      shutil.rmtree(tmpdir)
10761

    
10762
    # Wait for client to close
10763
    try:
10764
      try:
10765
        # pylint: disable-msg=E1101
10766
        # Instance of '_socketobject' has no ... member
10767
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10768
        conn.recv(1)
10769
      except socket.error, err:
10770
        raise errcls("Client failed to confirm notification (%s)" % err)
10771
    finally:
10772
      conn.close()
10773

    
10774
  def _SendNotification(self, test, arg, sockname):
10775
    """Sends a notification to the client.
10776

10777
    @type test: string
10778
    @param test: Test name
10779
    @param arg: Test argument (depends on test)
10780
    @type sockname: string
10781
    @param sockname: Socket path
10782

10783
    """
10784
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10785

    
10786
  def _Notify(self, prereq, test, arg):
10787
    """Notifies the client of a test.
10788

10789
    @type prereq: bool
10790
    @param prereq: Whether this is a prereq-phase test
10791
    @type test: string
10792
    @param test: Test name
10793
    @param arg: Test argument (depends on test)
10794

10795
    """
10796
    if prereq:
10797
      errcls = errors.OpPrereqError
10798
    else:
10799
      errcls = errors.OpExecError
10800

    
10801
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10802
                                                  test, arg),
10803
                                   errcls)
10804

    
10805
  def CheckArguments(self):
10806
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10807
    self.expandnames_calls = 0
10808

    
10809
  def ExpandNames(self):
10810
    checkargs_calls = getattr(self, "checkargs_calls", 0)
10811
    if checkargs_calls < 1:
10812
      raise errors.ProgrammerError("CheckArguments was not called")
10813

    
10814
    self.expandnames_calls += 1
10815

    
10816
    if self.op.notify_waitlock:
10817
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
10818

    
10819
    self.LogInfo("Expanding names")
10820

    
10821
    # Get lock on master node (just to get a lock, not for a particular reason)
10822
    self.needed_locks = {
10823
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10824
      }
10825

    
10826
  def Exec(self, feedback_fn):
10827
    if self.expandnames_calls < 1:
10828
      raise errors.ProgrammerError("ExpandNames was not called")
10829

    
10830
    if self.op.notify_exec:
10831
      self._Notify(False, constants.JQT_EXEC, None)
10832

    
10833
    self.LogInfo("Executing")
10834

    
10835
    if self.op.log_messages:
10836
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10837
      for idx, msg in enumerate(self.op.log_messages):
10838
        self.LogInfo("Sending log message %s", idx + 1)
10839
        feedback_fn(constants.JQT_MSGPREFIX + msg)
10840
        # Report how many test messages have been sent
10841
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10842

    
10843
    if self.op.fail:
10844
      raise errors.OpExecError("Opcode failure was requested")
10845

    
10846
    return True
10847

    
10848

    
10849
class IAllocator(object):
10850
  """IAllocator framework.
10851

10852
  An IAllocator instance has three sets of attributes:
10853
    - cfg that is needed to query the cluster
10854
    - input data (all members of the _KEYS class attribute are required)
10855
    - four buffer attributes (in|out_data|text), that represent the
10856
      input (to the external script) in text and data structure format,
10857
      and the output from it, again in two formats
10858
    - the result variables from the script (success, info, nodes) for
10859
      easy usage
10860

10861
  """
10862
  # pylint: disable-msg=R0902
10863
  # lots of instance attributes
10864
  _ALLO_KEYS = [
10865
    "name", "mem_size", "disks", "disk_template",
10866
    "os", "tags", "nics", "vcpus", "hypervisor",
10867
    ]
10868
  _RELO_KEYS = [
10869
    "name", "relocate_from",
10870
    ]
10871
  _EVAC_KEYS = [
10872
    "evac_nodes",
10873
    ]
10874

    
10875
  def __init__(self, cfg, rpc, mode, **kwargs):
10876
    self.cfg = cfg
10877
    self.rpc = rpc
10878
    # init buffer variables
10879
    self.in_text = self.out_text = self.in_data = self.out_data = None
10880
    # init all input fields so that pylint is happy
10881
    self.mode = mode
10882
    self.mem_size = self.disks = self.disk_template = None
10883
    self.os = self.tags = self.nics = self.vcpus = None
10884
    self.hypervisor = None
10885
    self.relocate_from = None
10886
    self.name = None
10887
    self.evac_nodes = None
10888
    # computed fields
10889
    self.required_nodes = None
10890
    # init result fields
10891
    self.success = self.info = self.result = None
10892
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10893
      keyset = self._ALLO_KEYS
10894
      fn = self._AddNewInstance
10895
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10896
      keyset = self._RELO_KEYS
10897
      fn = self._AddRelocateInstance
10898
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10899
      keyset = self._EVAC_KEYS
10900
      fn = self._AddEvacuateNodes
10901
    else:
10902
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10903
                                   " IAllocator" % self.mode)
10904
    for key in kwargs:
10905
      if key not in keyset:
10906
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
10907
                                     " IAllocator" % key)
10908
      setattr(self, key, kwargs[key])
10909

    
10910
    for key in keyset:
10911
      if key not in kwargs:
10912
        raise errors.ProgrammerError("Missing input parameter '%s' to"
10913
                                     " IAllocator" % key)
10914
    self._BuildInputData(fn)
10915

    
10916
  def _ComputeClusterData(self):
10917
    """Compute the generic allocator input data.
10918

10919
    This is the data that is independent of the actual operation.
10920

10921
    """
10922
    cfg = self.cfg
10923
    cluster_info = cfg.GetClusterInfo()
10924
    # cluster data
10925
    data = {
10926
      "version": constants.IALLOCATOR_VERSION,
10927
      "cluster_name": cfg.GetClusterName(),
10928
      "cluster_tags": list(cluster_info.GetTags()),
10929
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10930
      # we don't have job IDs
10931
      }
10932
    iinfo = cfg.GetAllInstancesInfo().values()
10933
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10934

    
10935
    # node data
10936
    node_list = cfg.GetNodeList()
10937

    
10938
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10939
      hypervisor_name = self.hypervisor
10940
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10941
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10942
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10943
      hypervisor_name = cluster_info.enabled_hypervisors[0]
10944

    
10945
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10946
                                        hypervisor_name)
10947
    node_iinfo = \
10948
      self.rpc.call_all_instances_info(node_list,
10949
                                       cluster_info.enabled_hypervisors)
10950

    
10951
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10952

    
10953
    data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10954

    
10955
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10956

    
10957
    self.in_data = data
10958

    
10959
  @staticmethod
10960
  def _ComputeNodeGroupData(cfg):
10961
    """Compute node groups data.
10962

10963
    """
10964
    ng = {}
10965
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10966
      ng[guuid] = { "name": gdata.name }
10967
    return ng
10968

    
10969
  @staticmethod
10970
  def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10971
    """Compute global node data.
10972

10973
    """
10974
    node_results = {}
10975
    for nname, nresult in node_data.items():
10976
      # first fill in static (config-based) values
10977
      ninfo = cfg.GetNodeInfo(nname)
10978
      pnr = {
10979
        "tags": list(ninfo.GetTags()),
10980
        "primary_ip": ninfo.primary_ip,
10981
        "secondary_ip": ninfo.secondary_ip,
10982
        "offline": ninfo.offline,
10983
        "drained": ninfo.drained,
10984
        "master_candidate": ninfo.master_candidate,
10985
        "group": ninfo.group,
10986
        "master_capable": ninfo.master_capable,
10987
        "vm_capable": ninfo.vm_capable,
10988
        }
10989

    
10990
      if not (ninfo.offline or ninfo.drained):
10991
        nresult.Raise("Can't get data for node %s" % nname)
10992
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10993
                                nname)
10994
        remote_info = nresult.payload
10995

    
10996
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
10997
                     'vg_size', 'vg_free', 'cpu_total']:
10998
          if attr not in remote_info:
10999
            raise errors.OpExecError("Node '%s' didn't return attribute"
11000
                                     " '%s'" % (nname, attr))
11001
          if not isinstance(remote_info[attr], int):
11002
            raise errors.OpExecError("Node '%s' returned invalid value"
11003
                                     " for '%s': %s" %
11004
                                     (nname, attr, remote_info[attr]))
11005
        # compute memory used by primary instances
11006
        i_p_mem = i_p_up_mem = 0
11007
        for iinfo, beinfo in i_list:
11008
          if iinfo.primary_node == nname:
11009
            i_p_mem += beinfo[constants.BE_MEMORY]
11010
            if iinfo.name not in node_iinfo[nname].payload:
11011
              i_used_mem = 0
11012
            else:
11013
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11014
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11015
            remote_info['memory_free'] -= max(0, i_mem_diff)
11016

    
11017
            if iinfo.admin_up:
11018
              i_p_up_mem += beinfo[constants.BE_MEMORY]
11019

    
11020
        # compute memory used by instances
11021
        pnr_dyn = {
11022
          "total_memory": remote_info['memory_total'],
11023
          "reserved_memory": remote_info['memory_dom0'],
11024
          "free_memory": remote_info['memory_free'],
11025
          "total_disk": remote_info['vg_size'],
11026
          "free_disk": remote_info['vg_free'],
11027
          "total_cpus": remote_info['cpu_total'],
11028
          "i_pri_memory": i_p_mem,
11029
          "i_pri_up_memory": i_p_up_mem,
11030
          }
11031
        pnr.update(pnr_dyn)
11032

    
11033
      node_results[nname] = pnr
11034

    
11035
    return node_results
11036

    
11037
  @staticmethod
11038
  def _ComputeInstanceData(cluster_info, i_list):
11039
    """Compute global instance data.
11040

11041
    """
11042
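    # Illustrative shape of a single entry (all values are examples only):
    #   {"tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
    #    "os": "debian-image", "nodes": ["node1", "node2"],
    #    "nics": [{"mac": "...", "ip": None, "mode": "bridged",
    #              "link": "xen-br0", "bridge": "xen-br0"}],
    #    "disks": [{"size": 1024, "mode": "w"}], "disk_template": "drbd",
    #    "hypervisor": "xen-pvm", "disk_space_total": 1152}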
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
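    # The request built below mirrors the opcode parameters: "name",
    # "disk_template", "tags", "os", "vcpus", "memory", "disks",
    # "disk_space_total", "nics" and "required_nodes" (two nodes for
    # network-mirrored templates, one otherwise)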
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
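    # Relocation is only supported for network-mirrored instances with
    # exactly one secondary node; the request carries "name",
    # "disk_space_total", "required_nodes" (always 1) and "relocate_from"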
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
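    # The multi-evacuation request only names the nodes to be drained,
    # e.g. (illustrative node name): {"evac_nodes": ["node3.example.com"]}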
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
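    # fn is one of the _Add*() methods above; its result is stored under
    # the "request" key, tagged with the allocator mode, and the complete
    # structure is then serialized into self.in_text for the iallocator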
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
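    # In allocation mode every "disks" entry must be a dict with an integer
    # "size" and a "mode" of either 'r' or 'w' (illustrative entry:
    # {"size": 1024, "mode": "w"}), while "nics" entries may only use the
    # "mac", "ip" and "bridge" keys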
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result