root / lib / cmdlib.py @ 2522b7c4

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61

    
62
import ganeti.masterd.instance # pylint: disable-msg=W0611
63

    
64

    
65
def _SupportsOob(cfg, node):
66
  """Tells if node supports OOB.
67

68
  @type cfg: L{config.ConfigWriter}
69
  @param cfg: The cluster configuration
70
  @type node: L{objects.Node}
71
  @param node: The node
72
  @return: The OOB script if supported or an empty string otherwise
73

74
  """
75
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
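  # Illustrative sketch only (hypothetical caller, hedged): LUs that require
  # OOB support typically guard on the returned value, along these lines:
  #
  #   if not _SupportsOob(self.cfg, node):
  #     raise errors.OpPrereqError("OOB is not supported for node %s" %
  #                                node.name, errors.ECODE_ENVIRON)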
76

    
77

    
78
# End types
79
class LogicalUnit(object):
80
  """Logical Unit base class.
81

82
  Subclasses must follow these rules:
83
    - implement ExpandNames
84
    - implement CheckPrereq (except when tasklets are used)
85
    - implement Exec (except when tasklets are used)
86
    - implement BuildHooksEnv
87
    - redefine HPATH and HTYPE
88
    - optionally redefine their run requirements:
89
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
90

91
  Note that all commands require root permissions.
92

93
  @ivar dry_run_result: the value (if any) that will be returned to the caller
94
      in dry-run mode (signalled by opcode dry_run parameter)
95

96
  """
97
  HPATH = None
98
  HTYPE = None
99
  REQ_BGL = True
100

    
101
  def __init__(self, processor, op, context, rpc):
102
    """Constructor for LogicalUnit.
103

104
    This needs to be overridden in derived classes in order to check op
105
    validity.
106

107
    """
108
    self.proc = processor
109
    self.op = op
110
    self.cfg = context.cfg
111
    self.context = context
112
    self.rpc = rpc
113
    # Dicts used to declare locking needs to mcpu
114
    self.needed_locks = None
115
    self.acquired_locks = {}
116
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
117
    self.add_locks = {}
118
    self.remove_locks = {}
119
    # Used to force good behavior when calling helper functions
120
    self.recalculate_locks = {}
121
    self.__ssh = None
122
    # logging
123
    self.Log = processor.Log # pylint: disable-msg=C0103
124
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
125
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
126
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
127
    # support for dry-run
128
    self.dry_run_result = None
129
    # support for generic debug attribute
130
    if (not hasattr(self.op, "debug_level") or
131
        not isinstance(self.op.debug_level, int)):
132
      self.op.debug_level = 0
133

    
134
    # Tasklets
135
    self.tasklets = None
136

    
137
    # Validate opcode parameters and set defaults
138
    self.op.Validate(True)
139

    
140
    self.CheckArguments()
141

    
142
  def __GetSSH(self):
143
    """Returns the SshRunner object
144

145
    """
146
    if not self.__ssh:
147
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
148
    return self.__ssh
149

    
150
  ssh = property(fget=__GetSSH)
151

    
152
  def CheckArguments(self):
153
    """Check syntactic validity for the opcode arguments.
154

155
    This method is for doing a simple syntactic check and ensuring the
156
    validity of opcode parameters, without any cluster-related
157
    checks. While the same can be accomplished in ExpandNames and/or
158
    CheckPrereq, doing these separately is better because:
159

160
      - ExpandNames is left purely as a lock-related function
161
      - CheckPrereq is run after we have acquired locks (and possibly
162
        waited for them)
163

164
    The function is allowed to change the self.op attribute so that
165
    later methods need no longer worry about missing parameters.
166

167
    """
168
    pass
169

    
170
  def ExpandNames(self):
171
    """Expand names for this LU.
172

173
    This method is called before starting to execute the opcode, and it should
174
    update all the parameters of the opcode to their canonical form (e.g. a
175
    short node name must be fully expanded after this method has successfully
176
    completed). This way locking, hooks, logging, etc. can work correctly.
177

178
    LUs which implement this method must also populate the self.needed_locks
179
    member, as a dict with lock levels as keys, and a list of needed lock names
180
    as values. Rules:
181

182
      - use an empty dict if you don't need any lock
183
      - if you don't need any lock at a particular level omit that level
184
      - don't put anything for the BGL level
185
      - if you want all locks at a level use locking.ALL_SET as a value
186

187
    If you need to share locks (rather than acquire them exclusively) at one
188
    level you can modify self.share_locks, setting a true value (usually 1) for
189
    that level. By default locks are not shared.
190

191
    This function can also define a list of tasklets, which then will be
192
    executed in order instead of the usual LU-level CheckPrereq and Exec
193
    functions, if those are not defined by the LU.
194

195
    Examples::
196

197
      # Acquire all nodes and one instance
198
      self.needed_locks = {
199
        locking.LEVEL_NODE: locking.ALL_SET,
200
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
201
      }
202
      # Acquire just two nodes
203
      self.needed_locks = {
204
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
205
      }
206
      # Acquire no locks
207
      self.needed_locks = {} # No, you can't leave it to the default value None
208

209
    """
210
    # The implementation of this method is mandatory only if the new LU is
211
    # concurrent, so that old LUs don't need to be changed all at the same
212
    # time.
213
    if self.REQ_BGL:
214
      self.needed_locks = {} # Exclusive LUs don't need locks.
215
    else:
216
      raise NotImplementedError
217

    
218
  def DeclareLocks(self, level):
219
    """Declare LU locking needs for a level
220

221
    While most LUs can just declare their locking needs at ExpandNames time,
222
    sometimes there's the need to calculate some locks after having acquired
223
    the ones before. This function is called just before acquiring locks at a
224
    particular level, but after acquiring the ones at lower levels, and permits
225
    such calculations. It can be used to modify self.needed_locks, and by
226
    default it does nothing.
227

228
    This function is only called if you have something already set in
229
    self.needed_locks for the level.
230

231
    @param level: Locking level which is going to be locked
232
    @type level: member of ganeti.locking.LEVELS
233

234
    """
235

    
236
  def CheckPrereq(self):
237
    """Check prerequisites for this LU.
238

239
    This method should check that the prerequisites for the execution
240
    of this LU are fulfilled. It can do internode communication, but
241
    it should be idempotent - no cluster or system changes are
242
    allowed.
243

244
    The method should raise errors.OpPrereqError in case something is
245
    not fulfilled. Its return value is ignored.
246

247
    This method should also update all the parameters of the opcode to
248
    their canonical form if it hasn't been done by ExpandNames before.
249

250
    """
251
    if self.tasklets is not None:
252
      for (idx, tl) in enumerate(self.tasklets):
253
        logging.debug("Checking prerequisites for tasklet %s/%s",
254
                      idx + 1, len(self.tasklets))
255
        tl.CheckPrereq()
256
    else:
257
      pass
258

    
259
  def Exec(self, feedback_fn):
260
    """Execute the LU.
261

262
    This method should implement the actual work. It should raise
263
    errors.OpExecError for failures that are somewhat dealt with in
264
    code, or expected.
265

266
    """
267
    if self.tasklets is not None:
268
      for (idx, tl) in enumerate(self.tasklets):
269
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
270
        tl.Exec(feedback_fn)
271
    else:
272
      raise NotImplementedError
273

    
274
  def BuildHooksEnv(self):
275
    """Build hooks environment for this LU.
276

277
    This method should return a three-element tuple consisting of: a dict
278
    containing the environment that will be used for running the
279
    specific hook for this LU, a list of node names on which the hook
280
    should run before the execution, and a list of node names on which
281
    the hook should run after the execution.
282

283
    The keys of the dict must not have the 'GANETI_' prefix, as this will
284
    be handled by the hooks runner. Also note that additional keys will be
285
    added by the hooks runner. If the LU doesn't define any
286
    environment, an empty dict (and not None) should be returned.
287

288
    If there are no nodes, an empty list (and not None) should be returned.
289

290
    Note that if the HPATH for a LU class is None, this function will
291
    not be called.
292

293
    """
294
    raise NotImplementedError
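    # Minimal illustrative return value, shown as a sketch only (cf.
    # LUClusterPostInit.BuildHooksEnv further below):
    #
    #   env = {"OP_TARGET": self.cfg.GetClusterName()}
    #   return env, [], [self.cfg.GetMasterNode()]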
295

    
296
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
297
    """Notify the LU about the results of its hooks.
298

299
    This method is called every time a hooks phase is executed, and notifies
300
    the Logical Unit about the hooks' result. The LU can then use it to alter
301
    its result based on the hooks.  By default the method does nothing and the
302
    previous result is passed back unchanged, but any LU can override it if it
303
    wants to use the local cluster hook-scripts somehow.
304

305
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
306
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
307
    @param hook_results: the results of the multi-node hooks rpc call
308
    @param feedback_fn: function used to send feedback back to the caller
309
    @param lu_result: the previous Exec result this LU had, or None
310
        in the PRE phase
311
    @return: the new Exec result, based on the previous result
312
        and hook results
313

314
    """
315
    # API must be kept, thus we ignore the unused argument and the 'could
316
    # be a function' warnings
317
    # pylint: disable-msg=W0613,R0201
318
    return lu_result
319

    
320
  def _ExpandAndLockInstance(self):
321
    """Helper function to expand and lock an instance.
322

323
    Many LUs that work on an instance take its name in self.op.instance_name
324
    and need to expand it and then declare the expanded name for locking. This
325
    function does it, and then updates self.op.instance_name to the expanded
326
    name. It also initializes needed_locks as a dict, if this hasn't been done
327
    before.
328

329
    """
330
    if self.needed_locks is None:
331
      self.needed_locks = {}
332
    else:
333
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
334
        "_ExpandAndLockInstance called with instance-level locks set"
335
    self.op.instance_name = _ExpandInstanceName(self.cfg,
336
                                                self.op.instance_name)
337
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
338

    
339
  def _LockInstancesNodes(self, primary_only=False):
340
    """Helper function to declare instances' nodes for locking.
341

342
    This function should be called after locking one or more instances to lock
343
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
344
    with all primary or secondary nodes for instances already locked and
345
    present in self.needed_locks[locking.LEVEL_INSTANCE].
346

347
    It should be called from DeclareLocks, and for safety only works if
348
    self.recalculate_locks[locking.LEVEL_NODE] is set.
349

350
    In the future it may grow parameters to just lock some instance's nodes, or
351
    to just lock primaries or secondary nodes, if needed.
352

353
    It should be called in DeclareLocks in a way similar to::
354

355
      if level == locking.LEVEL_NODE:
356
        self._LockInstancesNodes()
357

358
    @type primary_only: boolean
359
    @param primary_only: only lock primary nodes of locked instances
360

361
    """
362
    assert locking.LEVEL_NODE in self.recalculate_locks, \
363
      "_LockInstancesNodes helper function called with no nodes to recalculate"
364

    
365
    # TODO: check if we've really been called with the instance locks held
366

    
367
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
368
    # future we might want to have different behaviors depending on the value
369
    # of self.recalculate_locks[locking.LEVEL_NODE]
370
    wanted_nodes = []
371
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
372
      instance = self.context.cfg.GetInstanceInfo(instance_name)
373
      wanted_nodes.append(instance.primary_node)
374
      if not primary_only:
375
        wanted_nodes.extend(instance.secondary_nodes)
376

    
377
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
378
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
379
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
380
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
381

    
382
    del self.recalculate_locks[locking.LEVEL_NODE]
383

    
384

    
385
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
386
  """Simple LU which runs no hooks.
387

388
  This LU is intended as a parent for other LogicalUnits which will
389
  run no hooks, in order to reduce duplicate code.
390

391
  """
392
  HPATH = None
393
  HTYPE = None
394

    
395
  def BuildHooksEnv(self):
396
    """Empty BuildHooksEnv for NoHooksLu.
397

398
    This just raises an error.
399

400
    """
401
    assert False, "BuildHooksEnv called for NoHooksLUs"
402

    
403

    
404
class Tasklet:
405
  """Tasklet base class.
406

407
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
408
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
409
  tasklets know nothing about locks.
410

411
  Subclasses must follow these rules:
412
    - Implement CheckPrereq
413
    - Implement Exec
414

415
  """
416
  def __init__(self, lu):
417
    self.lu = lu
418

    
419
    # Shortcuts
420
    self.cfg = lu.cfg
421
    self.rpc = lu.rpc
422

    
423
  def CheckPrereq(self):
424
    """Check prerequisites for this tasklets.
425

426
    This method should check whether the prerequisites for the execution of
427
    this tasklet are fulfilled. It can do internode communication, but it
428
    should be idempotent - no cluster or system changes are allowed.
429

430
    The method should raise errors.OpPrereqError in case something is not
431
    fulfilled. Its return value is ignored.
432

433
    This method should also update all parameters to their canonical form if it
434
    hasn't been done before.
435

436
    """
437
    pass
438

    
439
  def Exec(self, feedback_fn):
440
    """Execute the tasklet.
441

442
    This method should implement the actual work. It should raise
443
    errors.OpExecError for failures that are somewhat dealt with in code, or
444
    expected.
445

446
    """
447
    raise NotImplementedError
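  # Illustrative sketch (hypothetical names, hedged; not used anywhere in this
  # file): a minimal tasklet and how an LU would wire it up in ExpandNames:
  #
  #   class _ExampleTasklet(Tasklet):
  #     def CheckPrereq(self):
  #       pass  # nothing to verify
  #     def Exec(self, feedback_fn):
  #       feedback_fn("example tasklet executed")
  #
  #   # inside some LU's ExpandNames:
  #   self.tasklets = [_ExampleTasklet(self)]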
448

    
449

    
450
class _QueryBase:
451
  """Base for query utility classes.
452

453
  """
454
  #: Attribute holding field definitions
455
  FIELDS = None
456

    
457
  def __init__(self, names, fields, use_locking):
458
    """Initializes this class.
459

460
    """
461
    self.names = names
462
    self.use_locking = use_locking
463

    
464
    self.query = query.Query(self.FIELDS, fields)
465
    self.requested_data = self.query.RequestedData()
466

    
467
    self.do_locking = None
468
    self.wanted = None
469

    
470
  def _GetNames(self, lu, all_names, lock_level):
471
    """Helper function to determine names asked for in the query.
472

473
    """
474
    if self.do_locking:
475
      names = lu.acquired_locks[lock_level]
476
    else:
477
      names = all_names
478

    
479
    if self.wanted == locking.ALL_SET:
480
      assert not self.names
481
      # caller didn't specify names, so ordering is not important
482
      return utils.NiceSort(names)
483

    
484
    # caller specified names and we must keep the same order
485
    assert self.names
486
    assert not self.do_locking or lu.acquired_locks[lock_level]
487

    
488
    missing = set(self.wanted).difference(names)
489
    if missing:
490
      raise errors.OpExecError("Some items were removed before retrieving"
491
                               " their data: %s" % missing)
492

    
493
    # Return expanded names
494
    return self.wanted
495

    
496
  @classmethod
497
  def FieldsQuery(cls, fields):
498
    """Returns list of available fields.
499

500
    @return: List of L{objects.QueryFieldDefinition}
501

502
    """
503
    return query.QueryFields(cls.FIELDS, fields)
504

    
505
  def ExpandNames(self, lu):
506
    """Expand names for this query.
507

508
    See L{LogicalUnit.ExpandNames}.
509

510
    """
511
    raise NotImplementedError()
512

    
513
  def DeclareLocks(self, lu, level):
514
    """Declare locks for this query.
515

516
    See L{LogicalUnit.DeclareLocks}.
517

518
    """
519
    raise NotImplementedError()
520

    
521
  def _GetQueryData(self, lu):
522
    """Collects all data for this query.
523

524
    @return: Query data object
525

526
    """
527
    raise NotImplementedError()
528

    
529
  def NewStyleQuery(self, lu):
530
    """Collect data and execute query.
531

532
    """
533
    return query.GetQueryResponse(self.query, self._GetQueryData(lu))
534

    
535
  def OldStyleQuery(self, lu):
536
    """Collect data and execute query.
537

538
    """
539
    return self.query.OldStyleQuery(self._GetQueryData(lu))
540

    
541

    
542
def _GetWantedNodes(lu, nodes):
543
  """Returns list of checked and expanded node names.
544

545
  @type lu: L{LogicalUnit}
546
  @param lu: the logical unit on whose behalf we execute
547
  @type nodes: list
548
  @param nodes: list of node names or None for all nodes
549
  @rtype: list
550
  @return: the list of nodes, sorted
551
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
552

553
  """
554
  if nodes:
555
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
556

    
557
  return utils.NiceSort(lu.cfg.GetNodeList())
558

    
559

    
560
def _GetWantedInstances(lu, instances):
561
  """Returns list of checked and expanded instance names.
562

563
  @type lu: L{LogicalUnit}
564
  @param lu: the logical unit on whose behalf we execute
565
  @type instances: list
566
  @param instances: list of instance names or None for all instances
567
  @rtype: list
568
  @return: the list of instances, sorted
569
  @raise errors.OpPrereqError: if the instances parameter is wrong type
570
  @raise errors.OpPrereqError: if any of the passed instances is not found
571

572
  """
573
  if instances:
574
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
575
  else:
576
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
577
  return wanted
578

    
579

    
580
def _GetUpdatedParams(old_params, update_dict,
581
                      use_default=True, use_none=False):
582
  """Return the new version of a parameter dictionary.
583

584
  @type old_params: dict
585
  @param old_params: old parameters
586
  @type update_dict: dict
587
  @param update_dict: dict containing new parameter values, or
588
      constants.VALUE_DEFAULT to reset the parameter to its default
589
      value
590
  @type use_default: boolean
591
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
592
      values as 'to be deleted' values
593
  @type use_none: boolean
594
  @param use_none: whether to recognise C{None} values as 'to be
595
      deleted' values
596
  @rtype: dict
597
  @return: the new parameter dictionary
598

599
  """
600
  params_copy = copy.deepcopy(old_params)
601
  for key, val in update_dict.iteritems():
602
    if ((use_default and val == constants.VALUE_DEFAULT) or
603
        (use_none and val is None)):
604
      try:
605
        del params_copy[key]
606
      except KeyError:
607
        pass
608
    else:
609
      params_copy[key] = val
610
  return params_copy
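  # Worked example (illustrative): with use_default=True,
  #   _GetUpdatedParams({"a": 1, "b": 2},
  #                     {"a": constants.VALUE_DEFAULT, "c": 3})
  # returns {"b": 2, "c": 3}: "a" is dropped so it reverts to its default,
  # while "c" is added with the new value.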
611

    
612

    
613
def _CheckOutputFields(static, dynamic, selected):
614
  """Checks whether all selected fields are valid.
615

616
  @type static: L{utils.FieldSet}
617
  @param static: static fields set
618
  @type dynamic: L{utils.FieldSet}
619
  @param dynamic: dynamic fields set
620

621
  """
622
  f = utils.FieldSet()
623
  f.Extend(static)
624
  f.Extend(dynamic)
625

    
626
  delta = f.NonMatching(selected)
627
  if delta:
628
    raise errors.OpPrereqError("Unknown output fields selected: %s"
629
                               % ",".join(delta), errors.ECODE_INVAL)
630

    
631

    
632
def _CheckGlobalHvParams(params):
633
  """Validates that given hypervisor params are not global ones.
634

635
  This will ensure that instances don't get customised versions of
636
  global params.
637

638
  """
639
  used_globals = constants.HVC_GLOBALS.intersection(params)
640
  if used_globals:
641
    msg = ("The following hypervisor parameters are global and cannot"
642
           " be customized at instance level, please modify them at"
643
           " cluster level: %s" % utils.CommaJoin(used_globals))
644
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
645

    
646

    
647
def _CheckNodeOnline(lu, node, msg=None):
648
  """Ensure that a given node is online.
649

650
  @param lu: the LU on behalf of which we make the check
651
  @param node: the node to check
652
  @param msg: if passed, should be a message to replace the default one
653
  @raise errors.OpPrereqError: if the node is offline
654

655
  """
656
  if msg is None:
657
    msg = "Can't use offline node"
658
  if lu.cfg.GetNodeInfo(node).offline:
659
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
660

    
661

    
662
def _CheckNodeNotDrained(lu, node):
663
  """Ensure that a given node is not drained.
664

665
  @param lu: the LU on behalf of which we make the check
666
  @param node: the node to check
667
  @raise errors.OpPrereqError: if the node is drained
668

669
  """
670
  if lu.cfg.GetNodeInfo(node).drained:
671
    raise errors.OpPrereqError("Can't use drained node %s" % node,
672
                               errors.ECODE_STATE)
673

    
674

    
675
def _CheckNodeVmCapable(lu, node):
676
  """Ensure that a given node is vm capable.
677

678
  @param lu: the LU on behalf of which we make the check
679
  @param node: the node to check
680
  @raise errors.OpPrereqError: if the node is not vm capable
681

682
  """
683
  if not lu.cfg.GetNodeInfo(node).vm_capable:
684
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
685
                               errors.ECODE_STATE)
686

    
687

    
688
def _CheckNodeHasOS(lu, node, os_name, force_variant):
689
  """Ensure that a node supports a given OS.
690

691
  @param lu: the LU on behalf of which we make the check
692
  @param node: the node to check
693
  @param os_name: the OS to query about
694
  @param force_variant: whether to ignore variant errors
695
  @raise errors.OpPrereqError: if the node is not supporting the OS
696

697
  """
698
  result = lu.rpc.call_os_get(node, os_name)
699
  result.Raise("OS '%s' not in supported OS list for node %s" %
700
               (os_name, node),
701
               prereq=True, ecode=errors.ECODE_INVAL)
702
  if not force_variant:
703
    _CheckOSVariant(result.payload, os_name)
704

    
705

    
706
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
707
  """Ensure that a node has the given secondary ip.
708

709
  @type lu: L{LogicalUnit}
710
  @param lu: the LU on behalf of which we make the check
711
  @type node: string
712
  @param node: the node to check
713
  @type secondary_ip: string
714
  @param secondary_ip: the ip to check
715
  @type prereq: boolean
716
  @param prereq: whether to throw a prerequisite or an execute error
717
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
718
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
719

720
  """
721
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
722
  result.Raise("Failure checking secondary ip on node %s" % node,
723
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
724
  if not result.payload:
725
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
726
           " please fix and re-run this command" % secondary_ip)
727
    if prereq:
728
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
729
    else:
730
      raise errors.OpExecError(msg)
731

    
732

    
733
def _GetClusterDomainSecret():
734
  """Reads the cluster domain secret.
735

736
  """
737
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
738
                               strict=True)
739

    
740

    
741
def _CheckInstanceDown(lu, instance, reason):
742
  """Ensure that an instance is not running."""
743
  if instance.admin_up:
744
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
745
                               (instance.name, reason), errors.ECODE_STATE)
746

    
747
  pnode = instance.primary_node
748
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
749
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
750
              prereq=True, ecode=errors.ECODE_ENVIRON)
751

    
752
  if instance.name in ins_l.payload:
753
    raise errors.OpPrereqError("Instance %s is running, %s" %
754
                               (instance.name, reason), errors.ECODE_STATE)
755

    
756

    
757
def _ExpandItemName(fn, name, kind):
758
  """Expand an item name.
759

760
  @param fn: the function to use for expansion
761
  @param name: requested item name
762
  @param kind: text description ('Node' or 'Instance')
763
  @return: the resolved (full) name
764
  @raise errors.OpPrereqError: if the item is not found
765

766
  """
767
  full_name = fn(name)
768
  if full_name is None:
769
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
770
                               errors.ECODE_NOENT)
771
  return full_name
772

    
773

    
774
def _ExpandNodeName(cfg, name):
775
  """Wrapper over L{_ExpandItemName} for nodes."""
776
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
777

    
778

    
779
def _ExpandInstanceName(cfg, name):
780
  """Wrapper over L{_ExpandItemName} for instance."""
781
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
782

    
783

    
784
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
785
                          memory, vcpus, nics, disk_template, disks,
786
                          bep, hvp, hypervisor_name):
787
  """Builds instance related env variables for hooks
788

789
  This builds the hook environment from individual variables.
790

791
  @type name: string
792
  @param name: the name of the instance
793
  @type primary_node: string
794
  @param primary_node: the name of the instance's primary node
795
  @type secondary_nodes: list
796
  @param secondary_nodes: list of secondary nodes as strings
797
  @type os_type: string
798
  @param os_type: the name of the instance's OS
799
  @type status: boolean
800
  @param status: the should_run status of the instance
801
  @type memory: string
802
  @param memory: the memory size of the instance
803
  @type vcpus: string
804
  @param vcpus: the count of VCPUs the instance has
805
  @type nics: list
806
  @param nics: list of tuples (ip, mac, mode, link) representing
807
      the NICs the instance has
808
  @type disk_template: string
809
  @param disk_template: the disk template of the instance
810
  @type disks: list
811
  @param disks: the list of (size, mode) pairs
812
  @type bep: dict
813
  @param bep: the backend parameters for the instance
814
  @type hvp: dict
815
  @param hvp: the hypervisor parameters for the instance
816
  @type hypervisor_name: string
817
  @param hypervisor_name: the hypervisor for the instance
818
  @rtype: dict
819
  @return: the hook environment for this instance
820

821
  """
822
  if status:
823
    str_status = "up"
824
  else:
825
    str_status = "down"
826
  env = {
827
    "OP_TARGET": name,
828
    "INSTANCE_NAME": name,
829
    "INSTANCE_PRIMARY": primary_node,
830
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
831
    "INSTANCE_OS_TYPE": os_type,
832
    "INSTANCE_STATUS": str_status,
833
    "INSTANCE_MEMORY": memory,
834
    "INSTANCE_VCPUS": vcpus,
835
    "INSTANCE_DISK_TEMPLATE": disk_template,
836
    "INSTANCE_HYPERVISOR": hypervisor_name,
837
  }
838

    
839
  if nics:
840
    nic_count = len(nics)
841
    for idx, (ip, mac, mode, link) in enumerate(nics):
842
      if ip is None:
843
        ip = ""
844
      env["INSTANCE_NIC%d_IP" % idx] = ip
845
      env["INSTANCE_NIC%d_MAC" % idx] = mac
846
      env["INSTANCE_NIC%d_MODE" % idx] = mode
847
      env["INSTANCE_NIC%d_LINK" % idx] = link
848
      if mode == constants.NIC_MODE_BRIDGED:
849
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
850
  else:
851
    nic_count = 0
852

    
853
  env["INSTANCE_NIC_COUNT"] = nic_count
854

    
855
  if disks:
856
    disk_count = len(disks)
857
    for idx, (size, mode) in enumerate(disks):
858
      env["INSTANCE_DISK%d_SIZE" % idx] = size
859
      env["INSTANCE_DISK%d_MODE" % idx] = mode
860
  else:
861
    disk_count = 0
862

    
863
  env["INSTANCE_DISK_COUNT"] = disk_count
864

    
865
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
866
    for key, value in source.items():
867
      env["INSTANCE_%s_%s" % (kind, key)] = value
868

    
869
  return env
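  # Illustrative result (sketch): for an instance with one bridged NIC and one
  # disk, the returned environment contains, among others:
  #   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_STATUS,
  #   INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
  #   INSTANCE_NIC0_LINK, INSTANCE_NIC0_BRIDGE,
  #   INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE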
870

    
871

    
872
def _NICListToTuple(lu, nics):
873
  """Build a list of nic information tuples.
874

875
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
876
  value in LUInstanceQueryData.
877

878
  @type lu:  L{LogicalUnit}
879
  @param lu: the logical unit on whose behalf we execute
880
  @type nics: list of L{objects.NIC}
881
  @param nics: list of nics to convert to hooks tuples
882

883
  """
884
  hooks_nics = []
885
  cluster = lu.cfg.GetClusterInfo()
886
  for nic in nics:
887
    ip = nic.ip
888
    mac = nic.mac
889
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
890
    mode = filled_params[constants.NIC_MODE]
891
    link = filled_params[constants.NIC_LINK]
892
    hooks_nics.append((ip, mac, mode, link))
893
  return hooks_nics
894

    
895

    
896
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
897
  """Builds instance related env variables for hooks from an object.
898

899
  @type lu: L{LogicalUnit}
900
  @param lu: the logical unit on whose behalf we execute
901
  @type instance: L{objects.Instance}
902
  @param instance: the instance for which we should build the
903
      environment
904
  @type override: dict
905
  @param override: dictionary with key/values that will override
906
      our values
907
  @rtype: dict
908
  @return: the hook environment dictionary
909

910
  """
911
  cluster = lu.cfg.GetClusterInfo()
912
  bep = cluster.FillBE(instance)
913
  hvp = cluster.FillHV(instance)
914
  args = {
915
    'name': instance.name,
916
    'primary_node': instance.primary_node,
917
    'secondary_nodes': instance.secondary_nodes,
918
    'os_type': instance.os,
919
    'status': instance.admin_up,
920
    'memory': bep[constants.BE_MEMORY],
921
    'vcpus': bep[constants.BE_VCPUS],
922
    'nics': _NICListToTuple(lu, instance.nics),
923
    'disk_template': instance.disk_template,
924
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
925
    'bep': bep,
926
    'hvp': hvp,
927
    'hypervisor_name': instance.hypervisor,
928
  }
929
  if override:
930
    args.update(override)
931
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
932

    
933

    
934
def _AdjustCandidatePool(lu, exceptions):
935
  """Adjust the candidate pool after node operations.
936

937
  """
938
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
939
  if mod_list:
940
    lu.LogInfo("Promoted nodes to master candidate role: %s",
941
               utils.CommaJoin(node.name for node in mod_list))
942
    for name in mod_list:
943
      lu.context.ReaddNode(name)
944
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
945
  if mc_now > mc_max:
946
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
947
               (mc_now, mc_max))
948

    
949

    
950
def _DecideSelfPromotion(lu, exceptions=None):
951
  """Decide whether I should promote myself as a master candidate.
952

953
  """
954
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
955
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
956
  # the new node will increase mc_max by one, so:
957
  mc_should = min(mc_should + 1, cp_size)
958
  return mc_now < mc_should
959

    
960

    
961
def _CheckNicsBridgesExist(lu, target_nics, target_node):
962
  """Check that the brigdes needed by a list of nics exist.
963

964
  """
965
  cluster = lu.cfg.GetClusterInfo()
966
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
967
  brlist = [params[constants.NIC_LINK] for params in paramslist
968
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
969
  if brlist:
970
    result = lu.rpc.call_bridges_exist(target_node, brlist)
971
    result.Raise("Error checking bridges on destination node '%s'" %
972
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
973

    
974

    
975
def _CheckInstanceBridgesExist(lu, instance, node=None):
976
  """Check that the brigdes needed by an instance exist.
977

978
  """
979
  if node is None:
980
    node = instance.primary_node
981
  _CheckNicsBridgesExist(lu, instance.nics, node)
982

    
983

    
984
def _CheckOSVariant(os_obj, name):
985
  """Check whether an OS name conforms to the os variants specification.
986

987
  @type os_obj: L{objects.OS}
988
  @param os_obj: OS object to check
989
  @type name: string
990
  @param name: OS name passed by the user, to check for validity
991

992
  """
993
  if not os_obj.supported_variants:
994
    return
995
  variant = objects.OS.GetVariant(name)
996
  if not variant:
997
    raise errors.OpPrereqError("OS name must include a variant",
998
                               errors.ECODE_INVAL)
999

    
1000
  if variant not in os_obj.supported_variants:
1001
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1002

    
1003

    
1004
def _GetNodeInstancesInner(cfg, fn):
1005
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1006

    
1007

    
1008
def _GetNodeInstances(cfg, node_name):
1009
  """Returns a list of all primary and secondary instances on a node.
1010

1011
  """
1012

    
1013
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1014

    
1015

    
1016
def _GetNodePrimaryInstances(cfg, node_name):
1017
  """Returns primary instances on a node.
1018

1019
  """
1020
  return _GetNodeInstancesInner(cfg,
1021
                                lambda inst: node_name == inst.primary_node)
1022

    
1023

    
1024
def _GetNodeSecondaryInstances(cfg, node_name):
1025
  """Returns secondary instances on a node.
1026

1027
  """
1028
  return _GetNodeInstancesInner(cfg,
1029
                                lambda inst: node_name in inst.secondary_nodes)
1030

    
1031

    
1032
def _GetStorageTypeArgs(cfg, storage_type):
1033
  """Returns the arguments for a storage type.
1034

1035
  """
1036
  # Special case for file storage
1037
  if storage_type == constants.ST_FILE:
1038
    # storage.FileStorage wants a list of storage directories
1039
    return [[cfg.GetFileStorageDir()]]
1040

    
1041
  return []
1042

    
1043

    
1044
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1045
  faulty = []
1046

    
1047
  for dev in instance.disks:
1048
    cfg.SetDiskID(dev, node_name)
1049

    
1050
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1051
  result.Raise("Failed to get disk status from node %s" % node_name,
1052
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1053

    
1054
  for idx, bdev_status in enumerate(result.payload):
1055
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1056
      faulty.append(idx)
1057

    
1058
  return faulty
1059

    
1060

    
1061
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1062
  """Check the sanity of iallocator and node arguments and use the
1063
  cluster-wide iallocator if appropriate.
1064

1065
  Check that at most one of (iallocator, node) is specified. If none is
1066
  specified, then the LU's opcode's iallocator slot is filled with the
1067
  cluster-wide default iallocator.
1068

1069
  @type iallocator_slot: string
1070
  @param iallocator_slot: the name of the opcode iallocator slot
1071
  @type node_slot: string
1072
  @param node_slot: the name of the opcode target node slot
1073

1074
  """
1075
  node = getattr(lu.op, node_slot, None)
1076
  iallocator = getattr(lu.op, iallocator_slot, None)
1077

    
1078
  if node is not None and iallocator is not None:
1079
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1080
                               errors.ECODE_INVAL)
1081
  elif node is None and iallocator is None:
1082
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1083
    if default_iallocator:
1084
      setattr(lu.op, iallocator_slot, default_iallocator)
1085
    else:
1086
      raise errors.OpPrereqError("No iallocator or node given and no"
1087
                                 " cluster-wide default iallocator found."
1088
                                 " Please specify either an iallocator or a"
1089
                                 " node, or set a cluster-wide default"
1090
                                 " iallocator.")
1091

    
1092

    
1093
class LUClusterPostInit(LogicalUnit):
1094
  """Logical unit for running hooks after cluster initialization.
1095

1096
  """
1097
  HPATH = "cluster-init"
1098
  HTYPE = constants.HTYPE_CLUSTER
1099

    
1100
  def BuildHooksEnv(self):
1101
    """Build hooks env.
1102

1103
    """
1104
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1105
    mn = self.cfg.GetMasterNode()
1106
    return env, [], [mn]
1107

    
1108
  def Exec(self, feedback_fn):
1109
    """Nothing to do.
1110

1111
    """
1112
    return True
1113

    
1114

    
1115
class LUClusterDestroy(LogicalUnit):
1116
  """Logical unit for destroying the cluster.
1117

1118
  """
1119
  HPATH = "cluster-destroy"
1120
  HTYPE = constants.HTYPE_CLUSTER
1121

    
1122
  def BuildHooksEnv(self):
1123
    """Build hooks env.
1124

1125
    """
1126
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1127
    return env, [], []
1128

    
1129
  def CheckPrereq(self):
1130
    """Check prerequisites.
1131

1132
    This checks whether the cluster is empty.
1133

1134
    Any errors are signaled by raising errors.OpPrereqError.
1135

1136
    """
1137
    master = self.cfg.GetMasterNode()
1138

    
1139
    nodelist = self.cfg.GetNodeList()
1140
    if len(nodelist) != 1 or nodelist[0] != master:
1141
      raise errors.OpPrereqError("There are still %d node(s) in"
1142
                                 " this cluster." % (len(nodelist) - 1),
1143
                                 errors.ECODE_INVAL)
1144
    instancelist = self.cfg.GetInstanceList()
1145
    if instancelist:
1146
      raise errors.OpPrereqError("There are still %d instance(s) in"
1147
                                 " this cluster." % len(instancelist),
1148
                                 errors.ECODE_INVAL)
1149

    
1150
  def Exec(self, feedback_fn):
1151
    """Destroys the cluster.
1152

1153
    """
1154
    master = self.cfg.GetMasterNode()
1155

    
1156
    # Run post hooks on master node before it's removed
1157
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1158
    try:
1159
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1160
    except:
1161
      # pylint: disable-msg=W0702
1162
      self.LogWarning("Errors occurred running hooks on %s" % master)
1163

    
1164
    result = self.rpc.call_node_stop_master(master, False)
1165
    result.Raise("Could not disable the master role")
1166

    
1167
    return master
1168

    
1169

    
1170
def _VerifyCertificate(filename):
1171
  """Verifies a certificate for LUClusterVerify.
1172

1173
  @type filename: string
1174
  @param filename: Path to PEM file
1175

1176
  """
1177
  try:
1178
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1179
                                           utils.ReadFile(filename))
1180
  except Exception, err: # pylint: disable-msg=W0703
1181
    return (LUClusterVerify.ETYPE_ERROR,
1182
            "Failed to load X509 certificate %s: %s" % (filename, err))
1183

    
1184
  (errcode, msg) = \
1185
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1186
                                constants.SSL_CERT_EXPIRATION_ERROR)
1187

    
1188
  if msg:
1189
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1190
  else:
1191
    fnamemsg = None
1192

    
1193
  if errcode is None:
1194
    return (None, fnamemsg)
1195
  elif errcode == utils.CERT_WARNING:
1196
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1197
  elif errcode == utils.CERT_ERROR:
1198
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1199

    
1200
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
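  # Sketch of the intended use (hedged): the caller pairs the returned error
  # type with _ErrorIf-style reporting, e.g.
  #
  #   (errcode, msg) = _VerifyCertificate(filename)
  #   # report msg with severity errcode if errcode is not None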
1201

    
1202

    
1203
class LUClusterVerify(LogicalUnit):
1204
  """Verifies the cluster status.
1205

1206
  """
1207
  HPATH = "cluster-verify"
1208
  HTYPE = constants.HTYPE_CLUSTER
1209
  REQ_BGL = False
1210

    
1211
  TCLUSTER = "cluster"
1212
  TNODE = "node"
1213
  TINSTANCE = "instance"
1214

    
1215
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1216
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1217
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1218
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1219
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1220
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1221
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1222
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1223
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1224
  ENODEDRBD = (TNODE, "ENODEDRBD")
1225
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1226
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1227
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1228
  ENODEHV = (TNODE, "ENODEHV")
1229
  ENODELVM = (TNODE, "ENODELVM")
1230
  ENODEN1 = (TNODE, "ENODEN1")
1231
  ENODENET = (TNODE, "ENODENET")
1232
  ENODEOS = (TNODE, "ENODEOS")
1233
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1234
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1235
  ENODERPC = (TNODE, "ENODERPC")
1236
  ENODESSH = (TNODE, "ENODESSH")
1237
  ENODEVERSION = (TNODE, "ENODEVERSION")
1238
  ENODESETUP = (TNODE, "ENODESETUP")
1239
  ENODETIME = (TNODE, "ENODETIME")
1240
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1241

    
1242
  ETYPE_FIELD = "code"
1243
  ETYPE_ERROR = "ERROR"
1244
  ETYPE_WARNING = "WARNING"
1245

    
1246
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1247

    
1248
  class NodeImage(object):
1249
    """A class representing the logical and physical status of a node.
1250

1251
    @type name: string
1252
    @ivar name: the node name to which this object refers
1253
    @ivar volumes: a structure as returned from
1254
        L{ganeti.backend.GetVolumeList} (runtime)
1255
    @ivar instances: a list of running instances (runtime)
1256
    @ivar pinst: list of configured primary instances (config)
1257
    @ivar sinst: list of configured secondary instances (config)
1258
    @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1259
        of this node (config)
1260
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1261
    @ivar dfree: free disk, as reported by the node (runtime)
1262
    @ivar offline: the offline status (config)
1263
    @type rpc_fail: boolean
1264
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1265
        not whether the individual keys were correct) (runtime)
1266
    @type lvm_fail: boolean
1267
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1268
    @type hyp_fail: boolean
1269
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1270
    @type ghost: boolean
1271
    @ivar ghost: whether this is a known node or not (config)
1272
    @type os_fail: boolean
1273
    @ivar os_fail: whether the RPC call didn't return valid OS data
1274
    @type oslist: list
1275
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1276
    @type vm_capable: boolean
1277
    @ivar vm_capable: whether the node can host instances
1278

1279
    """
1280
    def __init__(self, offline=False, name=None, vm_capable=True):
1281
      self.name = name
1282
      self.volumes = {}
1283
      self.instances = []
1284
      self.pinst = []
1285
      self.sinst = []
1286
      self.sbp = {}
1287
      self.mfree = 0
1288
      self.dfree = 0
1289
      self.offline = offline
1290
      self.vm_capable = vm_capable
1291
      self.rpc_fail = False
1292
      self.lvm_fail = False
1293
      self.hyp_fail = False
1294
      self.ghost = False
1295
      self.os_fail = False
1296
      self.oslist = {}
1297

    
1298
  def ExpandNames(self):
1299
    self.needed_locks = {
1300
      locking.LEVEL_NODE: locking.ALL_SET,
1301
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1302
    }
1303
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1304

    
1305
  def _Error(self, ecode, item, msg, *args, **kwargs):
1306
    """Format an error message.
1307

1308
    Based on the opcode's error_codes parameter, either format a
1309
    parseable error code, or a simpler error string.
1310

1311
    This must be called only from Exec and functions called from Exec.
1312

1313
    """
1314
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1315
    itype, etxt = ecode
1316
    # first complete the msg
1317
    if args:
1318
      msg = msg % args
1319
    # then format the whole message
1320
    if self.op.error_codes:
1321
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1322
    else:
1323
      if item:
1324
        item = " " + item
1325
      else:
1326
        item = ""
1327
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1328
    # and finally report it via the feedback_fn
1329
    self._feedback_fn("  - %s" % msg)
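    # Illustrative output (sketch): with the opcode's error_codes enabled a
    # message is emitted as
    #   "ERROR:EINSTANCEDOWN:instance:inst1.example.com:instance not running"
    # and without it as
    #   "ERROR: instance inst1.example.com: instance not running"
    # (both prefixed with "  - " by the feedback call above).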
1330

    
1331
  def _ErrorIf(self, cond, *args, **kwargs):
1332
    """Log an error message if the passed condition is True.
1333

1334
    """
1335
    cond = bool(cond) or self.op.debug_simulate_errors
1336
    if cond:
1337
      self._Error(*args, **kwargs)
1338
    # do not mark the operation as failed when only warnings were raised
1339
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1340
      self.bad = self.bad or cond
1341

    
1342
  def _VerifyNode(self, ninfo, nresult):
1343
    """Perform some basic validation on data returned from a node.
1344

1345
      - check the result data structure is well formed and has all the
1346
        mandatory fields
1347
      - check ganeti version
1348

1349
    @type ninfo: L{objects.Node}
1350
    @param ninfo: the node to check
1351
    @param nresult: the results from the node
1352
    @rtype: boolean
1353
    @return: whether overall this call was successful (and we can expect
1354
         reasonable values in the response)
1355

1356
    """
1357
    node = ninfo.name
1358
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1359

    
1360
    # main result, nresult should be a non-empty dict
1361
    test = not nresult or not isinstance(nresult, dict)
1362
    _ErrorIf(test, self.ENODERPC, node,
1363
                  "unable to verify node: no data returned")
1364
    if test:
1365
      return False
1366

    
1367
    # compares ganeti version
1368
    local_version = constants.PROTOCOL_VERSION
1369
    remote_version = nresult.get("version", None)
1370
    test = not (remote_version and
1371
                isinstance(remote_version, (list, tuple)) and
1372
                len(remote_version) == 2)
1373
    _ErrorIf(test, self.ENODERPC, node,
1374
             "connection to node returned invalid data")
1375
    if test:
1376
      return False
1377

    
1378
    test = local_version != remote_version[0]
1379
    _ErrorIf(test, self.ENODEVERSION, node,
1380
             "incompatible protocol versions: master %s,"
1381
             " node %s", local_version, remote_version[0])
1382
    if test:
1383
      return False
1384

    
1385
    # node seems compatible, we can actually try to look into its results
1386

    
1387
    # full package version
1388
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1389
                  self.ENODEVERSION, node,
1390
                  "software version mismatch: master %s, node %s",
1391
                  constants.RELEASE_VERSION, remote_version[1],
1392
                  code=self.ETYPE_WARNING)
1393

    
1394
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1395
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1396
      for hv_name, hv_result in hyp_result.iteritems():
1397
        test = hv_result is not None
1398
        _ErrorIf(test, self.ENODEHV, node,
1399
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1400

    
1401
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1402
    if ninfo.vm_capable and isinstance(hvp_result, list):
1403
      for item, hv_name, hv_result in hvp_result:
1404
        _ErrorIf(True, self.ENODEHV, node,
1405
                 "hypervisor %s parameter verify failure (source %s): %s",
1406
                 hv_name, item, hv_result)
1407

    
1408
    test = nresult.get(constants.NV_NODESETUP,
1409
                           ["Missing NODESETUP results"])
1410
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1411
             "; ".join(test))
1412

    
1413
    return True
1414

    
1415
  def _VerifyNodeTime(self, ninfo, nresult,
1416
                      nvinfo_starttime, nvinfo_endtime):
1417
    """Check the node time.
1418

1419
    @type ninfo: L{objects.Node}
1420
    @param ninfo: the node to check
1421
    @param nresult: the remote results for the node
1422
    @param nvinfo_starttime: the start time of the RPC call
1423
    @param nvinfo_endtime: the end time of the RPC call
1424

1425
    """
1426
    node = ninfo.name
1427
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1428

    
1429
    ntime = nresult.get(constants.NV_TIME, None)
1430
    try:
1431
      ntime_merged = utils.MergeTime(ntime)
1432
    except (ValueError, TypeError):
1433
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1434
      return
1435

    
1436
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1437
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1438
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1439
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1440
    else:
1441
      ntime_diff = None
1442

    
1443
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1444
             "Node time diverges by at least %s from master node time",
1445
             ntime_diff)
1446

    
1447
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1448
    """Check the node LVM results.
1449

1450
    @type ninfo: L{objects.Node}
1451
    @param ninfo: the node to check
1452
    @param nresult: the remote results for the node
1453
    @param vg_name: the configured VG name
1454

1455
    """
1456
    if vg_name is None:
1457
      return
1458

    
1459
    node = ninfo.name
1460
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1461

    
1462
    # checks vg existence and size > 20G
1463
    vglist = nresult.get(constants.NV_VGLIST, None)
1464
    test = not vglist
1465
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1466
    if not test:
1467
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1468
                                            constants.MIN_VG_SIZE)
1469
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1470

    
1471
    # check pv names
1472
    pvlist = nresult.get(constants.NV_PVLIST, None)
1473
    test = pvlist is None
1474
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1475
    if not test:
1476
      # check that ':' is not present in PV names, since it's a
1477
      # special character for lvcreate (denotes the range of PEs to
1478
      # use on the PV)
1479
      for _, pvname, owner_vg in pvlist:
1480
        test = ":" in pvname
1481
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1482
                 " '%s' of VG '%s'", pvname, owner_vg)
1483

    
1484
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1485
    """Check the node bridges.
1486

1487
    @type ninfo: L{objects.Node}
1488
    @param ninfo: the node to check
1489
    @param nresult: the remote results for the node
1490
    @param bridges: the expected list of bridges
1491

1492
    """
1493
    if not bridges:
1494
      return
1495

    
1496
    node = ninfo.name
1497
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1498

    
1499
    missing = nresult.get(constants.NV_BRIDGES, None)
1500
    test = not isinstance(missing, list)
1501
    _ErrorIf(test, self.ENODENET, node,
1502
             "did not return valid bridge information")
1503
    if not test:
1504
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1505
               utils.CommaJoin(sorted(missing)))
1506

    
1507
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

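  # Note: the diskstatus argument of _VerifyInstance is the per-instance
  # slice of the structure built by _CollectDiskInfo(), i.e. a dict mapping
  # node names to a list of (success, status) tuples, one entry per disk
  # index.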
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

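  # Summary of the per-file checks done in _VerifyNodeFiles:
  #   - missing and must_have            -> error
  #   - wrong checksum and must_have     -> error
  #   - present but not must_have        -> error (master-only files should
  #     not exist on non-master-candidates, whatever their checksum)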
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

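  # Note: each NV_OSLIST entry is expected to be a 7-element list, unpacked
  # in _UpdateNodeOS as
  #   (name, os_path, status, diagnose, variants, parameters, api_ver)
  # e.g. (illustrative values only):
  #   ["debootstrap", "/srv/ganeti/os/debootstrap", True, "", ["default"],
  #    [], [20]]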
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

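  # Illustrative shape of the structure returned by _CollectDiskInfo() (the
  # names are made up, only the nesting matters):
  #   {"inst1.example.com": {"node1.example.com": [(True, status_disk0),
  #                                                (True, status_disk1)],
  #                          "node2.example.com": [(False, "node offline"),
  #                                                (False, "node offline")]}}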
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))


  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; their failure is
    logged in the verify output and causes the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodeinfo_byname = self.cfg.GetAllNodesInfo()
    nodelist = utils.NiceSort(nodeinfo_byname.keys())
    nodeinfo = [nodeinfo_byname[nname] for nname in nodelist]
    instanceinfo = self.cfg.GetAllInstancesInfo()
    instancelist = utils.NiceSort(instanceinfo.keys())
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    # Compute the set of hypervisor parameters
    hvp_data = []
    for hv_name in hypervisors:
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
    for os_name, os_hvp in cluster.os_hvp.items():
      for hv_name, hv_params in os_hvp.items():
        if not hv_params:
          continue
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
    # TODO: collapse identical parameter values in a single one
    for instance in instanceinfo.values():
      if not instance.hvparams:
        continue
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
    # and verify them locally
    self._VerifyHVP(hvp_data)

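    # node_verify_param selects the checks each node will run for the
    # call_node_verify RPC issued below: every NV_* key names a check and its
    # value carries that check's arguments (for instance, NV_LVLIST is
    # restricted to the configured volume group further down).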
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2175
    node_verify_param = {
2176
      constants.NV_FILELIST: file_names,
2177
      constants.NV_NODELIST: [node.name for node in nodeinfo
2178
                              if not node.offline],
2179
      constants.NV_HYPERVISOR: hypervisors,
2180
      constants.NV_HVPARAMS: hvp_data,
2181
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2182
                                  node.secondary_ip) for node in nodeinfo
2183
                                 if not node.offline],
2184
      constants.NV_INSTANCELIST: hypervisors,
2185
      constants.NV_VERSION: None,
2186
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2187
      constants.NV_NODESETUP: None,
2188
      constants.NV_TIME: None,
2189
      constants.NV_MASTERIP: (master_node, master_ip),
2190
      constants.NV_OSLIST: None,
2191
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2192
      }
2193

    
2194
    if vg_name is not None:
2195
      node_verify_param[constants.NV_VGLIST] = None
2196
      node_verify_param[constants.NV_LVLIST] = vg_name
2197
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2198
      node_verify_param[constants.NV_DRBDLIST] = None
2199

    
2200
    if drbd_helper:
2201
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2202

    
2203
    # bridge checks
2204
    # FIXME: this needs to be changed per node-group, not cluster-wide
2205
    bridges = set()
2206
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2207
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2208
      bridges.add(default_nicpp[constants.NIC_LINK])
2209
    for instance in instanceinfo.values():
2210
      for nic in instance.nics:
2211
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2212
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2213
          bridges.add(full_nic[constants.NIC_LINK])
2214

    
2215
    if bridges:
2216
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2217

    
2218
    # Build our expected cluster state
2219
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2220
                                                 name=node.name,
2221
                                                 vm_capable=node.vm_capable))
2222
                      for node in nodeinfo)
2223

    
2224
    # Gather OOB paths
2225
    oob_paths = []
2226
    for node in nodeinfo:
2227
      path = _SupportsOob(self.cfg, node)
2228
      if path and path not in oob_paths:
2229
        oob_paths.append(path)
2230

    
2231
    if oob_paths:
2232
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2233

    
2234
    for instance in instancelist:
2235
      inst_config = instanceinfo[instance]
2236

    
2237
      for nname in inst_config.all_nodes:
2238
        if nname not in node_image:
2239
          # ghost node
2240
          gnode = self.NodeImage(name=nname)
2241
          gnode.ghost = True
2242
          node_image[nname] = gnode
2243

    
2244
      inst_config.MapLVsByNode(node_vol_should)
2245

    
2246
      pnode = inst_config.primary_node
2247
      node_image[pnode].pinst.append(instance)
2248

    
2249
      for snode in inst_config.secondary_nodes:
2250
        nimg = node_image[snode]
2251
        nimg.sinst.append(instance)
2252
        if pnode not in nimg.sbp:
2253
          nimg.sbp[pnode] = []
2254
        nimg.sbp[pnode].append(instance)
2255

    
2256
    # At this point, we have the in-memory data structures complete,
2257
    # except for the runtime information, which we'll gather next
2258

    
2259
    # Due to the way our RPC system works, exact response times cannot be
2260
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2261
    # time before and after executing the request, we can at least have a time
2262
    # window.
2263
    nvinfo_starttime = time.time()
2264
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2265
                                           self.cfg.GetClusterName())
2266
    nvinfo_endtime = time.time()
2267

    
2268
    all_drbd_map = self.cfg.ComputeDRBDMap()
2269

    
2270
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2271
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2272

    
2273
    feedback_fn("* Verifying node status")
2274

    
2275
    refos_img = None
2276

    
2277
    for node_i in nodeinfo:
2278
      node = node_i.name
2279
      nimg = node_image[node]
2280

    
2281
      if node_i.offline:
2282
        if verbose:
2283
          feedback_fn("* Skipping offline node %s" % (node,))
2284
        n_offline += 1
2285
        continue
2286

    
2287
      if node == master_node:
2288
        ntype = "master"
2289
      elif node_i.master_candidate:
2290
        ntype = "master candidate"
2291
      elif node_i.drained:
2292
        ntype = "drained"
2293
        n_drained += 1
2294
      else:
2295
        ntype = "regular"
2296
      if verbose:
2297
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2298

    
2299
      msg = all_nvinfo[node].fail_msg
2300
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2301
      if msg:
2302
        nimg.rpc_fail = True
2303
        continue
2304

    
2305
      nresult = all_nvinfo[node].payload
2306

    
2307
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2308
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2309
      self._VerifyNodeNetwork(node_i, nresult)
2310
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2311
                            master_files)
2312

    
2313
      self._VerifyOob(node_i, nresult)
2314

    
2315
      if nimg.vm_capable:
2316
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2317
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2318
                             all_drbd_map)
2319

    
2320
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2321
        self._UpdateNodeInstances(node_i, nresult, nimg)
2322
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2323
        self._UpdateNodeOS(node_i, nresult, nimg)
2324
        if not nimg.os_fail:
2325
          if refos_img is None:
2326
            refos_img = nimg
2327
          self._VerifyNodeOS(node_i, nimg, refos_img)
2328
        self._VerifyNodeBridges(node_i, nresult, bridges)
2329

    
2330
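    # Besides the per-instance error checks, the loop below also collects the
    # non-redundant and non-auto-balanced instances; the summary notices for
    # those are only emitted later under "* Other Notes".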
    feedback_fn("* Verifying instance status")
2331
    for instance in instancelist:
2332
      if verbose:
2333
        feedback_fn("* Verifying instance %s" % instance)
2334
      inst_config = instanceinfo[instance]
2335
      self._VerifyInstance(instance, inst_config, node_image,
2336
                           instdisk[instance])
2337
      inst_nodes_offline = []
2338

    
2339
      pnode = inst_config.primary_node
2340
      pnode_img = node_image[pnode]
2341
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2342
               self.ENODERPC, pnode, "instance %s, connection to"
2343
               " primary node failed", instance)
2344

    
2345
      _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2346
               "instance lives on offline node %s", inst_config.primary_node)
2347

    
2348
      # If the instance is non-redundant we cannot survive losing its primary
2349
      # node, so we are not N+1 compliant. On the other hand we have no disk
2350
      # templates with more than one secondary so that situation is not well
2351
      # supported either.
2352
      # FIXME: does not support file-backed instances
2353
      if not inst_config.secondary_nodes:
2354
        i_non_redundant.append(instance)
2355

    
2356
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2357
               instance, "instance has multiple secondary nodes: %s",
2358
               utils.CommaJoin(inst_config.secondary_nodes),
2359
               code=self.ETYPE_WARNING)
2360

    
2361
      if inst_config.disk_template in constants.DTS_NET_MIRROR:
2362
        pnode = inst_config.primary_node
2363
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2364
        instance_groups = {}
2365

    
2366
        for node in instance_nodes:
2367
          instance_groups.setdefault(nodeinfo_byname[node].group,
2368
                                     []).append(node)
2369

    
2370
        pretty_list = [
2371
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2372
          # Sort so that we always list the primary node first.
2373
          for group, nodes in sorted(instance_groups.items(),
2374
                                     key=lambda (_, nodes): pnode in nodes,
2375
                                     reverse=True)]
2376

    
2377
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2378
                      instance, "instance has primary and secondary nodes in"
2379
                      " different groups: %s", utils.CommaJoin(pretty_list),
2380
                      code=self.ETYPE_WARNING)
2381

    
2382
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2383
        i_non_a_balanced.append(instance)
2384

    
2385
      for snode in inst_config.secondary_nodes:
2386
        s_img = node_image[snode]
2387
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2388
                 "instance %s, connection to secondary node failed", instance)
2389

    
2390
        if s_img.offline:
2391
          inst_nodes_offline.append(snode)
2392

    
2393
      # warn that the instance lives on offline nodes
2394
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2395
               "instance has offline secondary node(s) %s",
2396
               utils.CommaJoin(inst_nodes_offline))
2397
      # ... or ghost/non-vm_capable nodes
2398
      for node in inst_config.all_nodes:
2399
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2400
                 "instance lives on ghost node %s", node)
2401
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2402
                 instance, "instance lives on non-vm_capable node %s", node)
2403

    
2404
    feedback_fn("* Verifying orphan volumes")
2405
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2406
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2407

    
2408
    feedback_fn("* Verifying orphan instances")
2409
    self._VerifyOrphanInstances(instancelist, node_image)
2410

    
2411
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2412
      feedback_fn("* Verifying N+1 Memory redundancy")
2413
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2414

    
2415
    feedback_fn("* Other Notes")
2416
    if i_non_redundant:
2417
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2418
                  % len(i_non_redundant))
2419

    
2420
    if i_non_a_balanced:
2421
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2422
                  % len(i_non_a_balanced))
2423

    
2424
    if n_offline:
2425
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2426

    
2427
    if n_drained:
2428
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2429

    
2430
    return not self.bad
2431

    
2432
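  # Note: in HooksCallBack below, each res.payload entry is unpacked as a
  # (script, status, output) triple, with status compared against
  # constants.HKR_FAIL.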
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = self.cfg.GetAllInstancesInfo().values()

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if not inst.admin_up:
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

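  # Note: the sizes returned by call_blockdev_getsize in Exec below are
  # shifted right by 20 bits before the comparison, so the node presumably
  # reports bytes while disk.size is recorded in MiB.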
  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for node in node_list:
        ninfo = self.cfg.GetNodeInfo(node)
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
2881
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2882
    if self.op.hvparams:
2883
      for hv_name, hv_dict in self.op.hvparams.items():
2884
        if hv_name not in self.new_hvparams:
2885
          self.new_hvparams[hv_name] = hv_dict
2886
        else:
2887
          self.new_hvparams[hv_name].update(hv_dict)
2888

    
2889
    # os hypervisor parameters
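    # (shape note, derived from the loops below: self.op.os_hvp and
    #  self.new_os_hvp are nested dicts of the form
    #  {os_name: {hv_name: {param: value}}}; the op values are merged on
    #  top of the current cluster-level os_hvp)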
2890
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2891
    if self.op.os_hvp:
2892
      for os_name, hvs in self.op.os_hvp.items():
2893
        if os_name not in self.new_os_hvp:
2894
          self.new_os_hvp[os_name] = hvs
2895
        else:
2896
          for hv_name, hv_dict in hvs.items():
2897
            if hv_name not in self.new_os_hvp[os_name]:
2898
              self.new_os_hvp[os_name][hv_name] = hv_dict
2899
            else:
2900
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2901

    
2902
    # os parameters
2903
    self.new_osp = objects.FillDict(cluster.osparams, {})
2904
    if self.op.osparams:
2905
      for os_name, osp in self.op.osparams.items():
2906
        if os_name not in self.new_osp:
2907
          self.new_osp[os_name] = {}
2908

    
2909
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2910
                                                  use_none=True)
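        # (use_none=True is what allows individual parameters to be
        #  removed here; an OS whose resulting dict is empty is dropped
        #  again just below)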
2911

    
2912
        if not self.new_osp[os_name]:
2913
          # we removed all parameters
2914
          del self.new_osp[os_name]
2915
        else:
2916
          # check the parameter validity (remote check)
2917
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2918
                         os_name, self.new_osp[os_name])
2919

    
2920
    # changes to the hypervisor list
2921
    if self.op.enabled_hypervisors is not None:
2922
      self.hv_list = self.op.enabled_hypervisors
2923
      for hv in self.hv_list:
2924
        # if the hypervisor doesn't already exist in the cluster
2925
        # hvparams, we initialize it to empty, and then (in both
2926
        # cases) we make sure to fill the defaults, as we might not
2927
        # have a complete defaults list if the hypervisor wasn't
2928
        # enabled before
2929
        if hv not in new_hvp:
2930
          new_hvp[hv] = {}
2931
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2932
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2933
    else:
2934
      self.hv_list = cluster.enabled_hypervisors
2935

    
2936
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2937
      # either the enabled list has changed, or the parameters have, validate
2938
      for hv_name, hv_params in self.new_hvparams.items():
2939
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2940
            (self.op.enabled_hypervisors and
2941
             hv_name in self.op.enabled_hypervisors)):
2942
          # either this is a new hypervisor, or its parameters have changed
2943
          hv_class = hypervisor.GetHypervisor(hv_name)
2944
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2945
          hv_class.CheckParameterSyntax(hv_params)
2946
          _CheckHVParams(self, node_list, hv_name, hv_params)
2947

    
2948
    if self.op.os_hvp:
2949
      # no need to check any newly-enabled hypervisors, since the
2950
      # defaults have already been checked in the above code-block
2951
      for os_name, os_hvp in self.new_os_hvp.items():
2952
        for hv_name, hv_params in os_hvp.items():
2953
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2954
          # we need to fill in the new os_hvp on top of the actual hvparams
2955
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2956
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2957
          hv_class = hypervisor.GetHypervisor(hv_name)
2958
          hv_class.CheckParameterSyntax(new_osp)
2959
          _CheckHVParams(self, node_list, hv_name, new_osp)
2960

    
2961
    if self.op.default_iallocator:
2962
      alloc_script = utils.FindFile(self.op.default_iallocator,
2963
                                    constants.IALLOCATOR_SEARCH_PATH,
2964
                                    os.path.isfile)
2965
      if alloc_script is None:
2966
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2967
                                   " specified" % self.op.default_iallocator,
2968
                                   errors.ECODE_INVAL)
2969

    
2970
  def Exec(self, feedback_fn):
2971
    """Change the parameters of the cluster.
2972

2973
    """
2974
    if self.op.vg_name is not None:
2975
      new_volume = self.op.vg_name
2976
      if not new_volume:
2977
        new_volume = None
2978
      if new_volume != self.cfg.GetVGName():
2979
        self.cfg.SetVGName(new_volume)
2980
      else:
2981
        feedback_fn("Cluster LVM configuration already in desired"
2982
                    " state, not changing")
2983
    if self.op.drbd_helper is not None:
2984
      new_helper = self.op.drbd_helper
2985
      if not new_helper:
2986
        new_helper = None
2987
      if new_helper != self.cfg.GetDRBDHelper():
2988
        self.cfg.SetDRBDHelper(new_helper)
2989
      else:
2990
        feedback_fn("Cluster DRBD helper already in desired state,"
2991
                    " not changing")
2992
    if self.op.hvparams:
2993
      self.cluster.hvparams = self.new_hvparams
2994
    if self.op.os_hvp:
2995
      self.cluster.os_hvp = self.new_os_hvp
2996
    if self.op.enabled_hypervisors is not None:
2997
      self.cluster.hvparams = self.new_hvparams
2998
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2999
    if self.op.beparams:
3000
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3001
    if self.op.nicparams:
3002
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3003
    if self.op.osparams:
3004
      self.cluster.osparams = self.new_osp
3005
    if self.op.ndparams:
3006
      self.cluster.ndparams = self.new_ndparams
3007

    
3008
    if self.op.candidate_pool_size is not None:
3009
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3010
      # we need to update the pool size here, otherwise the save will fail
3011
      _AdjustCandidatePool(self, [])
3012

    
3013
    if self.op.maintain_node_health is not None:
3014
      self.cluster.maintain_node_health = self.op.maintain_node_health
3015

    
3016
    if self.op.prealloc_wipe_disks is not None:
3017
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3018

    
3019
    if self.op.add_uids is not None:
3020
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3021

    
3022
    if self.op.remove_uids is not None:
3023
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3024

    
3025
    if self.op.uid_pool is not None:
3026
      self.cluster.uid_pool = self.op.uid_pool
3027

    
3028
    if self.op.default_iallocator is not None:
3029
      self.cluster.default_iallocator = self.op.default_iallocator
3030

    
3031
    if self.op.reserved_lvs is not None:
3032
      self.cluster.reserved_lvs = self.op.reserved_lvs
3033

    
3034
    def helper_os(aname, mods, desc):
3035
      desc += " OS list"
3036
      lst = getattr(self.cluster, aname)
3037
      for key, val in mods:
3038
        if key == constants.DDM_ADD:
3039
          if val in lst:
3040
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3041
          else:
3042
            lst.append(val)
3043
        elif key == constants.DDM_REMOVE:
3044
          if val in lst:
3045
            lst.remove(val)
3046
          else:
3047
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3048
        else:
3049
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
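
    # 'mods' is a list of (action, os_name) pairs; an illustrative value
    # would be [(constants.DDM_ADD, "debian-image"),
    #           (constants.DDM_REMOVE, "lenny-image")] (example names only)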
3050

    
3051
    if self.op.hidden_os:
3052
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3053

    
3054
    if self.op.blacklisted_os:
3055
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3056

    
3057
    if self.op.master_netdev:
3058
      master = self.cfg.GetMasterNode()
3059
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3060
                  self.cluster.master_netdev)
3061
      result = self.rpc.call_node_stop_master(master, False)
3062
      result.Raise("Could not disable the master ip")
3063
      feedback_fn("Changing master_netdev from %s to %s" %
3064
                  (self.cluster.master_netdev, self.op.master_netdev))
3065
      self.cluster.master_netdev = self.op.master_netdev
3066

    
3067
    self.cfg.Update(self.cluster, feedback_fn)
3068

    
3069
    if self.op.master_netdev:
3070
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3071
                  self.op.master_netdev)
3072
      result = self.rpc.call_node_start_master(master, False, False)
3073
      if result.fail_msg:
3074
        self.LogWarning("Could not re-enable the master ip on"
3075
                        " the master, please restart manually: %s",
3076
                        result.fail_msg)
3077

    
3078

    
3079
def _UploadHelper(lu, nodes, fname):
3080
  """Helper for uploading a file and showing warnings.
3081

3082
  """
3083
  if os.path.exists(fname):
3084
    result = lu.rpc.call_upload_file(nodes, fname)
3085
    for to_node, to_result in result.items():
3086
      msg = to_result.fail_msg
3087
      if msg:
3088
        msg = ("Copy of file %s to node %s failed: %s" %
3089
               (fname, to_node, msg))
3090
        lu.proc.LogWarning(msg)
3091

    
3092

    
3093
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3094
  """Distribute additional files which are part of the cluster configuration.
3095

3096
  ConfigWriter takes care of distributing the config and ssconf files, but
3097
  there are more files which should be distributed to all nodes. This function
3098
  makes sure those are copied.
3099

3100
  @param lu: calling logical unit
3101
  @param additional_nodes: list of nodes not in the config to distribute to
3102
  @type additional_vm: boolean
3103
  @param additional_vm: whether the additional nodes are vm-capable or not
3104

3105
  """
3106
  # 1. Gather target nodes
3107
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3108
  dist_nodes = lu.cfg.GetOnlineNodeList()
3109
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3110
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3111
  if additional_nodes is not None:
3112
    dist_nodes.extend(additional_nodes)
3113
    if additional_vm:
3114
      vm_nodes.extend(additional_nodes)
3115
  if myself.name in dist_nodes:
3116
    dist_nodes.remove(myself.name)
3117
  if myself.name in vm_nodes:
3118
    vm_nodes.remove(myself.name)
3119

    
3120
  # 2. Gather files to distribute
3121
  dist_files = set([constants.ETC_HOSTS,
3122
                    constants.SSH_KNOWN_HOSTS_FILE,
3123
                    constants.RAPI_CERT_FILE,
3124
                    constants.RAPI_USERS_FILE,
3125
                    constants.CONFD_HMAC_KEY,
3126
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
3127
                   ])
3128

    
3129
  vm_files = set()
3130
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3131
  for hv_name in enabled_hypervisors:
3132
    hv_class = hypervisor.GetHypervisor(hv_name)
3133
    vm_files.update(hv_class.GetAncillaryFiles())
3134

    
3135
  # 3. Perform the files upload
3136
  for fname in dist_files:
3137
    _UploadHelper(lu, dist_nodes, fname)
3138
  for fname in vm_files:
3139
    _UploadHelper(lu, vm_nodes, fname)
3140

    
3141

    
3142
class LUClusterRedistConf(NoHooksLU):
3143
  """Force the redistribution of cluster configuration.
3144

3145
  This is a very simple LU.
3146

3147
  """
3148
  REQ_BGL = False
3149

    
3150
  def ExpandNames(self):
3151
    self.needed_locks = {
3152
      locking.LEVEL_NODE: locking.ALL_SET,
3153
    }
3154
    self.share_locks[locking.LEVEL_NODE] = 1
3155

    
3156
  def Exec(self, feedback_fn):
3157
    """Redistribute the configuration.
3158

3159
    """
3160
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3161
    _RedistributeAncillaryFiles(self)
3162

    
3163

    
3164
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3165
  """Sleep and poll for an instance's disk to sync.
3166

3167
  """
3168
  if not instance.disks or disks is not None and not disks:
3169
    return True
3170

    
3171
  disks = _ExpandCheckDisks(instance, disks)
3172

    
3173
  if not oneshot:
3174
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3175

    
3176
  node = instance.primary_node
3177

    
3178
  for dev in disks:
3179
    lu.cfg.SetDiskID(dev, node)
3180

    
3181
  # TODO: Convert to utils.Retry
3182

    
3183
  retries = 0
3184
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3185
  while True:
3186
    max_time = 0
3187
    done = True
3188
    cumul_degraded = False
3189
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3190
    msg = rstats.fail_msg
3191
    if msg:
3192
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3193
      retries += 1
3194
      if retries >= 10:
3195
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3196
                                 " aborting." % node)
3197
      time.sleep(6)
3198
      continue
3199
    rstats = rstats.payload
3200
    retries = 0
3201
    for i, mstat in enumerate(rstats):
3202
      if mstat is None:
3203
        lu.LogWarning("Can't compute data for node %s/%s",
3204
                           node, disks[i].iv_name)
3205
        continue
3206

    
3207
      cumul_degraded = (cumul_degraded or
3208
                        (mstat.is_degraded and mstat.sync_percent is None))
3209
      if mstat.sync_percent is not None:
3210
        done = False
3211
        if mstat.estimated_time is not None:
3212
          rem_time = ("%s remaining (estimated)" %
3213
                      utils.FormatSeconds(mstat.estimated_time))
3214
          max_time = mstat.estimated_time
3215
        else:
3216
          rem_time = "no time estimate"
3217
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3218
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3219

    
3220
    # if we're done but degraded, let's do a few small retries, to
3221
    # make sure we see a stable and not transient situation; therefore
3222
    # we force restart of the loop
3223
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3224
      logging.info("Degraded disks found, %d retries left", degr_retries)
3225
      degr_retries -= 1
3226
      time.sleep(1)
3227
      continue
3228

    
3229
    if done or oneshot:
3230
      break
3231

    
3232
    time.sleep(min(60, max_time))
3233

    
3234
  if done:
3235
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3236
  return not cumul_degraded
3237

    
3238

    
3239
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3240
  """Check that mirrors are not degraded.
3241

3242
  The ldisk parameter, if True, will change the test from the
3243
  is_degraded attribute (which represents overall non-ok status for
3244
  the device(s)) to the ldisk (representing the local storage status).
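
  @param lu: the logical unit on whose behalf the check is done
  @param dev: the disk object to check; its children, if any, are
      checked recursively
  @param node: the node on which to check the disk
  @param on_primary: whether the node is the instance's primary node;
      on a secondary the check is only done if the disk type is
      assembled there as well
  @param ldisk: whether to check the local disk status instead of the
      overall degradation status
  @rtype: boolean
  @return: True if the disk looks consistent on the given node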
3245

3246
  """
3247
  lu.cfg.SetDiskID(dev, node)
3248

    
3249
  result = True
3250

    
3251
  if on_primary or dev.AssembleOnSecondary():
3252
    rstats = lu.rpc.call_blockdev_find(node, dev)
3253
    msg = rstats.fail_msg
3254
    if msg:
3255
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3256
      result = False
3257
    elif not rstats.payload:
3258
      lu.LogWarning("Can't find disk on node %s", node)
3259
      result = False
3260
    else:
3261
      if ldisk:
3262
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3263
      else:
3264
        result = result and not rstats.payload.is_degraded
3265

    
3266
  if dev.children:
3267
    for child in dev.children:
3268
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3269

    
3270
  return result
3271

    
3272

    
3273
class LUOobCommand(NoHooksLU):
3274
  """Logical unit for OOB handling.
3275

3276
  """
3277
  REQ_BGL = False
3278

    
3279
  def CheckPrereq(self):
3280
    """Check prerequisites.
3281

3282
    This checks:
3283
     - the node exists in the configuration
3284
     - OOB is supported
3285

3286
    Any errors are signaled by raising errors.OpPrereqError.
3287

3288
    """
3289
    self.nodes = []
3290
    for node_name in self.op.node_names:
3291
      node = self.cfg.GetNodeInfo(node_name)
3292

    
3293
      if node is None:
3294
        raise errors.OpPrereqError("Node %s not found" % node_name,
3295
                                   errors.ECODE_NOENT)
3296
      else:
3297
        self.nodes.append(node)
3298

    
3299
      if (self.op.command == constants.OOB_POWER_OFF and not node.offline):
3300
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3301
                                    " not marked offline") % node_name,
3302
                                   errors.ECODE_STATE)
3303

    
3304
  def ExpandNames(self):
3305
    """Gather locks we need.
3306

3307
    """
3308
    if self.op.node_names:
3309
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3310
                            for name in self.op.node_names]
3311
    else:
3312
      self.op.node_names = self.cfg.GetNodeList()
3313

    
3314
    self.needed_locks = {
3315
      locking.LEVEL_NODE: self.op.node_names,
3316
      }
3317

    
3318
  def Exec(self, feedback_fn):
3319
    """Execute OOB and return result if we expect any.
3320

3321
    """
3322
    master_node = self.cfg.GetMasterNode()
3323
    ret = []
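    # 'ret' holds one entry per node; each entry is a list of
    # (status, data) tuples, e.g. (illustrative values):
    #   [(constants.RS_NORMAL, "node1.example.com"),
    #    (constants.RS_NORMAL, <oob payload>)]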
3324

    
3325
    for node in self.nodes:
3326
      node_entry = [(constants.RS_NORMAL, node.name)]
3327
      ret.append(node_entry)
3328

    
3329
      oob_program = _SupportsOob(self.cfg, node)
3330

    
3331
      if not oob_program:
3332
        node_entry.append((constants.RS_UNAVAIL, None))
3333
        continue
3334

    
3335
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3336
                   self.op.command, oob_program, node.name)
3337
      result = self.rpc.call_run_oob(master_node, oob_program,
3338
                                     self.op.command, node.name,
3339
                                     self.op.timeout)
3340

    
3341
      if result.fail_msg:
3342
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3343
                        node.name, result.fail_msg)
3344
        node_entry.append((constants.RS_NODATA, None))
3345
      else:
3346
        try:
3347
          self._CheckPayload(result)
3348
        except errors.OpExecError, err:
3349
          self.LogWarning("The payload returned by '%s' is not valid: %s",
3350
                          node.name, err)
3351
          node_entry.append((constants.RS_NODATA, None))
3352
        else:
3353
          if self.op.command == constants.OOB_HEALTH:
3354
            # For health we should log important events
3355
            for item, status in result.payload:
3356
              if status in [constants.OOB_STATUS_WARNING,
3357
                            constants.OOB_STATUS_CRITICAL]:
3358
                self.LogWarning("On node '%s' item '%s' has status '%s'",
3359
                                node.name, item, status)
3360

    
3361
          if self.op.command == constants.OOB_POWER_ON:
3362
            node.powered = True
3363
          elif self.op.command == constants.OOB_POWER_OFF:
3364
            node.powered = False
3365
          elif self.op.command == constants.OOB_POWER_STATUS:
3366
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3367
            if powered != node.powered:
3368
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3369
                               " match actual power state (%s)"), node.powered,
3370
                              node.name, powered)
3371

    
3372
          # For configuration-changing commands we should update the node
3373
          if self.op.command in (constants.OOB_POWER_ON,
3374
                                 constants.OOB_POWER_OFF):
3375
            self.cfg.Update(node, feedback_fn)
3376

    
3377
          node_entry.append((constants.RS_NORMAL, result.payload))
3378

    
3379
    return ret
3380

    
3381
  def _CheckPayload(self, result):
3382
    """Checks if the payload is valid.
3383

3384
    @param result: RPC result
3385
    @raises errors.OpExecError: If payload is not valid
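
    Expected payload shapes (sketch, based on the checks below): the
    "health" command returns a list of (item, status) pairs, the
    "power-status" command returns a dict containing the
    L{constants.OOB_POWER_STATUS_POWERED} key, and the power
    on/off/cycle commands return no payload at all.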
3386

3387
    """
3388
    errs = []
3389
    if self.op.command == constants.OOB_HEALTH:
3390
      if not isinstance(result.payload, list):
3391
        errs.append("command 'health' is expected to return a list but got %s" %
3392
                    type(result.payload))
3393
      else:
3394
        for item, status in result.payload:
3395
          if status not in constants.OOB_STATUSES:
3396
            errs.append("health item '%s' has invalid status '%s'" %
3397
                        (item, status))
3398

    
3399
    if self.op.command == constants.OOB_POWER_STATUS:
3400
      if not isinstance(result.payload, dict):
3401
        errs.append("power-status is expected to return a dict but got %s" %
3402
                    type(result.payload))
3403

    
3404
    if self.op.command in [
3405
        constants.OOB_POWER_ON,
3406
        constants.OOB_POWER_OFF,
3407
        constants.OOB_POWER_CYCLE,
3408
        ]:
3409
      if result.payload is not None:
3410
        errs.append("%s is expected to not return payload but got '%s'" %
3411
                    (self.op.command, result.payload))
3412

    
3413
    if errs:
3414
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3415
                               utils.CommaJoin(errs))
3416

    
3417

    
3418

    
3419
class LUOsDiagnose(NoHooksLU):
3420
  """Logical unit for OS diagnose/query.
3421

3422
  """
3423
  REQ_BGL = False
3424
  _HID = "hidden"
3425
  _BLK = "blacklisted"
3426
  _VLD = "valid"
3427
  _FIELDS_STATIC = utils.FieldSet()
3428
  _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3429
                                   "parameters", "api_versions", _HID, _BLK)
3430

    
3431
  def CheckArguments(self):
3432
    if self.op.names:
3433
      raise errors.OpPrereqError("Selective OS query not supported",
3434
                                 errors.ECODE_INVAL)
3435

    
3436
    _CheckOutputFields(static=self._FIELDS_STATIC,
3437
                       dynamic=self._FIELDS_DYNAMIC,
3438
                       selected=self.op.output_fields)
3439

    
3440
  def ExpandNames(self):
3441
    # Lock all nodes, in shared mode
3442
    # Temporary removal of locks, should be reverted later
3443
    # TODO: reintroduce locks when they are lighter-weight
3444
    self.needed_locks = {}
3445
    #self.share_locks[locking.LEVEL_NODE] = 1
3446
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3447

    
3448
  @staticmethod
3449
  def _DiagnoseByOS(rlist):
3450
    """Remaps a per-node return list into an a per-os per-node dictionary
3451

3452
    @param rlist: a map with node names as keys and OS objects as values
3453

3454
    @rtype: dict
3455
    @return: a dictionary with osnames as keys and as value another
3456
        map, with nodes as keys and tuples of (path, status, diagnose,
3457
        variants, parameters, api_versions) as values, eg::
3458

3459
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3460
                                     (/srv/..., False, "invalid api")],
3461
                           "node2": [(/srv/..., True, "", [], [])]}
3462
          }
3463

3464
    """
3465
    all_os = {}
3466
    # we build here the list of nodes that didn't fail the RPC (at RPC
3467
    # level), so that nodes with a non-responding node daemon don't
3468
    # make all OSes invalid
3469
    good_nodes = [node_name for node_name in rlist
3470
                  if not rlist[node_name].fail_msg]
3471
    for node_name, nr in rlist.items():
3472
      if nr.fail_msg or not nr.payload:
3473
        continue
3474
      for (name, path, status, diagnose, variants,
3475
           params, api_versions) in nr.payload:
3476
        if name not in all_os:
3477
          # build a list of nodes for this os containing empty lists
3478
          # for each node in node_list
3479
          all_os[name] = {}
3480
          for nname in good_nodes:
3481
            all_os[name][nname] = []
3482
        # convert params from [name, help] to (name, help)
3483
        params = [tuple(v) for v in params]
3484
        all_os[name][node_name].append((path, status, diagnose,
3485
                                        variants, params, api_versions))
3486
    return all_os
3487

    
3488
  def Exec(self, feedback_fn):
3489
    """Compute the list of OSes.
3490

3491
    """
3492
    valid_nodes = [node.name
3493
                   for node in self.cfg.GetAllNodesInfo().values()
3494
                   if not node.offline and node.vm_capable]
3495
    node_data = self.rpc.call_os_diagnose(valid_nodes)
3496
    pol = self._DiagnoseByOS(node_data)
3497
    output = []
3498
    cluster = self.cfg.GetClusterInfo()
3499

    
3500
    for os_name in utils.NiceSort(pol.keys()):
3501
      os_data = pol[os_name]
3502
      row = []
3503
      valid = True
3504
      (variants, params, api_versions) = null_state = (set(), set(), set())
3505
      for idx, osl in enumerate(os_data.values()):
3506
        valid = bool(valid and osl and osl[0][1])
3507
        if not valid:
3508
          (variants, params, api_versions) = null_state
3509
          break
3510
        node_variants, node_params, node_api = osl[0][3:6]
3511
        if idx == 0: # first entry
3512
          variants = set(node_variants)
3513
          params = set(node_params)
3514
          api_versions = set(node_api)
3515
        else: # keep consistency
3516
          variants.intersection_update(node_variants)
3517
          params.intersection_update(node_params)
3518
          api_versions.intersection_update(node_api)
3519

    
3520
      is_hid = os_name in cluster.hidden_os
3521
      is_blk = os_name in cluster.blacklisted_os
3522
      if ((self._HID not in self.op.output_fields and is_hid) or
3523
          (self._BLK not in self.op.output_fields and is_blk) or
3524
          (self._VLD not in self.op.output_fields and not valid)):
3525
        continue
3526

    
3527
      for field in self.op.output_fields:
3528
        if field == "name":
3529
          val = os_name
3530
        elif field == self._VLD:
3531
          val = valid
3532
        elif field == "node_status":
3533
          # this is just a copy of the dict
3534
          val = {}
3535
          for node_name, nos_list in os_data.items():
3536
            val[node_name] = nos_list
3537
        elif field == "variants":
3538
          val = utils.NiceSort(list(variants))
3539
        elif field == "parameters":
3540
          val = list(params)
3541
        elif field == "api_versions":
3542
          val = list(api_versions)
3543
        elif field == self._HID:
3544
          val = is_hid
3545
        elif field == self._BLK:
3546
          val = is_blk
3547
        else:
3548
          raise errors.ParameterError(field)
3549
        row.append(val)
3550
      output.append(row)
3551

    
3552
    return output
3553

    
3554

    
3555
class LUNodeRemove(LogicalUnit):
3556
  """Logical unit for removing a node.
3557

3558
  """
3559
  HPATH = "node-remove"
3560
  HTYPE = constants.HTYPE_NODE
3561

    
3562
  def BuildHooksEnv(self):
3563
    """Build hooks env.
3564

3565
    This doesn't run on the target node in the pre phase as a failed
3566
    node would then be impossible to remove.
3567

3568
    """
3569
    env = {
3570
      "OP_TARGET": self.op.node_name,
3571
      "NODE_NAME": self.op.node_name,
3572
      }
3573
    all_nodes = self.cfg.GetNodeList()
3574
    try:
3575
      all_nodes.remove(self.op.node_name)
3576
    except ValueError:
3577
      logging.warning("Node %s which is about to be removed not found"
3578
                      " in the all nodes list", self.op.node_name)
3579
    return env, all_nodes, all_nodes
3580

    
3581
  def CheckPrereq(self):
3582
    """Check prerequisites.
3583

3584
    This checks:
3585
     - the node exists in the configuration
3586
     - it does not have primary or secondary instances
3587
     - it's not the master
3588

3589
    Any errors are signaled by raising errors.OpPrereqError.
3590

3591
    """
3592
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3593
    node = self.cfg.GetNodeInfo(self.op.node_name)
3594
    assert node is not None
3595

    
3596
    instance_list = self.cfg.GetInstanceList()
3597

    
3598
    masternode = self.cfg.GetMasterNode()
3599
    if node.name == masternode:
3600
      raise errors.OpPrereqError("Node is the master node,"
3601
                                 " you need to failover first.",
3602
                                 errors.ECODE_INVAL)
3603

    
3604
    for instance_name in instance_list:
3605
      instance = self.cfg.GetInstanceInfo(instance_name)
3606
      if node.name in instance.all_nodes:
3607
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3608
                                   " please remove first." % instance_name,
3609
                                   errors.ECODE_INVAL)
3610
    self.op.node_name = node.name
3611
    self.node = node
3612

    
3613
  def Exec(self, feedback_fn):
3614
    """Removes the node from the cluster.
3615

3616
    """
3617
    node = self.node
3618
    logging.info("Stopping the node daemon and removing configs from node %s",
3619
                 node.name)
3620

    
3621
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3622

    
3623
    # Promote nodes to master candidate as needed
3624
    _AdjustCandidatePool(self, exceptions=[node.name])
3625
    self.context.RemoveNode(node.name)
3626

    
3627
    # Run post hooks on the node before it's removed
3628
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3629
    try:
3630
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3631
    except:
3632
      # pylint: disable-msg=W0702
3633
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3634

    
3635
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3636
    msg = result.fail_msg
3637
    if msg:
3638
      self.LogWarning("Errors encountered on the remote node while leaving"
3639
                      " the cluster: %s", msg)
3640

    
3641
    # Remove node from our /etc/hosts
3642
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3643
      master_node = self.cfg.GetMasterNode()
3644
      result = self.rpc.call_etc_hosts_modify(master_node,
3645
                                              constants.ETC_HOSTS_REMOVE,
3646
                                              node.name, None)
3647
      result.Raise("Can't update hosts file with new host data")
3648
      _RedistributeAncillaryFiles(self)
3649

    
3650

    
3651
class _NodeQuery(_QueryBase):
3652
  FIELDS = query.NODE_FIELDS
3653

    
3654
  def ExpandNames(self, lu):
3655
    lu.needed_locks = {}
3656
    lu.share_locks[locking.LEVEL_NODE] = 1
3657

    
3658
    if self.names:
3659
      self.wanted = _GetWantedNodes(lu, self.names)
3660
    else:
3661
      self.wanted = locking.ALL_SET
3662

    
3663
    self.do_locking = (self.use_locking and
3664
                       query.NQ_LIVE in self.requested_data)
3665

    
3666
    if self.do_locking:
3667
      # if we don't request only static fields, we need to lock the nodes
3668
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3669

    
3670
  def DeclareLocks(self, lu, level):
3671
    pass
3672

    
3673
  def _GetQueryData(self, lu):
3674
    """Computes the list of nodes and their attributes.
3675

3676
    """
3677
    all_info = lu.cfg.GetAllNodesInfo()
3678

    
3679
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3680

    
3681
    # Gather data as requested
3682
    if query.NQ_LIVE in self.requested_data:
3683
      # filter out non-vm_capable nodes
3684
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3685

    
3686
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3687
                                        lu.cfg.GetHypervisorType())
3688
      live_data = dict((name, nresult.payload)
3689
                       for (name, nresult) in node_data.items()
3690
                       if not nresult.fail_msg and nresult.payload)
3691
    else:
3692
      live_data = None
3693

    
3694
    if query.NQ_INST in self.requested_data:
3695
      node_to_primary = dict([(name, set()) for name in nodenames])
3696
      node_to_secondary = dict([(name, set()) for name in nodenames])
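      # both map a node name to the set of instances for which that node
      # is primary respectively secondary, e.g. (illustrative)
      # {"node1.example.com": set(["inst1", "inst2"]), ...}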
3697

    
3698
      inst_data = lu.cfg.GetAllInstancesInfo()
3699

    
3700
      for inst in inst_data.values():
3701
        if inst.primary_node in node_to_primary:
3702
          node_to_primary[inst.primary_node].add(inst.name)
3703
        for secnode in inst.secondary_nodes:
3704
          if secnode in node_to_secondary:
3705
            node_to_secondary[secnode].add(inst.name)
3706
    else:
3707
      node_to_primary = None
3708
      node_to_secondary = None
3709

    
3710
    if query.NQ_OOB in self.requested_data:
3711
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3712
                         for name, node in all_info.iteritems())
3713
    else:
3714
      oob_support = None
3715

    
3716
    if query.NQ_GROUP in self.requested_data:
3717
      groups = lu.cfg.GetAllNodeGroupsInfo()
3718
    else:
3719
      groups = {}
3720

    
3721
    return query.NodeQueryData([all_info[name] for name in nodenames],
3722
                               live_data, lu.cfg.GetMasterNode(),
3723
                               node_to_primary, node_to_secondary, groups,
3724
                               oob_support, lu.cfg.GetClusterInfo())
3725

    
3726

    
3727
class LUNodeQuery(NoHooksLU):
3728
  """Logical unit for querying nodes.
3729

3730
  """
3731
  # pylint: disable-msg=W0142
3732
  REQ_BGL = False
3733

    
3734
  def CheckArguments(self):
3735
    self.nq = _NodeQuery(self.op.names, self.op.output_fields,
3736
                         self.op.use_locking)
3737

    
3738
  def ExpandNames(self):
3739
    self.nq.ExpandNames(self)
3740

    
3741
  def Exec(self, feedback_fn):
3742
    return self.nq.OldStyleQuery(self)
3743

    
3744

    
3745
class LUNodeQueryvols(NoHooksLU):
3746
  """Logical unit for getting volumes on node(s).
3747

3748
  """
3749
  REQ_BGL = False
3750
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3751
  _FIELDS_STATIC = utils.FieldSet("node")
3752

    
3753
  def CheckArguments(self):
3754
    _CheckOutputFields(static=self._FIELDS_STATIC,
3755
                       dynamic=self._FIELDS_DYNAMIC,
3756
                       selected=self.op.output_fields)
3757

    
3758
  def ExpandNames(self):
3759
    self.needed_locks = {}
3760
    self.share_locks[locking.LEVEL_NODE] = 1
3761
    if not self.op.nodes:
3762
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3763
    else:
3764
      self.needed_locks[locking.LEVEL_NODE] = \
3765
        _GetWantedNodes(self, self.op.nodes)
3766

    
3767
  def Exec(self, feedback_fn):
3768
    """Computes the list of nodes and their attributes.
3769

3770
    """
3771
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3772
    volumes = self.rpc.call_node_volumes(nodenames)
3773

    
3774
    ilist = self.cfg.GetAllInstancesInfo()
3775

    
3776
    vol2inst = dict(((node, vol), inst.name)
3777
                    for inst in ilist.values()
3778
                    for (node, vols) in inst.MapLVsByNode().items()
3779
                    for vol in vols)
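
    # vol2inst maps (node_name, "<vg>/<lv_name>") to the owning
    # instance's name; volumes not in this map belong to no known
    # instance and are reported as "-" below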
3780

    
3781
    output = []
3782
    for node in nodenames:
3783
      nresult = volumes[node]
3784
      if nresult.offline:
3785
        continue
3786
      msg = nresult.fail_msg
3787
      if msg:
3788
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3789
        continue
3790

    
3791
      node_vols = sorted(nresult.payload,
3792
                         key=operator.itemgetter("dev"))
3793

    
3794
      for vol in node_vols:
3795
        node_output = []
3796
        for field in self.op.output_fields:
3797
          if field == "node":
3798
            val = node
3799
          elif field == "phys":
3800
            val = vol['dev']
3801
          elif field == "vg":
3802
            val = vol['vg']
3803
          elif field == "name":
3804
            val = vol['name']
3805
          elif field == "size":
3806
            val = int(float(vol['size']))
3807
          elif field == "instance":
3808
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
3809
          else:
3810
            raise errors.ParameterError(field)
3811
          node_output.append(str(val))
3812

    
3813
        output.append(node_output)
3814

    
3815
    return output
3816

    
3817

    
3818
class LUNodeQueryStorage(NoHooksLU):
3819
  """Logical unit for getting information on storage units on node(s).
3820

3821
  """
3822
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3823
  REQ_BGL = False
3824

    
3825
  def CheckArguments(self):
3826
    _CheckOutputFields(static=self._FIELDS_STATIC,
3827
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3828
                       selected=self.op.output_fields)
3829

    
3830
  def ExpandNames(self):
3831
    self.needed_locks = {}
3832
    self.share_locks[locking.LEVEL_NODE] = 1
3833

    
3834
    if self.op.nodes:
3835
      self.needed_locks[locking.LEVEL_NODE] = \
3836
        _GetWantedNodes(self, self.op.nodes)
3837
    else:
3838
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3839

    
3840
  def Exec(self, feedback_fn):
3841
    """Computes the list of nodes and their attributes.
3842

3843
    """
3844
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3845

    
3846
    # Always get name to sort by
3847
    if constants.SF_NAME in self.op.output_fields:
3848
      fields = self.op.output_fields[:]
3849
    else:
3850
      fields = [constants.SF_NAME] + self.op.output_fields
3851

    
3852
    # Never ask for node or type as it's only known to the LU
3853
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3854
      while extra in fields:
3855
        fields.remove(extra)
3856

    
3857
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
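    # field_idx maps each requested backend field to its column in the
    # rows returned by call_storage_list, e.g. {"name": 0, "size": 1}
    # (illustrative)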
3858
    name_idx = field_idx[constants.SF_NAME]
3859

    
3860
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3861
    data = self.rpc.call_storage_list(self.nodes,
3862
                                      self.op.storage_type, st_args,
3863
                                      self.op.name, fields)
3864

    
3865
    result = []
3866

    
3867
    for node in utils.NiceSort(self.nodes):
3868
      nresult = data[node]
3869
      if nresult.offline:
3870
        continue
3871

    
3872
      msg = nresult.fail_msg
3873
      if msg:
3874
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3875
        continue
3876

    
3877
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3878

    
3879
      for name in utils.NiceSort(rows.keys()):
3880
        row = rows[name]
3881

    
3882
        out = []
3883

    
3884
        for field in self.op.output_fields:
3885
          if field == constants.SF_NODE:
3886
            val = node
3887
          elif field == constants.SF_TYPE:
3888
            val = self.op.storage_type
3889
          elif field in field_idx:
3890
            val = row[field_idx[field]]
3891
          else:
3892
            raise errors.ParameterError(field)
3893

    
3894
          out.append(val)
3895

    
3896
        result.append(out)
3897

    
3898
    return result
3899

    
3900

    
3901
class _InstanceQuery(_QueryBase):
3902
  FIELDS = query.INSTANCE_FIELDS
3903

    
3904
  def ExpandNames(self, lu):
3905
    lu.needed_locks = {}
3906
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
3907
    lu.share_locks[locking.LEVEL_NODE] = 1
3908

    
3909
    if self.names:
3910
      self.wanted = _GetWantedInstances(lu, self.names)
3911
    else:
3912
      self.wanted = locking.ALL_SET
3913

    
3914
    self.do_locking = (self.use_locking and
3915
                       query.IQ_LIVE in self.requested_data)
3916
    if self.do_locking:
3917
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3918
      lu.needed_locks[locking.LEVEL_NODE] = []
3919
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3920

    
3921
  def DeclareLocks(self, lu, level):
3922
    if level == locking.LEVEL_NODE and self.do_locking:
3923
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
3924

    
3925
  def _GetQueryData(self, lu):
3926
    """Computes the list of instances and their attributes.
3927

3928
    """
3929
    cluster = lu.cfg.GetClusterInfo()
3930
    all_info = lu.cfg.GetAllInstancesInfo()
3931

    
3932
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3933

    
3934
    instance_list = [all_info[name] for name in instance_names]
3935
    nodes = frozenset(itertools.chain(*(inst.all_nodes
3936
                                        for inst in instance_list)))
3937
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3938
    bad_nodes = []
3939
    offline_nodes = []
3940
    wrongnode_inst = set()
3941

    
3942
    # Gather data as requested
3943
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3944
      live_data = {}
3945
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3946
      for name in nodes:
3947
        result = node_data[name]
3948
        if result.offline:
3949
          # offline nodes will be in both lists
3950
          assert result.fail_msg
3951
          offline_nodes.append(name)
3952
        if result.fail_msg:
3953
          bad_nodes.append(name)
3954
        elif result.payload:
3955
          for inst in result.payload:
3956
            if inst in all_info:
3957
              if all_info[inst].primary_node == name:
3958
                live_data.update(result.payload)
3959
              else:
3960
                wrongnode_inst.add(inst)
3961
            else:
3962
              # orphan instance; we don't list it here as we don't
3963
              # handle this case yet in the output of instance listing
3964
              logging.warning("Orphan instance '%s' found on node %s",
3965
                              inst, name)
3966
        # else no instance is alive
3967
    else:
3968
      live_data = {}
3969

    
3970
    if query.IQ_DISKUSAGE in self.requested_data:
3971
      disk_usage = dict((inst.name,
3972
                         _ComputeDiskSize(inst.disk_template,
3973
                                          [{"size": disk.size}
3974
                                           for disk in inst.disks]))
3975
                        for inst in instance_list)
3976
    else:
3977
      disk_usage = None
3978

    
3979
    if query.IQ_CONSOLE in self.requested_data:
3980
      consinfo = {}
3981
      for inst in instance_list:
3982
        if inst.name in live_data:
3983
          # Instance is running
3984
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
3985
        else:
3986
          consinfo[inst.name] = None
3987
      assert set(consinfo.keys()) == set(instance_names)
3988
    else:
3989
      consinfo = None
3990

    
3991
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
3992
                                   disk_usage, offline_nodes, bad_nodes,
3993
                                   live_data, wrongnode_inst, consinfo)
3994

    
3995

    
3996
class LUQuery(NoHooksLU):
3997
  """Query for resources/items of a certain kind.
3998

3999
  """
4000
  # pylint: disable-msg=W0142
4001
  REQ_BGL = False
4002

    
4003
  def CheckArguments(self):
4004
    qcls = _GetQueryImplementation(self.op.what)
4005
    names = qlang.ReadSimpleFilter("name", self.op.filter)
4006

    
4007
    self.impl = qcls(names, self.op.fields, False)
4008

    
4009
  def ExpandNames(self):
4010
    self.impl.ExpandNames(self)
4011

    
4012
  def DeclareLocks(self, level):
4013
    self.impl.DeclareLocks(self, level)
4014

    
4015
  def Exec(self, feedback_fn):
4016
    return self.impl.NewStyleQuery(self)
4017

    
4018

    
4019
class LUQueryFields(NoHooksLU):
4020
  """Query for resources/items of a certain kind.
4021

4022
  """
4023
  # pylint: disable-msg=W0142
4024
  REQ_BGL = False
4025

    
4026
  def CheckArguments(self):
4027
    self.qcls = _GetQueryImplementation(self.op.what)
4028

    
4029
  def ExpandNames(self):
4030
    self.needed_locks = {}
4031

    
4032
  def Exec(self, feedback_fn):
4033
    return self.qcls.FieldsQuery(self.op.fields)
4034

    
4035

    
4036
class LUNodeModifyStorage(NoHooksLU):
4037
  """Logical unit for modifying a storage volume on a node.
4038

4039
  """
4040
  REQ_BGL = False
4041

    
4042
  def CheckArguments(self):
4043
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4044

    
4045
    storage_type = self.op.storage_type
4046

    
4047
    try:
4048
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4049
    except KeyError:
4050
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4051
                                 " modified" % storage_type,
4052
                                 errors.ECODE_INVAL)
4053

    
4054
    diff = set(self.op.changes.keys()) - modifiable
4055
    if diff:
4056
      raise errors.OpPrereqError("The following fields can not be modified for"
4057
                                 " storage units of type '%s': %r" %
4058
                                 (storage_type, list(diff)),
4059
                                 errors.ECODE_INVAL)
4060

    
4061
  def ExpandNames(self):
4062
    self.needed_locks = {
4063
      locking.LEVEL_NODE: self.op.node_name,
4064
      }
4065

    
4066
  def Exec(self, feedback_fn):
4067
    """Computes the list of nodes and their attributes.
4068

4069
    """
4070
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4071
    result = self.rpc.call_storage_modify(self.op.node_name,
4072
                                          self.op.storage_type, st_args,
4073
                                          self.op.name, self.op.changes)
4074
    result.Raise("Failed to modify storage unit '%s' on %s" %
4075
                 (self.op.name, self.op.node_name))
4076

    
4077

    
4078
class LUNodeAdd(LogicalUnit):
4079
  """Logical unit for adding node to the cluster.
4080

4081
  """
4082
  HPATH = "node-add"
4083
  HTYPE = constants.HTYPE_NODE
4084
  _NFLAGS = ["master_capable", "vm_capable"]
4085

    
4086
  def CheckArguments(self):
4087
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4088
    # validate/normalize the node name
4089
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4090
                                         family=self.primary_ip_family)
4091
    self.op.node_name = self.hostname.name
4092

    
4093
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4094
      raise errors.OpPrereqError("Cannot readd the master node",
4095
                                 errors.ECODE_STATE)
4096

    
4097
    if self.op.readd and self.op.group:
4098
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4099
                                 " being readded", errors.ECODE_INVAL)
4100

    
4101
  def BuildHooksEnv(self):
4102
    """Build hooks env.
4103

4104
    This will run on all nodes before, and on all nodes + the new node after.
4105

4106
    """
4107
    env = {
4108
      "OP_TARGET": self.op.node_name,
4109
      "NODE_NAME": self.op.node_name,
4110
      "NODE_PIP": self.op.primary_ip,
4111
      "NODE_SIP": self.op.secondary_ip,
4112
      "MASTER_CAPABLE": str(self.op.master_capable),
4113
      "VM_CAPABLE": str(self.op.vm_capable),
4114
      }
4115
    nodes_0 = self.cfg.GetNodeList()
4116
    nodes_1 = nodes_0 + [self.op.node_name, ]
4117
    return env, nodes_0, nodes_1
4118

    
4119
  def CheckPrereq(self):
4120
    """Check prerequisites.
4121

4122
    This checks:
4123
     - the new node is not already in the config
4124
     - it is resolvable
4125
     - its parameters (single/dual homed) match the cluster
4126

4127
    Any errors are signaled by raising errors.OpPrereqError.
4128

4129
    """
4130
    cfg = self.cfg
4131
    hostname = self.hostname
4132
    node = hostname.name
4133
    primary_ip = self.op.primary_ip = hostname.ip
4134
    if self.op.secondary_ip is None:
4135
      if self.primary_ip_family == netutils.IP6Address.family:
4136
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4137
                                   " IPv4 address must be given as secondary",
4138
                                   errors.ECODE_INVAL)
4139
      self.op.secondary_ip = primary_ip
4140

    
4141
    secondary_ip = self.op.secondary_ip
4142
    if not netutils.IP4Address.IsValid(secondary_ip):
4143
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4144
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4145

    
4146
    node_list = cfg.GetNodeList()
4147
    if not self.op.readd and node in node_list:
4148
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4149
                                 node, errors.ECODE_EXISTS)
4150
    elif self.op.readd and node not in node_list:
4151
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4152
                                 errors.ECODE_NOENT)
4153

    
4154
    self.changed_primary_ip = False
4155

    
4156
    for existing_node_name in node_list:
4157
      existing_node = cfg.GetNodeInfo(existing_node_name)
4158

    
4159
      if self.op.readd and node == existing_node_name:
4160
        if existing_node.secondary_ip != secondary_ip:
4161
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4162
                                     " address configuration as before",
4163
                                     errors.ECODE_INVAL)
4164
        if existing_node.primary_ip != primary_ip:
4165
          self.changed_primary_ip = True
4166

    
4167
        continue
4168

    
4169
      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
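  # For illustration: the keys of _F2R are (master_candidate, drained,
  # offline) tuples, so a plain node maps as (False, False, False) ->
  # _ROLE_REGULAR, while _R2F inverts the mapping, e.g.
  # _R2F[_ROLE_OFFLINE] == (False, False, True).
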
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a (disks_ok, device_info) tuple; disks_ok is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


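# Typical callers unpack the result of _AssembleInstanceDisks as e.g.
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
# and only use disks_info when disks_ok is True (see
# LUInstanceActivateDisks.Exec above and _StartInstanceDisks below).

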
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


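# Note: _ExpandCheckDisks(instance, None) simply returns instance.disks, so
# callers of the disk helpers that pass disks=None act on all of the
# instance's disks.

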
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they make the function return failure).

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


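# Reading of the loop above: _ShutdownInstanceDisks returns True only when all
# relevant shutdowns succeed; primary-node failures count unless
# ignore_primary is set, and secondary-node failures are tolerated when the
# RPC result marks that node as offline.

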
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


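# Usage illustration for _CheckNodeFreeMemory: LUInstanceStartup.CheckPrereq
# below calls it as
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# i.e. with the instance's configured memory (in MiB) and its hypervisor.

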
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


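# For example (illustrative values only), req_sizes = {"myvg": 10240} asks
# _CheckNodesFreeDiskPerVG to verify that every node in nodenames has at
# least 10 GiB free in the volume group "myvg".

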
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

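    # Soft and hard reboots are handled by the node daemon in a single RPC;
    # any other reboot type (a full reboot) is emulated below by an explicit
    # shutdown, disk re-activation and instance start.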
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

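    # (The DRBD8 logical_id manipulated above is the 6-tuple
    # (node_a, node_b, port, minor_a, minor_b, secret); only the nodes and
    # minors are replaced, while the port and secret are kept.)
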
    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template == constants.DT_FILE and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.
5671

5672
  """
5673
  HPATH = "instance-remove"
5674
  HTYPE = constants.HTYPE_INSTANCE
5675
  REQ_BGL = False
5676

    
5677
  def ExpandNames(self):
5678
    self._ExpandAndLockInstance()
5679
    self.needed_locks[locking.LEVEL_NODE] = []
5680
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5681

    
5682
  def DeclareLocks(self, level):
5683
    if level == locking.LEVEL_NODE:
5684
      self._LockInstancesNodes()
5685

    
5686
  def BuildHooksEnv(self):
5687
    """Build hooks env.
5688

5689
    This runs on master, primary and secondary nodes of the instance.
5690

5691
    """
5692
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5693
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5694
    nl = [self.cfg.GetMasterNode()]
5695
    nl_post = list(self.instance.all_nodes) + nl
5696
    return env, nl, nl_post
5697

    
5698
  def CheckPrereq(self):
5699
    """Check prerequisites.
5700

5701
    This checks that the instance is in the cluster.
5702

5703
    """
5704
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5705
    assert self.instance is not None, \
5706
      "Cannot retrieve locked instance %s" % self.op.instance_name
5707

    
5708
  def Exec(self, feedback_fn):
5709
    """Remove the instance.
5710

5711
    """
5712
    instance = self.instance
5713
    logging.info("Shutting down instance %s on node %s",
5714
                 instance.name, instance.primary_node)
5715

    
5716
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5717
                                             self.op.shutdown_timeout)
5718
    msg = result.fail_msg
5719
    if msg:
5720
      if self.op.ignore_failures:
5721
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
5722
      else:
5723
        raise errors.OpExecError("Could not shutdown instance %s on"
5724
                                 " node %s: %s" %
5725
                                 (instance.name, instance.primary_node, msg))
5726

    
5727
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5728

    
5729

    
5730
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5731
  """Utility function to remove an instance.
5732

5733
  """
5734
  logging.info("Removing block devices for instance %s", instance.name)
5735

    
5736
  if not _RemoveDisks(lu, instance):
5737
    if not ignore_failures:
5738
      raise errors.OpExecError("Can't remove instance's disks")
5739
    feedback_fn("Warning: can't remove instance's disks")
5740

    
5741
  logging.info("Removing instance %s out of cluster config", instance.name)
5742

    
5743
  lu.cfg.RemoveInstance(instance.name)
5744

    
5745
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5746
    "Instance lock removal conflict"
5747

    
5748
  # Remove lock for the instance
5749
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5750

    
5751

    
5752
class LUInstanceQuery(NoHooksLU):
5753
  """Logical unit for querying instances.
5754

5755
  """
5756
  # pylint: disable-msg=W0142
5757
  REQ_BGL = False
5758

    
5759
  def CheckArguments(self):
5760
    self.iq = _InstanceQuery(self.op.names, self.op.output_fields,
5761
                             self.op.use_locking)
5762

    
5763
  def ExpandNames(self):
5764
    self.iq.ExpandNames(self)
5765

    
5766
  def DeclareLocks(self, level):
5767
    self.iq.DeclareLocks(self, level)
5768

    
5769
  def Exec(self, feedback_fn):
5770
    return self.iq.OldStyleQuery(self)
5771

    
5772

    
5773
class LUInstanceFailover(LogicalUnit):
5774
  """Failover an instance.
5775

5776
  """
5777
  HPATH = "instance-failover"
5778
  HTYPE = constants.HTYPE_INSTANCE
5779
  REQ_BGL = False
5780

    
5781
  def ExpandNames(self):
5782
    self._ExpandAndLockInstance()
5783
    self.needed_locks[locking.LEVEL_NODE] = []
5784
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5785

    
5786
  def DeclareLocks(self, level):
5787
    if level == locking.LEVEL_NODE:
5788
      self._LockInstancesNodes()
5789

    
5790
  def BuildHooksEnv(self):
5791
    """Build hooks env.
5792

5793
    This runs on master, primary and secondary nodes of the instance.
5794

5795
    """
5796
    instance = self.instance
5797
    source_node = instance.primary_node
5798
    target_node = instance.secondary_nodes[0]
5799
    env = {
5800
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5801
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5802
      "OLD_PRIMARY": source_node,
5803
      "OLD_SECONDARY": target_node,
5804
      "NEW_PRIMARY": target_node,
5805
      "NEW_SECONDARY": source_node,
5806
      }
5807
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5808
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5809
    nl_post = list(nl)
5810
    nl_post.append(source_node)
5811
    return env, nl, nl_post
5812

    
5813
  def CheckPrereq(self):
5814
    """Check prerequisites.
5815

5816
    This checks that the instance is in the cluster.
5817

5818
    """
5819
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5820
    assert self.instance is not None, \
5821
      "Cannot retrieve locked instance %s" % self.op.instance_name
5822

    
5823
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5824
    if instance.disk_template not in constants.DTS_NET_MIRROR:
5825
      raise errors.OpPrereqError("Instance's disk layout is not"
5826
                                 " network mirrored, cannot failover.",
5827
                                 errors.ECODE_STATE)
5828

    
5829
    secondary_nodes = instance.secondary_nodes
5830
    if not secondary_nodes:
5831
      raise errors.ProgrammerError("no secondary node but using "
5832
                                   "a mirrored disk template")
5833

    
5834
    target_node = secondary_nodes[0]
5835
    _CheckNodeOnline(self, target_node)
5836
    _CheckNodeNotDrained(self, target_node)
5837
    if instance.admin_up:
5838
      # check memory requirements on the secondary node
5839
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5840
                           instance.name, bep[constants.BE_MEMORY],
5841
                           instance.hypervisor)
5842
    else:
5843
      self.LogInfo("Not checking memory on the secondary node as"
5844
                   " instance will not be started")
5845

    
5846
    # check bridge existance
5847
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5848

    
5849
  def Exec(self, feedback_fn):
5850
    """Failover an instance.
5851

5852
    The failover is done by shutting it down on its present node and
5853
    starting it on the secondary.
5854

5855
    """
5856
    instance = self.instance
5857
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5858

    
5859
    source_node = instance.primary_node
5860
    target_node = instance.secondary_nodes[0]
5861

    
5862
    if instance.admin_up:
5863
      feedback_fn("* checking disk consistency between source and target")
5864
      for dev in instance.disks:
5865
        # for drbd, these are drbd over lvm
5866
        if not _CheckDiskConsistency(self, dev, target_node, False):
5867
          if not self.op.ignore_consistency:
5868
            raise errors.OpExecError("Disk %s is degraded on target node,"
5869
                                     " aborting failover." % dev.iv_name)
5870
    else:
5871
      feedback_fn("* not checking disk consistency as instance is not running")
5872

    
5873
    feedback_fn("* shutting down instance on source node")
5874
    logging.info("Shutting down instance %s on node %s",
5875
                 instance.name, source_node)
5876

    
5877
    result = self.rpc.call_instance_shutdown(source_node, instance,
5878
                                             self.op.shutdown_timeout)
5879
    msg = result.fail_msg
5880
    if msg:
5881
      if self.op.ignore_consistency or primary_node.offline:
5882
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5883
                             " Proceeding anyway. Please make sure node"
5884
                             " %s is down. Error details: %s",
5885
                             instance.name, source_node, source_node, msg)
5886
      else:
5887
        raise errors.OpExecError("Could not shutdown instance %s on"
5888
                                 " node %s: %s" %
5889
                                 (instance.name, source_node, msg))
5890

    
5891
    feedback_fn("* deactivating the instance's disks on source node")
5892
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5893
      raise errors.OpExecError("Can't shut down the instance's disks.")
5894

    
5895
    instance.primary_node = target_node
5896
    # distribute new instance config to the other nodes
5897
    self.cfg.Update(instance, feedback_fn)
5898

    
5899
    # Only start the instance if it's marked as up
5900
    if instance.admin_up:
5901
      feedback_fn("* activating the instance's disks on target node")
5902
      logging.info("Starting instance %s on node %s",
5903
                   instance.name, target_node)
5904

    
5905
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5906
                                           ignore_secondaries=True)
5907
      if not disks_ok:
5908
        _ShutdownInstanceDisks(self, instance)
5909
        raise errors.OpExecError("Can't activate the instance's disks")
5910

    
5911
      feedback_fn("* starting the instance on the target node")
5912
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5913
      msg = result.fail_msg
5914
      if msg:
5915
        _ShutdownInstanceDisks(self, instance)
5916
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5917
                                 (instance.name, target_node, msg))
5918

    
5919

    
5920
class LUInstanceMigrate(LogicalUnit):
5921
  """Migrate an instance.
5922

5923
  This is migration without shutting down, compared to the failover,
5924
  which is done with shutdown.
5925

5926
  """
5927
  HPATH = "instance-migrate"
5928
  HTYPE = constants.HTYPE_INSTANCE
5929
  REQ_BGL = False
5930

    
5931
  def ExpandNames(self):
5932
    self._ExpandAndLockInstance()
5933

    
5934
    self.needed_locks[locking.LEVEL_NODE] = []
5935
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5936

    
5937
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5938
                                       self.op.cleanup)
5939
    self.tasklets = [self._migrater]
5940

    
5941
  def DeclareLocks(self, level):
5942
    if level == locking.LEVEL_NODE:
5943
      self._LockInstancesNodes()
5944

    
5945
  def BuildHooksEnv(self):
5946
    """Build hooks env.
5947

5948
    This runs on master, primary and secondary nodes of the instance.
5949

5950
    """
5951
    instance = self._migrater.instance
5952
    source_node = instance.primary_node
5953
    target_node = instance.secondary_nodes[0]
5954
    env = _BuildInstanceHookEnvByObject(self, instance)
5955
    env["MIGRATE_LIVE"] = self._migrater.live
5956
    env["MIGRATE_CLEANUP"] = self.op.cleanup
5957
    env.update({
5958
        "OLD_PRIMARY": source_node,
5959
        "OLD_SECONDARY": target_node,
5960
        "NEW_PRIMARY": target_node,
5961
        "NEW_SECONDARY": source_node,
5962
        })
5963
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5964
    nl_post = list(nl)
5965
    nl_post.append(source_node)
5966
    return env, nl, nl_post
5967

    
5968

    
5969
class LUInstanceMove(LogicalUnit):
5970
  """Move an instance by data-copying.
5971

5972
  """
5973
  HPATH = "instance-move"
5974
  HTYPE = constants.HTYPE_INSTANCE
5975
  REQ_BGL = False
5976

    
5977
  def ExpandNames(self):
5978
    self._ExpandAndLockInstance()
5979
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5980
    self.op.target_node = target_node
5981
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
5982
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5983

    
5984
  def DeclareLocks(self, level):
5985
    if level == locking.LEVEL_NODE:
5986
      self._LockInstancesNodes(primary_only=True)
5987

    
5988
  def BuildHooksEnv(self):
5989
    """Build hooks env.
5990

5991
    This runs on master, primary and secondary nodes of the instance.
5992

5993
    """
5994
    env = {
5995
      "TARGET_NODE": self.op.target_node,
5996
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5997
      }
5998
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5999
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6000
                                       self.op.target_node]
6001
    return env, nl, nl
6002

    
6003
  def CheckPrereq(self):
6004
    """Check prerequisites.
6005

6006
    This checks that the instance is in the cluster.
6007

6008
    """
6009
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6010
    assert self.instance is not None, \
6011
      "Cannot retrieve locked instance %s" % self.op.instance_name
6012

    
6013
    node = self.cfg.GetNodeInfo(self.op.target_node)
6014
    assert node is not None, \
6015
      "Cannot retrieve locked node %s" % self.op.target_node
6016

    
6017
    self.target_node = target_node = node.name
6018

    
6019
    if target_node == instance.primary_node:
6020
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6021
                                 (instance.name, target_node),
6022
                                 errors.ECODE_STATE)
6023

    
6024
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6025

    
6026
    for idx, dsk in enumerate(instance.disks):
6027
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6028
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6029
                                   " cannot copy" % idx, errors.ECODE_STATE)
6030

    
6031
    _CheckNodeOnline(self, target_node)
6032
    _CheckNodeNotDrained(self, target_node)
6033
    _CheckNodeVmCapable(self, target_node)
6034

    
6035
    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")
6043

    
6044
    # check bridge existance
6045
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6046

    
6047
  def Exec(self, feedback_fn):
6048
    """Move an instance.
6049

6050
    The move is done by shutting it down on its present node, copying
6051
    the data over (slow) and starting it on the new node.
6052

6053
    """
6054
    instance = self.instance
6055

    
6056
    source_node = instance.primary_node
6057
    target_node = self.target_node
6058

    
6059
    self.LogInfo("Shutting down instance %s on source node %s",
6060
                 instance.name, source_node)
6061

    
6062
    result = self.rpc.call_instance_shutdown(source_node, instance,
6063
                                             self.op.shutdown_timeout)
6064
    msg = result.fail_msg
6065
    if msg:
6066
      if self.op.ignore_consistency:
6067
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6068
                             " Proceeding anyway. Please make sure node"
6069
                             " %s is down. Error details: %s",
6070
                             instance.name, source_node, source_node, msg)
6071
      else:
6072
        raise errors.OpExecError("Could not shutdown instance %s on"
6073
                                 " node %s: %s" %
6074
                                 (instance.name, source_node, msg))
6075

    
6076
    # create the target disks
6077
    try:
6078
      _CreateDisks(self, instance, target_node=target_node)
6079
    except errors.OpExecError:
6080
      self.LogWarning("Device creation failed, reverting...")
6081
      try:
6082
        _RemoveDisks(self, instance, target_node=target_node)
6083
      finally:
6084
        self.cfg.ReleaseDRBDMinors(instance.name)
6085
        raise
6086

    
6087
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6088

    
6089
    errs = []
6090
    # activate, get path, copy the data over
6091
    for idx, disk in enumerate(instance.disks):
6092
      self.LogInfo("Copying data for disk %d", idx)
6093
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6094
                                               instance.name, True, idx)
6095
      if result.fail_msg:
6096
        self.LogWarning("Can't assemble newly created disk %d: %s",
6097
                        idx, result.fail_msg)
6098
        errs.append(result.fail_msg)
6099
        break
6100
      dev_path = result.payload
6101
      result = self.rpc.call_blockdev_export(source_node, disk,
6102
                                             target_node, dev_path,
6103
                                             cluster_name)
6104
      if result.fail_msg:
6105
        self.LogWarning("Can't copy data over for disk %d: %s",
6106
                        idx, result.fail_msg)
6107
        errs.append(result.fail_msg)
6108
        break
6109

    
6110
    if errs:
6111
      self.LogWarning("Some disks failed to copy, aborting")
6112
      try:
6113
        _RemoveDisks(self, instance, target_node=target_node)
6114
      finally:
6115
        self.cfg.ReleaseDRBDMinors(instance.name)
6116
        raise errors.OpExecError("Errors during disk copy: %s" %
6117
                                 (",".join(errs),))
6118

    
6119
    instance.primary_node = target_node
6120
    self.cfg.Update(instance, feedback_fn)
6121

    
6122
    self.LogInfo("Removing the disks on the original node")
6123
    _RemoveDisks(self, instance, target_node=source_node)
6124

    
6125
    # Only start the instance if it's marked as up
6126
    if instance.admin_up:
6127
      self.LogInfo("Starting instance %s on node %s",
6128
                   instance.name, target_node)
6129

    
6130
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6131
                                           ignore_secondaries=True)
6132
      if not disks_ok:
6133
        _ShutdownInstanceDisks(self, instance)
6134
        raise errors.OpExecError("Can't activate the instance's disks")
6135

    
6136
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6137
      msg = result.fail_msg
6138
      if msg:
6139
        _ShutdownInstanceDisks(self, instance)
6140
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6141
                                 (instance.name, target_node, msg))
6142

    
6143

    
6144
class LUNodeMigrate(LogicalUnit):
6145
  """Migrate all instances from a node.
6146

6147
  """
6148
  HPATH = "node-migrate"
6149
  HTYPE = constants.HTYPE_NODE
6150
  REQ_BGL = False
6151

    
6152
  def ExpandNames(self):
6153
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6154

    
6155
    self.needed_locks = {
6156
      locking.LEVEL_NODE: [self.op.node_name],
6157
      }
6158

    
6159
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6160

    
6161
    # Create tasklets for migrating instances for all instances on this node
6162
    names = []
6163
    tasklets = []
6164

    
6165
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6166
      logging.debug("Migrating instance %s", inst.name)
6167
      names.append(inst.name)
6168

    
6169
      tasklets.append(TLMigrateInstance(self, inst.name, False))
6170

    
6171
    self.tasklets = tasklets
6172

    
6173
    # Declare instance locks
6174
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6175

    
6176
  def DeclareLocks(self, level):
6177
    if level == locking.LEVEL_NODE:
6178
      self._LockInstancesNodes()
6179

    
6180
  def BuildHooksEnv(self):
6181
    """Build hooks env.
6182

6183
    This runs on the master, the primary and all the secondaries.
6184

6185
    """
6186
    env = {
6187
      "NODE_NAME": self.op.node_name,
6188
      }
6189

    
6190
    nl = [self.cfg.GetMasterNode()]
6191

    
6192
    return (env, nl, nl)
6193

    
6194

    
6195
class TLMigrateInstance(Tasklet):
6196
  """Tasklet class for instance migration.
6197

6198
  @type live: boolean
6199
  @ivar live: whether the migration will be done live or non-live;
6200
      this variable is initialized only after CheckPrereq has run
6201

6202
  """
6203
  def __init__(self, lu, instance_name, cleanup):
6204
    """Initializes this class.
6205

6206
    """
6207
    Tasklet.__init__(self, lu)
6208

    
6209
    # Parameters
6210
    self.instance_name = instance_name
6211
    self.cleanup = cleanup
6212
    self.live = False # will be overridden later
6213

    
6214
  def CheckPrereq(self):
6215
    """Check prerequisites.
6216

6217
    This checks that the instance is in the cluster.
6218

6219
    """
6220
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6221
    instance = self.cfg.GetInstanceInfo(instance_name)
6222
    assert instance is not None
6223

    
6224
    if instance.disk_template != constants.DT_DRBD8:
6225
      raise errors.OpPrereqError("Instance's disk layout is not"
6226
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
6227

    
6228
    secondary_nodes = instance.secondary_nodes
6229
    if not secondary_nodes:
6230
      raise errors.ConfigurationError("No secondary node but using"
6231
                                      " drbd8 disk template")
6232

    
6233
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6234

    
6235
    target_node = secondary_nodes[0]
6236
    # check memory requirements on the secondary node
6237
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6238
                         instance.name, i_be[constants.BE_MEMORY],
6239
                         instance.hypervisor)
6240

    
6241
    # check bridge existance
6242
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6243

    
6244
    if not self.cleanup:
6245
      _CheckNodeNotDrained(self.lu, target_node)
6246
      result = self.rpc.call_instance_migratable(instance.primary_node,
6247
                                                 instance)
6248
      result.Raise("Can't migrate, please use failover",
6249
                   prereq=True, ecode=errors.ECODE_STATE)
6250

    
6251
    self.instance = instance
6252

    
6253
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6254
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6255
                                 " parameters are accepted",
6256
                                 errors.ECODE_INVAL)
6257
    if self.lu.op.live is not None:
6258
      if self.lu.op.live:
6259
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6260
      else:
6261
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6262
      # reset the 'live' parameter to None so that repeated
6263
      # invocations of CheckPrereq do not raise an exception
6264
      self.lu.op.live = None
6265
    elif self.lu.op.mode is None:
6266
      # read the default value from the hypervisor
6267
      i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6268
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6269

    
6270
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
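    # Summary of the precedence implemented above: an explicit 'live'
    # flag is translated into a migration mode, an explicit 'mode' is
    # used as given, and otherwise the hypervisor's HV_MIGRATION_MODE
    # default applies; 'live' is reset to None so that repeated
    # CheckPrereq invocations do not raise.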

    
6272
  def _WaitUntilSync(self):
6273
    """Poll with custom rpc for disk sync.
6274

6275
    This uses our own step-based rpc call.
6276

6277
    """
6278
    self.feedback_fn("* wait until resync is done")
6279
    all_done = False
6280
    while not all_done:
6281
      all_done = True
6282
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6283
                                            self.nodes_ip,
6284
                                            self.instance.disks)
6285
      min_percent = 100
6286
      for node, nres in result.items():
6287
        nres.Raise("Cannot resync disks on node %s" % node)
6288
        node_done, node_percent = nres.payload
6289
        all_done = all_done and node_done
6290
        if node_percent is not None:
6291
          min_percent = min(min_percent, node_percent)
6292
      if not all_done:
6293
        if min_percent < 100:
6294
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6295
        time.sleep(2)
6296

    
6297
  def _EnsureSecondary(self, node):
6298
    """Demote a node to secondary.
6299

6300
    """
6301
    self.feedback_fn("* switching node %s to secondary mode" % node)
6302

    
6303
    for dev in self.instance.disks:
6304
      self.cfg.SetDiskID(dev, node)
6305

    
6306
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6307
                                          self.instance.disks)
6308
    result.Raise("Cannot change disk to secondary on node %s" % node)
6309

    
6310
  def _GoStandalone(self):
6311
    """Disconnect from the network.
6312

6313
    """
6314
    self.feedback_fn("* changing into standalone mode")
6315
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6316
                                               self.instance.disks)
6317
    for node, nres in result.items():
6318
      nres.Raise("Cannot disconnect disks node %s" % node)
6319

    
6320
  def _GoReconnect(self, multimaster):
6321
    """Reconnect to the network.
6322

6323
    """
6324
    if multimaster:
6325
      msg = "dual-master"
6326
    else:
6327
      msg = "single-master"
6328
    self.feedback_fn("* changing disks into %s mode" % msg)
6329
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6330
                                           self.instance.disks,
6331
                                           self.instance.name, multimaster)
6332
    for node, nres in result.items():
6333
      nres.Raise("Cannot change disks config on node %s" % node)
6334

    
6335
  def _ExecCleanup(self):
6336
    """Try to cleanup after a failed migration.
6337

6338
    The cleanup is done by:
6339
      - check that the instance is running only on one node
6340
        (and update the config if needed)
6341
      - change disks on its secondary node to secondary
6342
      - wait until disks are fully synchronized
6343
      - disconnect from the network
6344
      - change disks into single-master mode
6345
      - wait again until disks are fully synchronized
6346

6347
    """
6348
    instance = self.instance
6349
    target_node = self.target_node
6350
    source_node = self.source_node
6351

    
6352
    # check running on only one node
6353
    self.feedback_fn("* checking where the instance actually runs"
6354
                     " (if this hangs, the hypervisor might be in"
6355
                     " a bad state)")
6356
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6357
    for node, result in ins_l.items():
6358
      result.Raise("Can't contact node %s" % node)
6359

    
6360
    runningon_source = instance.name in ins_l[source_node].payload
6361
    runningon_target = instance.name in ins_l[target_node].payload
6362

    
6363
    if runningon_source and runningon_target:
6364
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6365
                               " or the hypervisor is confused. You will have"
6366
                               " to ensure manually that it runs only on one"
6367
                               " and restart this operation.")
6368

    
6369
    if not (runningon_source or runningon_target):
6370
      raise errors.OpExecError("Instance does not seem to be running at all."
6371
                               " In this case, it's safer to repair by"
6372
                               " running 'gnt-instance stop' to ensure disk"
6373
                               " shutdown, and then restarting it.")
6374

    
6375
    if runningon_target:
6376
      # the migration has actually succeeded, we need to update the config
6377
      self.feedback_fn("* instance running on secondary node (%s),"
6378
                       " updating config" % target_node)
6379
      instance.primary_node = target_node
6380
      self.cfg.Update(instance, self.feedback_fn)
6381
      demoted_node = source_node
6382
    else:
6383
      self.feedback_fn("* instance confirmed to be running on its"
6384
                       " primary node (%s)" % source_node)
6385
      demoted_node = target_node
6386

    
6387
    self._EnsureSecondary(demoted_node)
6388
    try:
6389
      self._WaitUntilSync()
6390
    except errors.OpExecError:
6391
      # we ignore here errors, since if the device is standalone, it
6392
      # won't be able to sync
6393
      pass
6394
    self._GoStandalone()
6395
    self._GoReconnect(False)
6396
    self._WaitUntilSync()
6397

    
6398
    self.feedback_fn("* done")
6399

    
6400
  def _RevertDiskStatus(self):
6401
    """Try to revert the disk status after a failed migration.
6402

6403
    """
6404
    target_node = self.target_node
6405
    try:
6406
      self._EnsureSecondary(target_node)
6407
      self._GoStandalone()
6408
      self._GoReconnect(False)
6409
      self._WaitUntilSync()
6410
    except errors.OpExecError, err:
6411
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6412
                         " drives: error '%s'\n"
6413
                         "Please look and recover the instance status" %
6414
                         str(err))
6415

    
6416
  def _AbortMigration(self):
6417
    """Call the hypervisor code to abort a started migration.
6418

6419
    """
6420
    instance = self.instance
6421
    target_node = self.target_node
6422
    migration_info = self.migration_info
6423

    
6424
    abort_result = self.rpc.call_finalize_migration(target_node,
6425
                                                    instance,
6426
                                                    migration_info,
6427
                                                    False)
6428
    abort_msg = abort_result.fail_msg
6429
    if abort_msg:
6430
      logging.error("Aborting migration failed on target node %s: %s",
6431
                    target_node, abort_msg)
6432
      # Don't raise an exception here, as we stil have to try to revert the
6433
      # disk status, even if this step failed.
6434

    
6435
  def _ExecMigration(self):
6436
    """Migrate an instance.
6437

6438
    The migrate is done by:
6439
      - change the disks into dual-master mode
6440
      - wait until disks are fully synchronized again
6441
      - migrate the instance
6442
      - change disks on the new secondary node (the old primary) to secondary
6443
      - wait until disks are fully synchronized
6444
      - change disks into single-master mode
6445

6446
    """
6447
    instance = self.instance
6448
    target_node = self.target_node
6449
    source_node = self.source_node
6450

    
6451
    self.feedback_fn("* checking disk consistency between source and target")
6452
    for dev in instance.disks:
6453
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6454
        raise errors.OpExecError("Disk %s is degraded or not fully"
6455
                                 " synchronized on target node,"
6456
                                 " aborting migrate." % dev.iv_name)
6457

    
6458
    # First get the migration information from the remote node
6459
    result = self.rpc.call_migration_info(source_node, instance)
6460
    msg = result.fail_msg
6461
    if msg:
6462
      log_err = ("Failed fetching source migration information from %s: %s" %
6463
                 (source_node, msg))
6464
      logging.error(log_err)
6465
      raise errors.OpExecError(log_err)
6466

    
6467
    self.migration_info = migration_info = result.payload
6468

    
6469
    # Then switch the disks to master/master mode
6470
    self._EnsureSecondary(target_node)
6471
    self._GoStandalone()
6472
    self._GoReconnect(True)
6473
    self._WaitUntilSync()
6474

    
6475
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6476
    result = self.rpc.call_accept_instance(target_node,
6477
                                           instance,
6478
                                           migration_info,
6479
                                           self.nodes_ip[target_node])
6480

    
6481
    msg = result.fail_msg
6482
    if msg:
6483
      logging.error("Instance pre-migration failed, trying to revert"
6484
                    " disk status: %s", msg)
6485
      self.feedback_fn("Pre-migration failed, aborting")
6486
      self._AbortMigration()
6487
      self._RevertDiskStatus()
6488
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6489
                               (instance.name, msg))
6490

    
6491
    self.feedback_fn("* migrating instance to %s" % target_node)
6492
    time.sleep(10)
6493
    result = self.rpc.call_instance_migrate(source_node, instance,
6494
                                            self.nodes_ip[target_node],
6495
                                            self.live)
6496
    msg = result.fail_msg
6497
    if msg:
6498
      logging.error("Instance migration failed, trying to revert"
6499
                    " disk status: %s", msg)
6500
      self.feedback_fn("Migration failed, aborting")
6501
      self._AbortMigration()
6502
      self._RevertDiskStatus()
6503
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6504
                               (instance.name, msg))
6505
    time.sleep(10)
6506

    
6507
    instance.primary_node = target_node
6508
    # distribute new instance config to the other nodes
6509
    self.cfg.Update(instance, self.feedback_fn)
6510

    
6511
    result = self.rpc.call_finalize_migration(target_node,
6512
                                              instance,
6513
                                              migration_info,
6514
                                              True)
6515
    msg = result.fail_msg
6516
    if msg:
6517
      logging.error("Instance migration succeeded, but finalization failed:"
6518
                    " %s", msg)
6519
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6520
                               msg)
6521

    
6522
    self._EnsureSecondary(source_node)
6523
    self._WaitUntilSync()
6524
    self._GoStandalone()
6525
    self._GoReconnect(False)
6526
    self._WaitUntilSync()
6527

    
6528
    self.feedback_fn("* done")
6529

    
6530
  def Exec(self, feedback_fn):
6531
    """Perform the migration.
6532

6533
    """
6534
    feedback_fn("Migrating instance %s" % self.instance.name)
6535

    
6536
    self.feedback_fn = feedback_fn
6537

    
6538
    self.source_node = self.instance.primary_node
6539
    self.target_node = self.instance.secondary_nodes[0]
6540
    self.all_nodes = [self.source_node, self.target_node]
6541
    self.nodes_ip = {
6542
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6543
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6544
      }
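    # Note: the DRBD and migration RPCs below address both nodes via
    # their secondary IPs, i.e. over the replication network rather
    # than the primary network.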

    
6546
    if self.cleanup:
6547
      return self._ExecCleanup()
6548
    else:
6549
      return self._ExecMigration()
6550

    
6551

    
6552
def _CreateBlockDev(lu, node, instance, device, force_create,
6553
                    info, force_open):
6554
  """Create a tree of block devices on a given node.
6555

6556
  If this device type has to be created on secondaries, create it and
6557
  all its children.
6558

6559
  If not, just recurse to children keeping the same 'force' value.
6560

6561
  @param lu: the lu on whose behalf we execute
6562
  @param node: the node on which to create the device
6563
  @type instance: L{objects.Instance}
6564
  @param instance: the instance which owns the device
6565
  @type device: L{objects.Disk}
6566
  @param device: the device to create
6567
  @type force_create: boolean
6568
  @param force_create: whether to force creation of this device; this
6569
      will be changed to True whenever we find a device which has
6570
      CreateOnSecondary() attribute
6571
  @param info: the extra 'metadata' we should attach to the device
6572
      (this will be represented as a LVM tag)
6573
  @type force_open: boolean
6574
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6578

6579
  """
6580
  if device.CreateOnSecondary():
6581
    force_create = True
6582

    
6583
  if device.children:
6584
    for child in device.children:
6585
      _CreateBlockDev(lu, node, instance, child, force_create,
6586
                      info, force_open)
6587

    
6588
  if not force_create:
6589
    return
6590

    
6591
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
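  # Note on the recursion above: children are created before the device
  # itself, and a device is only created once force_create is set,
  # either by the caller (e.g. on the primary node) or because the
  # device or one of its ancestors reports CreateOnSecondary().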

    
6593

    
6594
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6595
  """Create a single block device on a given node.
6596

6597
  This will not recurse over children of the device, so they must be
6598
  created in advance.
6599

6600
  @param lu: the lu on whose behalf we execute
6601
  @param node: the node on which to create the device
6602
  @type instance: L{objects.Instance}
6603
  @param instance: the instance which owns the device
6604
  @type device: L{objects.Disk}
6605
  @param device: the device to create
6606
  @param info: the extra 'metadata' we should attach to the device
6607
      (this will be represented as a LVM tag)
6608
  @type force_open: boolean
6609
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6613

6614
  """
6615
  lu.cfg.SetDiskID(device, node)
6616
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6617
                                       instance.name, force_open, info)
6618
  result.Raise("Can't create block device %s on"
6619
               " node %s for instance %s" % (device, node, instance.name))
6620
  if device.physical_id is None:
6621
    device.physical_id = result.payload
6622

    
6623

    
6624
def _GenerateUniqueNames(lu, exts):
6625
  """Generate a suitable LV name.
6626

6627
  This will generate a logical volume name for the given instance.
6628

6629
  """
6630
  results = []
6631
  for val in exts:
6632
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6633
    results.append("%s%s" % (new_id, val))
6634
  return results
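  # Illustrative example (the IDs are generated, the values below are
  # made up): _GenerateUniqueNames(lu, [".disk0", ".disk1"]) could
  # return ["3e9b2b4a-....disk0", "7c01d5f2-....disk1"], i.e. a fresh
  # unique ID per extension.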

    
6636

    
6637
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
6638
                         iv_name, p_minor, s_minor):
6639
  """Generate a drbd8 device complete with its children.
6640

6641
  """
6642
  assert len(vgnames) == len(names) == 2
6643
  port = lu.cfg.AllocatePort()
6644
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6645
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6646
                          logical_id=(vgnames[0], names[0]))
6647
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6648
                          logical_id=(vgnames[1], names[1]))
6649
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6650
                          logical_id=(primary, secondary, port,
6651
                                      p_minor, s_minor,
6652
                                      shared_secret),
6653
                          children=[dev_data, dev_meta],
6654
                          iv_name=iv_name)
6655
  return drbd_dev
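  # The resulting tree is a single LD_DRBD8 device of the requested size
  # whose two children are the data LV (full size) and a 128 MB metadata
  # LV; its logical_id carries
  # (primary, secondary, port, p_minor, s_minor, shared_secret).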

    
6657

    
6658
def _GenerateDiskTemplate(lu, template_name,
6659
                          instance_name, primary_node,
6660
                          secondary_nodes, disk_info,
6661
                          file_storage_dir, file_driver,
6662
                          base_index, feedback_fn):
6663
  """Generate the entire disk layout for a given template type.
6664

6665
  """
6666
  #TODO: compute space requirements
6667

    
6668
  vgname = lu.cfg.GetVGName()
6669
  disk_count = len(disk_info)
6670
  disks = []
6671
  if template_name == constants.DT_DISKLESS:
6672
    pass
6673
  elif template_name == constants.DT_PLAIN:
6674
    if len(secondary_nodes) != 0:
6675
      raise errors.ProgrammerError("Wrong template configuration")
6676

    
6677
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6678
                                      for i in range(disk_count)])
6679
    for idx, disk in enumerate(disk_info):
6680
      disk_index = idx + base_index
6681
      vg = disk.get("vg", vgname)
6682
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6683
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6684
                              logical_id=(vg, names[idx]),
6685
                              iv_name="disk/%d" % disk_index,
6686
                              mode=disk["mode"])
6687
      disks.append(disk_dev)
6688
  elif template_name == constants.DT_DRBD8:
6689
    if len(secondary_nodes) != 1:
6690
      raise errors.ProgrammerError("Wrong template configuration")
6691
    remote_node = secondary_nodes[0]
6692
    minors = lu.cfg.AllocateDRBDMinor(
6693
      [primary_node, remote_node] * len(disk_info), instance_name)
6694

    
6695
    names = []
6696
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6697
                                               for i in range(disk_count)]):
6698
      names.append(lv_prefix + "_data")
6699
      names.append(lv_prefix + "_meta")
6700
    for idx, disk in enumerate(disk_info):
6701
      disk_index = idx + base_index
6702
      data_vg = disk.get("vg", vgname)
6703
      meta_vg = disk.get("metavg", data_vg)
6704
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6705
                                      disk["size"], [data_vg, meta_vg],
6706
                                      names[idx*2:idx*2+2],
6707
                                      "disk/%d" % disk_index,
6708
                                      minors[idx*2], minors[idx*2+1])
6709
      disk_dev.mode = disk["mode"]
6710
      disks.append(disk_dev)
6711
  elif template_name == constants.DT_FILE:
6712
    if len(secondary_nodes) != 0:
6713
      raise errors.ProgrammerError("Wrong template configuration")
6714

    
6715
    opcodes.RequireFileStorage()
6716

    
6717
    for idx, disk in enumerate(disk_info):
6718
      disk_index = idx + base_index
6719
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6720
                              iv_name="disk/%d" % disk_index,
6721
                              logical_id=(file_driver,
6722
                                          "%s/disk%d" % (file_storage_dir,
6723
                                                         disk_index)),
6724
                              mode=disk["mode"])
6725
      disks.append(disk_dev)
6726
  else:
6727
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6728
  return disks
6729

    
6730

    
6731
def _GetInstanceInfoText(instance):
6732
  """Compute that text that should be added to the disk's metadata.
6733

6734
  """
6735
  return "originstname+%s" % instance.name

    
6737

    
6738
def _CalcEta(time_taken, written, total_size):
6739
  """Calculates the ETA based on size written and total size.
6740

6741
  @param time_taken: The time taken so far
6742
  @param written: amount written so far
6743
  @param total_size: The total size of data to be written
6744
  @return: The remaining time in seconds
6745

6746
  """
6747
  avg_time = time_taken / float(written)
6748
  return (total_size - written) * avg_time
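  # Worked example (figures illustrative, using the MiB/seconds units
  # that _WipeDisks passes in): _CalcEta(30.0, 256, 1024) returns
  # (1024 - 256) * (30.0 / 256) = 90.0 seconds remaining.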

    
6750

    
6751
def _WipeDisks(lu, instance):
6752
  """Wipes instance disks.
6753

6754
  @type lu: L{LogicalUnit}
6755
  @param lu: the logical unit on whose behalf we execute
6756
  @type instance: L{objects.Instance}
6757
  @param instance: the instance whose disks we should create
6758
  @return: the success of the wipe
6759

6760
  """
6761
  node = instance.primary_node
6762

    
6763
  for device in instance.disks:
6764
    lu.cfg.SetDiskID(device, node)
6765

    
6766
  logging.info("Pause sync of instance %s disks", instance.name)
6767
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
6768

    
6769
  for idx, success in enumerate(result.payload):
6770
    if not success:
6771
      logging.warn("pause-sync of instance %s for disks %d failed",
6772
                   instance.name, idx)
6773

    
6774
  try:
6775
    for idx, device in enumerate(instance.disks):
6776
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6777
      # MAX_WIPE_CHUNK at max
6778
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6779
                            constants.MIN_WIPE_CHUNK_PERCENT)
6780
      # we _must_ make this an int, otherwise rounding errors will
6781
      # occur
6782
      wipe_chunk_size = int(wipe_chunk_size)
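      # Example (figures illustrative): for a 102400 MiB disk and a
      # MIN_WIPE_CHUNK_PERCENT of 10, the candidate chunk would be
      # 10240 MiB, which is then capped at MAX_WIPE_CHUNK.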

    
6784
      lu.LogInfo("* Wiping disk %d", idx)
6785
      logging.info("Wiping disk %d for instance %s, node %s using"
6786
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
6787

    
6788
      offset = 0
6789
      size = device.size
6790
      last_output = 0
6791
      start_time = time.time()
6792

    
6793
      while offset < size:
6794
        wipe_size = min(wipe_chunk_size, size - offset)
6795
        logging.debug("Wiping disk %d, offset %s, chunk %s",
6796
                      idx, offset, wipe_size)
6797
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6798
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
6799
                     (idx, offset, wipe_size))
6800
        now = time.time()
6801
        offset += wipe_size
6802
        if now - last_output >= 60:
6803
          eta = _CalcEta(now - start_time, offset, size)
6804
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
6805
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
6806
          last_output = now
6807
  finally:
6808
    logging.info("Resume sync of instance %s disks", instance.name)
6809

    
6810
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
6811

    
6812
    for idx, success in enumerate(result.payload):
6813
      if not success:
6814
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
6815
                      " look at the status and troubleshoot the issue.", idx)
6816
        logging.warn("resume-sync of instance %s for disks %d failed",
6817
                     instance.name, idx)
6818

    
6819

    
6820
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6821
  """Create all disks for an instance.
6822

6823
  This abstracts away some work from AddInstance.
6824

6825
  @type lu: L{LogicalUnit}
6826
  @param lu: the logical unit on whose behalf we execute
6827
  @type instance: L{objects.Instance}
6828
  @param instance: the instance whose disks we should create
6829
  @type to_skip: list
6830
  @param to_skip: list of indices to skip
6831
  @type target_node: string
6832
  @param target_node: if passed, overrides the target node for creation
6833
  @rtype: boolean
6834
  @return: the success of the creation
6835

6836
  """
6837
  info = _GetInstanceInfoText(instance)
6838
  if target_node is None:
6839
    pnode = instance.primary_node
6840
    all_nodes = instance.all_nodes
6841
  else:
6842
    pnode = target_node
6843
    all_nodes = [pnode]
6844

    
6845
  if instance.disk_template == constants.DT_FILE:
6846
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6847
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6848

    
6849
    result.Raise("Failed to create directory '%s' on"
6850
                 " node %s" % (file_storage_dir, pnode))
6851

    
6852
  # Note: this needs to be kept in sync with adding of disks in
6853
  # LUInstanceSetParams
6854
  for idx, device in enumerate(instance.disks):
6855
    if to_skip and idx in to_skip:
6856
      continue
6857
    logging.info("Creating volume %s for instance %s",
6858
                 device.iv_name, instance.name)
6859
    #HARDCODE
6860
    for node in all_nodes:
6861
      f_create = node == pnode
6862
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
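      # f_create doubles as both force_create and force_open: on the
      # primary node the device is unconditionally created and opened,
      # while on secondaries it is only created if the device type
      # itself asks for it via CreateOnSecondary().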

    
6864

    
6865
def _RemoveDisks(lu, instance, target_node=None):
6866
  """Remove all disks for an instance.
6867

6868
  This abstracts away some work from `AddInstance()` and
6869
  `RemoveInstance()`. Note that in case some of the devices couldn't
6870
  be removed, the removal will continue with the other ones (compare
6871
  with `_CreateDisks()`).
6872

6873
  @type lu: L{LogicalUnit}
6874
  @param lu: the logical unit on whose behalf we execute
6875
  @type instance: L{objects.Instance}
6876
  @param instance: the instance whose disks we should remove
6877
  @type target_node: string
6878
  @param target_node: used to override the node on which to remove the disks
6879
  @rtype: boolean
6880
  @return: the success of the removal
6881

6882
  """
6883
  logging.info("Removing block devices for instance %s", instance.name)
6884

    
6885
  all_result = True
6886
  for device in instance.disks:
6887
    if target_node:
6888
      edata = [(target_node, device)]
6889
    else:
6890
      edata = device.ComputeNodeTree(instance.primary_node)
6891
    for node, disk in edata:
6892
      lu.cfg.SetDiskID(disk, node)
6893
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6894
      if msg:
6895
        lu.LogWarning("Could not remove block device %s on node %s,"
6896
                      " continuing anyway: %s", device.iv_name, node, msg)
6897
        all_result = False
6898

    
6899
    # if this is a DRBD disk, return its port to the pool
6900
    if device.dev_type in constants.LDS_DRBD:
6901
      tcp_port = device.logical_id[2]
6902
      lu.cfg.AddTcpUdpPort(tcp_port)
6903

    
6904
  if instance.disk_template == constants.DT_FILE:
6905
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6906
    if target_node:
6907
      tgt = target_node
6908
    else:
6909
      tgt = instance.primary_node
6910
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6911
    if result.fail_msg:
6912
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6913
                    file_storage_dir, instance.primary_node, result.fail_msg)
6914
      all_result = False
6915

    
6916
  return all_result
6917

    
6918

    
6919
def _ComputeDiskSizePerVG(disk_template, disks):
6920
  """Compute disk size requirements in the volume group
6921

6922
  """
6923
  def _compute(disks, payload):
6924
    """Universal algorithm
6925

6926
    """
6927
    vgs = {}
6928
    for disk in disks:
6929
      vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6930

    
6931
    return vgs
6932

    
6933
  # Required free disk space as a function of disk and swap space
6934
  req_size_dict = {
6935
    constants.DT_DISKLESS: {},
6936
    constants.DT_PLAIN: _compute(disks, 0),
6937
    # 128 MB are added for drbd metadata for each disk
6938
    constants.DT_DRBD8: _compute(disks, 128),
6939
    constants.DT_FILE: {},
6940
  }
6941

    
6942
  if disk_template not in req_size_dict:
6943
    raise errors.ProgrammerError("Disk template '%s' size requirement"
6944
                                 " is unknown" %  disk_template)
6945

    
6946
  return req_size_dict[disk_template]
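  # Illustrative example (sizes made up): for DT_DRBD8 and
  # disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
  # the result is {"xenvg": 3328}, i.e. each disk plus 128 MB of DRBD
  # metadata, accumulated per volume group.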

    
6948

    
6949
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]

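# Worked example for _ComputeDiskSize (illustrative only): unlike the per-VG
# variant above, this returns a single total; e.g. for DT_DRBD8 disks of
# 1024 MB and 2048 MB it yields (1024 + 128) + (2048 + 128) = 3328 MB.
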
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]

def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)

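# Typical call site for _CheckHVParams (sketch; mirrors its use in
# LUInstanceCreate.CheckPrereq below, where nodenames is the primary node
# plus the secondaries):
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)
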
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)

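# Typical call site for _CheckOSParams (sketch; mirrors its use in
# LUInstanceCreate.CheckPrereq below, with required=True so a missing OS
# aborts the operation):
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
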
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks: parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

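  # Illustrative opcode input accepted by CheckArguments above (values are
  # hypothetical): either every disk carries an "adopt" key or none does, e.g.
  #   disks=[{"size": 10240, "mode": constants.DISK_RDWR}]
  #   nics=[{"mode": constants.NIC_MODE_BRIDGED, "link": "xen-br0"}]
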
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

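  # Locking sketch for ExpandNames above (illustrative): with an iallocator
  # the node level is locked as locking.ALL_SET because the target nodes are
  # not known yet; with an explicit pnode/snode only those node names end up
  # in self.needed_locks[locking.LEVEL_NODE], plus the source node on import.
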
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

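  # Example result handling for _RunAllocator above (illustrative; node names
  # are hypothetical): for a mirrored template ial.required_nodes is 2 and
  # ial.result could be ["node1.example.com", "node2.example.com"], which
  # become self.op.pnode and self.op.snode respectively.
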
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _ReadExportInfo(self):
7319
    """Reads the export information from disk.
7320

7321
    It will override the opcode source node and path with the actual
7322
    information, if these two were not specified before.
7323

7324
    @return: the export information
7325

7326
    """
7327
    assert self.op.mode == constants.INSTANCE_IMPORT
7328

    
7329
    src_node = self.op.src_node
7330
    src_path = self.op.src_path
7331

    
7332
    if src_node is None:
7333
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7334
      exp_list = self.rpc.call_export_list(locked_nodes)
7335
      found = False
7336
      for node in exp_list:
7337
        if exp_list[node].fail_msg:
7338
          continue
7339
        if src_path in exp_list[node].payload:
7340
          found = True
7341
          self.op.src_node = src_node = node
7342
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7343
                                                       src_path)
7344
          break
7345
      if not found:
7346
        raise errors.OpPrereqError("No export found for relative path %s" %
7347
                                    src_path, errors.ECODE_INVAL)
7348

    
7349
    _CheckNodeOnline(self, src_node)
7350
    result = self.rpc.call_export_info(src_node, src_path)
7351
    result.Raise("No export or invalid export found in dir %s" % src_path)
7352

    
7353
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7354
    if not export_info.has_section(constants.INISECT_EXP):
7355
      raise errors.ProgrammerError("Corrupted export config",
7356
                                   errors.ECODE_ENVIRON)
7357

    
7358
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7359
    if (int(ei_version) != constants.EXPORT_VERSION):
7360
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7361
                                 (ei_version, constants.EXPORT_VERSION),
7362
                                 errors.ECODE_ENVIRON)
7363
    return export_info
7364

    
7365
  def _ReadExportParams(self, einfo):
7366
    """Use export parameters as defaults.
7367

7368
    In case the opcode doesn't specify (as in override) some instance
7369
    parameters, then try to use them from the export information, if
7370
    that declares them.
7371

7372
    """
7373
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7374

    
7375
    if self.op.disk_template is None:
7376
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7377
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7378
                                          "disk_template")
7379
      else:
7380
        raise errors.OpPrereqError("No disk template specified and the export"
7381
                                   " is missing the disk_template information",
7382
                                   errors.ECODE_INVAL)
7383

    
7384
    if not self.op.disks:
7385
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7386
        disks = []
7387
        # TODO: import the disk iv_name too
7388
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7389
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7390
          disks.append({"size": disk_sz})
7391
        self.op.disks = disks
7392
      else:
7393
        raise errors.OpPrereqError("No disk info specified and the export"
7394
                                   " is missing the disk information",
7395
                                   errors.ECODE_INVAL)
7396

    
7397
    if (not self.op.nics and
7398
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7399
      nics = []
7400
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7401
        ndict = {}
7402
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7403
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7404
          ndict[name] = v
7405
        nics.append(ndict)
7406
      self.op.nics = nics
7407

    
7408
    if (self.op.hypervisor is None and
7409
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7410
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7411
    if einfo.has_section(constants.INISECT_HYP):
7412
      # use the export parameters but do not override the ones
7413
      # specified by the user
7414
      for name, value in einfo.items(constants.INISECT_HYP):
7415
        if name not in self.op.hvparams:
7416
          self.op.hvparams[name] = value
7417

    
7418
    if einfo.has_section(constants.INISECT_BEP):
7419
      # use the parameters, without overriding
7420
      for name, value in einfo.items(constants.INISECT_BEP):
7421
        if name not in self.op.beparams:
7422
          self.op.beparams[name] = value
7423
    else:
7424
      # try to read the parameters old style, from the main section
7425
      for name in constants.BES_PARAMETERS:
7426
        if (name not in self.op.beparams and
7427
            einfo.has_option(constants.INISECT_INS, name)):
7428
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7429

    
7430
    if einfo.has_section(constants.INISECT_OSP):
7431
      # use the parameters, without overriding
7432
      for name, value in einfo.items(constants.INISECT_OSP):
7433
        if name not in self.op.osparams:
7434
          self.op.osparams[name] = value
7435

    
7436
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

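  # Illustrative effect of _RevertToDefaults above: if the cluster-wide
  # default for BE_MEMORY is 128 and the opcode also asks for memory=128, the
  # entry is dropped here, so the instance keeps following future changes of
  # the cluster default instead of pinning the value.
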
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template == constants.DT_FILE:
      # build the full file storage dir path
      joinargs = []

      cfg_storagedir = self.cfg.GetFileStorageDir()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable-msg=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

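  # Illustrative result of _CalculateFileStorageDir above (paths are
  # hypothetical): with a cluster storage dir of "/srv/ganeti/file-storage",
  # file_storage_dir="web" and instance name "inst1.example.com", the joined
  # path becomes "/srv/ganeti/file-storage/web/inst1.example.com".
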
  def CheckPrereq(self):
7486
    """Check prerequisites.
7487

7488
    """
7489
    self._CalculateFileStorageDir()
7490

    
7491
    if self.op.mode == constants.INSTANCE_IMPORT:
7492
      export_info = self._ReadExportInfo()
7493
      self._ReadExportParams(export_info)
7494

    
7495
    if (not self.cfg.GetVGName() and
7496
        self.op.disk_template not in constants.DTS_NOT_LVM):
7497
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7498
                                 " instances", errors.ECODE_STATE)
7499

    
7500
    if self.op.hypervisor is None:
7501
      self.op.hypervisor = self.cfg.GetHypervisorType()
7502

    
7503
    cluster = self.cfg.GetClusterInfo()
7504
    enabled_hvs = cluster.enabled_hypervisors
7505
    if self.op.hypervisor not in enabled_hvs:
7506
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7507
                                 " cluster (%s)" % (self.op.hypervisor,
7508
                                  ",".join(enabled_hvs)),
7509
                                 errors.ECODE_STATE)
7510

    
7511
    # check hypervisor parameter syntax (locally)
7512
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7513
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7514
                                      self.op.hvparams)
7515
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7516
    hv_type.CheckParameterSyntax(filled_hvp)
7517
    self.hv_full = filled_hvp
7518
    # check that we don't specify global parameters on an instance
7519
    _CheckGlobalHvParams(self.op.hvparams)
7520

    
7521
    # fill and remember the beparams dict
7522
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7523
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7524

    
7525
    # build os parameters
7526
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7527

    
7528
    # now that hvp/bep are in final format, let's reset to defaults,
7529
    # if told to do so
7530
    if self.op.identify_defaults:
7531
      self._RevertToDefaults(cluster)
7532

    
7533
    # NIC buildup
7534
    self.nics = []
7535
    for idx, nic in enumerate(self.op.nics):
7536
      nic_mode_req = nic.get("mode", None)
7537
      nic_mode = nic_mode_req
7538
      if nic_mode is None:
7539
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7540

    
7541
      # in routed mode, for the first nic, the default ip is 'auto'
7542
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7543
        default_ip_mode = constants.VALUE_AUTO
7544
      else:
7545
        default_ip_mode = constants.VALUE_NONE
7546

    
7547
      # ip validity checks
7548
      ip = nic.get("ip", default_ip_mode)
7549
      if ip is None or ip.lower() == constants.VALUE_NONE:
7550
        nic_ip = None
7551
      elif ip.lower() == constants.VALUE_AUTO:
7552
        if not self.op.name_check:
7553
          raise errors.OpPrereqError("IP address set to auto but name checks"
7554
                                     " have been skipped",
7555
                                     errors.ECODE_INVAL)
7556
        nic_ip = self.hostname1.ip
7557
      else:
7558
        if not netutils.IPAddress.IsValid(ip):
7559
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7560
                                     errors.ECODE_INVAL)
7561
        nic_ip = ip
7562

    
7563
      # TODO: check the ip address for uniqueness
7564
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7565
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7566
                                   errors.ECODE_INVAL)
7567

    
7568
      # MAC address verification
7569
      mac = nic.get("mac", constants.VALUE_AUTO)
7570
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7571
        mac = utils.NormalizeAndValidateMac(mac)
7572

    
7573
        try:
7574
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7575
        except errors.ReservationError:
7576
          raise errors.OpPrereqError("MAC address %s already in use"
7577
                                     " in cluster" % mac,
7578
                                     errors.ECODE_NOTUNIQUE)
7579

    
7580
      # bridge verification
7581
      bridge = nic.get("bridge", None)
7582
      link = nic.get("link", None)
7583
      if bridge and link:
7584
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7585
                                   " at the same time", errors.ECODE_INVAL)
7586
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7587
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7588
                                   errors.ECODE_INVAL)
7589
      elif bridge:
7590
        link = bridge
7591

    
7592
      nicparams = {}
7593
      if nic_mode_req:
7594
        nicparams[constants.NIC_MODE] = nic_mode_req
7595
      if link:
7596
        nicparams[constants.NIC_LINK] = link
7597

    
7598
      check_params = cluster.SimpleFillNIC(nicparams)
7599
      objects.NIC.CheckParameterSyntax(check_params)
7600
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7601

    
7602
    # disk checks/pre-build
7603
    self.disks = []
7604
    for disk in self.op.disks:
7605
      mode = disk.get("mode", constants.DISK_RDWR)
7606
      if mode not in constants.DISK_ACCESS_SET:
7607
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7608
                                   mode, errors.ECODE_INVAL)
7609
      size = disk.get("size", None)
7610
      if size is None:
7611
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7612
      try:
7613
        size = int(size)
7614
      except (TypeError, ValueError):
7615
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7616
                                   errors.ECODE_INVAL)
7617
      data_vg = disk.get("vg", self.cfg.GetVGName())
7618
      meta_vg = disk.get("metavg", data_vg)
7619
      new_disk = {"size": size, "mode": mode, "vg": data_vg, "metavg": meta_vg}
7620
      if "adopt" in disk:
7621
        new_disk["adopt"] = disk["adopt"]
7622
      self.disks.append(new_disk)
7623

    
7624
    if self.op.mode == constants.INSTANCE_IMPORT:
7625

    
7626
      # Check that the new instance doesn't have less disks than the export
7627
      instance_disks = len(self.disks)
7628
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7629
      if instance_disks < export_disks:
7630
        raise errors.OpPrereqError("Not enough disks to import."
7631
                                   " (instance: %d, export: %d)" %
7632
                                   (instance_disks, export_disks),
7633
                                   errors.ECODE_INVAL)
7634

    
7635
      disk_images = []
7636
      for idx in range(export_disks):
7637
        option = 'disk%d_dump' % idx
7638
        if export_info.has_option(constants.INISECT_INS, option):
7639
          # FIXME: are the old os-es, disk sizes, etc. useful?
7640
          export_name = export_info.get(constants.INISECT_INS, option)
7641
          image = utils.PathJoin(self.op.src_path, export_name)
7642
          disk_images.append(image)
7643
        else:
7644
          disk_images.append(False)
7645

    
7646
      self.src_images = disk_images
7647

    
7648
      old_name = export_info.get(constants.INISECT_INS, 'name')
7649
      try:
7650
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7651
      except (TypeError, ValueError), err:
7652
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7653
                                   " an integer: %s" % str(err),
7654
                                   errors.ECODE_STATE)
7655
      if self.op.instance_name == old_name:
7656
        for idx, nic in enumerate(self.nics):
7657
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7658
            nic_mac_ini = 'nic%d_mac' % idx
7659
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7660

    
7661
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7662

    
7663
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7664
    if self.op.ip_check:
7665
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7666
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7667
                                   (self.check_ip, self.op.instance_name),
7668
                                   errors.ECODE_NOTUNIQUE)
7669

    
7670
    #### mac address generation
7671
    # By generating here the mac address both the allocator and the hooks get
7672
    # the real final mac address rather than the 'auto' or 'generate' value.
7673
    # There is a race condition between the generation and the instance object
7674
    # creation, which means that we know the mac is valid now, but we're not
7675
    # sure it will be when we actually add the instance. If things go bad
7676
    # adding the instance will abort because of a duplicate mac, and the
7677
    # creation job will fail.
7678
    for nic in self.nics:
7679
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7680
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7681

    
7682
    #### allocator run
7683

    
7684
    if self.op.iallocator is not None:
7685
      self._RunAllocator()
7686

    
7687
    #### node related checks
7688

    
7689
    # check primary node
7690
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7691
    assert self.pnode is not None, \
7692
      "Cannot retrieve locked node %s" % self.op.pnode
7693
    if pnode.offline:
7694
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7695
                                 pnode.name, errors.ECODE_STATE)
7696
    if pnode.drained:
7697
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7698
                                 pnode.name, errors.ECODE_STATE)
7699
    if not pnode.vm_capable:
7700
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7701
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7702

    
7703
    self.secondaries = []
7704

    
7705
    # mirror node verification
7706
    if self.op.disk_template in constants.DTS_NET_MIRROR:
7707
      if self.op.snode == pnode.name:
7708
        raise errors.OpPrereqError("The secondary node cannot be the"
7709
                                   " primary node.", errors.ECODE_INVAL)
7710
      _CheckNodeOnline(self, self.op.snode)
7711
      _CheckNodeNotDrained(self, self.op.snode)
7712
      _CheckNodeVmCapable(self, self.op.snode)
7713
      self.secondaries.append(self.op.snode)
7714

    
7715
    nodenames = [pnode.name] + self.secondaries
7716

    
7717
    if not self.adopt_disks:
7718
      # Check lv size requirements, if not adopting
7719
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7720
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7721

    
7722
    else: # instead, we must check the adoption data
7723
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7724
      if len(all_lvs) != len(self.disks):
7725
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7726
                                   errors.ECODE_INVAL)
7727
      for lv_name in all_lvs:
7728
        try:
7729
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7730
          # to ReserveLV uses the same syntax
7731
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7732
        except errors.ReservationError:
7733
          raise errors.OpPrereqError("LV named %s used by another instance" %
7734
                                     lv_name, errors.ECODE_NOTUNIQUE)
7735

    
7736
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7737
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7738

    
7739
      node_lvs = self.rpc.call_lv_list([pnode.name],
7740
                                       vg_names.payload.keys())[pnode.name]
7741
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7742
      node_lvs = node_lvs.payload
7743

    
7744
      delta = all_lvs.difference(node_lvs.keys())
7745
      if delta:
7746
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7747
                                   utils.CommaJoin(delta),
7748
                                   errors.ECODE_INVAL)
7749
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7750
      if online_lvs:
7751
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7752
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7753
                                   errors.ECODE_STATE)
7754
      # update the size of disk based on what is found
7755
      for dsk in self.disks:
7756
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7757

    
7758
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7759

    
7760
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7761
    # check OS parameters (remotely)
7762
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7763

    
7764
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7765

    
7766
    # memory check on primary node
7767
    if self.op.start:
7768
      _CheckNodeFreeMemory(self, self.pnode.name,
7769
                           "creating instance %s" % self.op.instance_name,
7770
                           self.be_full[constants.BE_MEMORY],
7771
                           self.op.hypervisor)
7772

    
7773
    self.dry_run_result = list(nodenames)
7774

    
7775
  def Exec(self, feedback_fn):
7776
    """Create and add the instance to the cluster.
7777

7778
    """
7779
    instance = self.op.instance_name
7780
    pnode_name = self.pnode.name
7781

    
7782
    ht_kind = self.op.hypervisor
7783
    if ht_kind in constants.HTS_REQ_PORT:
7784
      network_port = self.cfg.AllocatePort()
7785
    else:
7786
      network_port = None
7787

    
7788
    disks = _GenerateDiskTemplate(self,
7789
                                  self.op.disk_template,
7790
                                  instance, pnode_name,
7791
                                  self.secondaries,
7792
                                  self.disks,
7793
                                  self.instance_file_storage_dir,
7794
                                  self.op.file_driver,
7795
                                  0,
7796
                                  feedback_fn)
7797

    
7798
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7799
                            primary_node=pnode_name,
7800
                            nics=self.nics, disks=disks,
7801
                            disk_template=self.op.disk_template,
7802
                            admin_up=False,
7803
                            network_port=network_port,
7804
                            beparams=self.op.beparams,
7805
                            hvparams=self.op.hvparams,
7806
                            hypervisor=self.op.hypervisor,
7807
                            osparams=self.op.osparams,
7808
                            )
7809

    
7810
    if self.adopt_disks:
7811
      # rename LVs to the newly-generated names; we need to construct
7812
      # 'fake' LV disks with the old data, plus the new unique_id
7813
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7814
      rename_to = []
7815
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7816
        rename_to.append(t_dsk.logical_id)
7817
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7818
        self.cfg.SetDiskID(t_dsk, pnode_name)
7819
      result = self.rpc.call_blockdev_rename(pnode_name,
7820
                                             zip(tmp_disks, rename_to))
7821
      result.Raise("Failed to rename adoped LVs")
7822
    else:
7823
      feedback_fn("* creating instance disks...")
7824
      try:
7825
        _CreateDisks(self, iobj)
7826
      except errors.OpExecError:
7827
        self.LogWarning("Device creation failed, reverting...")
7828
        try:
7829
          _RemoveDisks(self, iobj)
7830
        finally:
7831
          self.cfg.ReleaseDRBDMinors(instance)
7832
          raise
7833

    
7834
    feedback_fn("adding instance %s to cluster config" % instance)
7835

    
7836
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7837

    
7838
    # Declare that we don't want to remove the instance lock anymore, as we've
7839
    # added the instance to the config
7840
    del self.remove_locks[locking.LEVEL_INSTANCE]
7841
    # Unlock all the nodes
7842
    if self.op.mode == constants.INSTANCE_IMPORT:
7843
      nodes_keep = [self.op.src_node]
7844
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7845
                       if node != self.op.src_node]
7846
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7847
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7848
    else:
7849
      self.context.glm.release(locking.LEVEL_NODE)
7850
      del self.acquired_locks[locking.LEVEL_NODE]
7851

    
7852
    disk_abort = False
7853
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7854
      feedback_fn("* wiping instance disks...")
7855
      try:
7856
        _WipeDisks(self, iobj)
7857
      except errors.OpExecError, err:
7858
        logging.exception("Wiping disks failed")
7859
        self.LogWarning("Wiping instance disks failed (%s)", err)
7860
        disk_abort = True
7861

    
7862
    if disk_abort:
7863
      # Something is already wrong with the disks, don't do anything else
7864
      pass
7865
    elif self.op.wait_for_sync:
7866
      disk_abort = not _WaitForSync(self, iobj)
7867
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
7868
      # make sure the disks are not degraded (still sync-ing is ok)
7869
      time.sleep(15)
7870
      feedback_fn("* checking mirrors status")
7871
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7872
    else:
7873
      disk_abort = False
7874

    
7875
    if disk_abort:
7876
      _RemoveDisks(self, iobj)
7877
      self.cfg.RemoveInstance(iobj.name)
7878
      # Make sure the instance lock gets removed
7879
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7880
      raise errors.OpExecError("There are some degraded disks for"
7881
                               " this instance")
7882

    
7883
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7884
      if self.op.mode == constants.INSTANCE_CREATE:
7885
        if not self.op.no_install:
7886
          feedback_fn("* running the instance OS create scripts...")
7887
          # FIXME: pass debug option from opcode to backend
7888
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7889
                                                 self.op.debug_level)
7890
          result.Raise("Could not add os for instance %s"
7891
                       " on node %s" % (instance, pnode_name))
7892

    
7893
      elif self.op.mode == constants.INSTANCE_IMPORT:
7894
        feedback_fn("* running the instance OS import scripts...")
7895

    
7896
        transfers = []
7897

    
7898
        for idx, image in enumerate(self.src_images):
7899
          if not image:
7900
            continue
7901

    
7902
          # FIXME: pass debug option from opcode to backend
7903
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7904
                                             constants.IEIO_FILE, (image, ),
7905
                                             constants.IEIO_SCRIPT,
7906
                                             (iobj.disks[idx], idx),
7907
                                             None)
7908
          transfers.append(dt)
7909

    
7910
        import_result = \
7911
          masterd.instance.TransferInstanceData(self, feedback_fn,
7912
                                                self.op.src_node, pnode_name,
7913
                                                self.pnode.secondary_ip,
7914
                                                iobj, transfers)
7915
        if not compat.all(import_result):
7916
          self.LogWarning("Some disks for instance %s on node %s were not"
7917
                          " imported successfully" % (instance, pnode_name))
7918

    
7919
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7920
        feedback_fn("* preparing remote import...")
7921
        # The source cluster will stop the instance before attempting to make a
7922
        # connection. In some cases stopping an instance can take a long time,
7923
        # hence the shutdown timeout is added to the connection timeout.
7924
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7925
                           self.op.source_shutdown_timeout)
7926
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7927

    
7928
        assert iobj.primary_node == self.pnode.name
7929
        disk_results = \
7930
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7931
                                        self.source_x509_ca,
7932
                                        self._cds, timeouts)
7933
        if not compat.all(disk_results):
7934
          # TODO: Should the instance still be started, even if some disks
7935
          # failed to import (valid for local imports, too)?
7936
          self.LogWarning("Some disks for instance %s on node %s were not"
7937
                          " imported successfully" % (instance, pnode_name))
7938

    
7939
        # Run rename script on newly imported instance
7940
        assert iobj.name == instance
7941
        feedback_fn("Running rename script for %s" % instance)
7942
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7943
                                                   self.source_instance_name,
7944
                                                   self.op.debug_level)
7945
        if result.fail_msg:
7946
          self.LogWarning("Failed to run rename script for %s on node"
7947
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7948

    
7949
      else:
7950
        # also checked in the prereq part
7951
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7952
                                     % self.op.mode)
7953

    
7954
    if self.op.start:
7955
      iobj.admin_up = True
7956
      self.cfg.Update(iobj, feedback_fn)
7957
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7958
      feedback_fn("* starting instance...")
7959
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7960
      result.Raise("Could not start instance")
7961

    
7962
    return list(iobj.all_nodes)
7963

    
7964

    
7965
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = "ERROR_down"
      else:
        state = "ADMIN_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)

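# Usage note for LUInstanceConsole above (illustrative): this LU backs the
# "gnt-instance console" client command; the dict returned by Exec tells the
# client how to attach to the console from the master node.
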
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
8035
  """Replace the disks of an instance.
8036

8037
  """
8038
  HPATH = "mirrors-replace"
8039
  HTYPE = constants.HTYPE_INSTANCE
8040
  REQ_BGL = False
8041

    
8042
  def CheckArguments(self):
8043
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8044
                                  self.op.iallocator)
8045

    
8046
  def ExpandNames(self):
8047
    self._ExpandAndLockInstance()
8048

    
8049
    if self.op.iallocator is not None:
8050
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8051

    
8052
    elif self.op.remote_node is not None:
8053
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8054
      self.op.remote_node = remote_node
8055

    
8056
      # Warning: do not remove the locking of the new secondary here
8057
      # unless DRBD8.AddChildren is changed to work in parallel;
8058
      # currently it doesn't since parallel invocations of
8059
      # FindUnusedMinor will conflict
8060
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8061
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8062

    
8063
    else:
8064
      self.needed_locks[locking.LEVEL_NODE] = []
8065
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8066

    
8067
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8068
                                   self.op.iallocator, self.op.remote_node,
8069
                                   self.op.disks, False, self.op.early_release)
8070

    
8071
    self.tasklets = [self.replacer]
8072

    
8073
  def DeclareLocks(self, level):
8074
    # If we're not already locking all nodes in the set we have to declare the
8075
    # instance's primary/secondary nodes.
8076
    if (level == locking.LEVEL_NODE and
8077
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8078
      self._LockInstancesNodes()
8079

    
8080
  def BuildHooksEnv(self):
8081
    """Build hooks env.
8082

8083
    This runs on the master, the primary and all the secondaries.
8084

8085
    """
8086
    instance = self.replacer.instance
8087
    env = {
8088
      "MODE": self.op.mode,
8089
      "NEW_SECONDARY": self.op.remote_node,
8090
      "OLD_SECONDARY": instance.secondary_nodes[0],
8091
      }
8092
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8093
    nl = [
8094
      self.cfg.GetMasterNode(),
8095
      instance.primary_node,
8096
      ]
8097
    if self.op.remote_node is not None:
8098
      nl.append(self.op.remote_node)
8099
    return env, nl, nl
8100

    
8101

    
8102
class TLReplaceDisks(Tasklet):
8103
  """Replaces disks for an instance.
8104

8105
  Note: Locking is not within the scope of this class.
8106

8107
  """
8108
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8109
               disks, delay_iallocator, early_release):
8110
    """Initializes this class.
8111

8112
    """
8113
    Tasklet.__init__(self, lu)
8114

    
8115
    # Parameters
8116
    self.instance_name = instance_name
8117
    self.mode = mode
8118
    self.iallocator_name = iallocator_name
8119
    self.remote_node = remote_node
8120
    self.disks = disks
8121
    self.delay_iallocator = delay_iallocator
8122
    self.early_release = early_release
8123

    
8124
    # Runtime data
8125
    self.instance = None
8126
    self.new_node = None
8127
    self.target_node = None
8128
    self.other_node = None
8129
    self.remote_node_info = None
8130
    self.node_secondary_ip = None
8131

    
8132
  @staticmethod
8133
  def CheckArguments(mode, remote_node, iallocator):
8134
    """Helper function for users of this class.
8135

8136
    """
8137
    # check for valid parameter combination
8138
    if mode == constants.REPLACE_DISK_CHG:
8139
      if remote_node is None and iallocator is None:
8140
        raise errors.OpPrereqError("When changing the secondary either an"
8141
                                   " iallocator script must be used or the"
8142
                                   " new node given", errors.ECODE_INVAL)
8143

    
8144
      if remote_node is not None and iallocator is not None:
8145
        raise errors.OpPrereqError("Give either the iallocator or the new"
8146
                                   " secondary, not both", errors.ECODE_INVAL)
8147

    
8148
    elif remote_node is not None or iallocator is not None:
8149
      # Not replacing the secondary
8150
      raise errors.OpPrereqError("The iallocator and new node options can"
8151
                                 " only be used when changing the"
8152
                                 " secondary node", errors.ECODE_INVAL)
8153

    
8154
  @staticmethod
8155
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8156
    """Compute a new secondary node using an IAllocator.
8157

8158
    """
8159
    ial = IAllocator(lu.cfg, lu.rpc,
8160
                     mode=constants.IALLOCATOR_MODE_RELOC,
8161
                     name=instance_name,
8162
                     relocate_from=relocate_from)
8163

    
8164
    ial.Run(iallocator_name)
8165

    
8166
    if not ial.success:
8167
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8168
                                 " %s" % (iallocator_name, ial.info),
8169
                                 errors.ECODE_NORES)
8170

    
8171
    if len(ial.result) != ial.required_nodes:
8172
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8173
                                 " of nodes (%s), required %s" %
8174
                                 (iallocator_name,
8175
                                  len(ial.result), ial.required_nodes),
8176
                                 errors.ECODE_FAULT)
8177

    
8178
    remote_node_name = ial.result[0]
8179

    
8180
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8181
               instance_name, remote_node_name)
8182

    
8183
    return remote_node_name
8184

    
8185
  def _FindFaultyDisks(self, node_name):
8186
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8187
                                    node_name, True)
8188

    
8189
  def CheckPrereq(self):
8190
    """Check prerequisites.
8191

8192
    This checks that the instance is in the cluster.
8193

8194
    """
8195
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8196
    assert instance is not None, \
8197
      "Cannot retrieve locked instance %s" % self.instance_name
8198

    
8199
    if instance.disk_template != constants.DT_DRBD8:
8200
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8201
                                 " instances", errors.ECODE_INVAL)
8202

    
8203
    if len(instance.secondary_nodes) != 1:
8204
      raise errors.OpPrereqError("The instance has a strange layout,"
8205
                                 " expected one secondary but found %d" %
8206
                                 len(instance.secondary_nodes),
8207
                                 errors.ECODE_FAULT)
8208

    
8209
    if not self.delay_iallocator:
8210
      self._CheckPrereq2()
8211

    
8212
  def _CheckPrereq2(self):
8213
    """Check prerequisites, second part.
8214

8215
    This function should always be part of CheckPrereq. It was separated and is
8216
    now called from Exec because during node evacuation iallocator was only
8217
    called with an unmodified cluster model, not taking planned changes into
8218
    account.
8219

8220
    """
8221
    instance = self.instance
8222
    secondary_node = instance.secondary_nodes[0]
8223

    
8224
    if self.iallocator_name is None:
8225
      remote_node = self.remote_node
8226
    else:
8227
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8228
                                       instance.name, instance.secondary_nodes)
8229

    
8230
    if remote_node is not None:
8231
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8232
      assert self.remote_node_info is not None, \
8233
        "Cannot retrieve locked node %s" % remote_node
8234
    else:
8235
      self.remote_node_info = None
8236

    
8237
    if remote_node == self.instance.primary_node:
8238
      raise errors.OpPrereqError("The specified node is the primary node of"
8239
                                 " the instance.", errors.ECODE_INVAL)
8240

    
8241
    if remote_node == secondary_node:
8242
      raise errors.OpPrereqError("The specified node is already the"
8243
                                 " secondary node of the instance.",
8244
                                 errors.ECODE_INVAL)
8245

    
8246
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8247
                                    constants.REPLACE_DISK_CHG):
8248
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8249
                                 errors.ECODE_INVAL)
8250

    
8251
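    # In "auto" mode the faulty side is detected automatically: if exactly one
    # node has faulty disks, those disks are replaced in place on that node;
    # faulty disks on both nodes abort the operation.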
    if self.mode == constants.REPLACE_DISK_AUTO:
8252
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8253
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8254

    
8255
      if faulty_primary and faulty_secondary:
8256
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8257
                                   " one node and can not be repaired"
8258
                                   " automatically" % self.instance_name,
8259
                                   errors.ECODE_STATE)
8260

    
8261
      if faulty_primary:
8262
        self.disks = faulty_primary
8263
        self.target_node = instance.primary_node
8264
        self.other_node = secondary_node
8265
        check_nodes = [self.target_node, self.other_node]
8266
      elif faulty_secondary:
8267
        self.disks = faulty_secondary
8268
        self.target_node = secondary_node
8269
        self.other_node = instance.primary_node
8270
        check_nodes = [self.target_node, self.other_node]
8271
      else:
8272
        self.disks = []
8273
        check_nodes = []
8274

    
8275
    else:
8276
      # Non-automatic modes
8277
      if self.mode == constants.REPLACE_DISK_PRI:
8278
        self.target_node = instance.primary_node
8279
        self.other_node = secondary_node
8280
        check_nodes = [self.target_node, self.other_node]
8281

    
8282
      elif self.mode == constants.REPLACE_DISK_SEC:
8283
        self.target_node = secondary_node
8284
        self.other_node = instance.primary_node
8285
        check_nodes = [self.target_node, self.other_node]
8286

    
8287
      elif self.mode == constants.REPLACE_DISK_CHG:
8288
        self.new_node = remote_node
8289
        self.other_node = instance.primary_node
8290
        self.target_node = secondary_node
8291
        check_nodes = [self.new_node, self.other_node]
8292

    
8293
        _CheckNodeNotDrained(self.lu, remote_node)
8294
        _CheckNodeVmCapable(self.lu, remote_node)
8295

    
8296
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8297
        assert old_node_info is not None
8298
        if old_node_info.offline and not self.early_release:
8299
          # doesn't make sense to delay the release
8300
          self.early_release = True
8301
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8302
                          " early-release mode", secondary_node)
8303

    
8304
      else:
8305
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8306
                                     self.mode)
8307

    
8308
      # If not specified all disks should be replaced
8309
      if not self.disks:
8310
        self.disks = range(len(self.instance.disks))
8311

    
8312
    for node in check_nodes:
8313
      _CheckNodeOnline(self.lu, node)
8314

    
8315
    touched_nodes = frozenset([self.new_node, self.other_node,
8316
                               self.target_node])
8317

    
8318
    if self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET:
8319
      # Release unneeded node locks
8320
      for name in self.lu.acquired_locks[locking.LEVEL_NODE]:
8321
        if name not in touched_nodes:
8322
          self._ReleaseNodeLock(name)
8323

    
8324
    # Check whether disks are valid
8325
    for disk_idx in self.disks:
8326
      instance.FindDisk(disk_idx)
8327

    
8328
    # Get secondary node IP addresses
8329
    self.node_secondary_ip = \
8330
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8331
           for node_name in touched_nodes
8332
           if node_name is not None)
8333

    
8334
  def Exec(self, feedback_fn):
8335
    """Execute disk replacement.
8336

8337
    This dispatches the disk replacement to the appropriate handler.
8338

8339
    """
8340
    if self.delay_iallocator:
8341
      self._CheckPrereq2()
8342

    
8343
    if (self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET and
8344
        __debug__):
8345
      # Verify owned locks before starting operation
8346
      owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8347
      assert set(owned_locks) == set(self.node_secondary_ip), \
8348
          "Not owning the correct locks: %s" % (owned_locks, )
8349

    
8350
    if not self.disks:
8351
      feedback_fn("No disks need replacement")
8352
      return
8353

    
8354
    feedback_fn("Replacing disk(s) %s for %s" %
8355
                (utils.CommaJoin(self.disks), self.instance.name))
8356

    
8357
    activate_disks = (not self.instance.admin_up)
8358

    
8359
    # Activate the instance disks if we're replacing them on a down instance
8360
    if activate_disks:
8361
      _StartInstanceDisks(self.lu, self.instance, True)
8362

    
8363
    try:
8364
      # Should we replace the secondary node?
8365
      if self.new_node is not None:
8366
        fn = self._ExecDrbd8Secondary
8367
      else:
8368
        fn = self._ExecDrbd8DiskOnly
8369

    
8370
      result = fn(feedback_fn)
8371
    finally:
8372
      # Deactivate the instance disks if we're replacing them on a
8373
      # down instance
8374
      if activate_disks:
8375
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8376

    
8377
    if __debug__:
8378
      # Verify owned locks
8379
      owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8380
      assert ((self.early_release and not owned_locks) or
8381
              (not self.early_release and
8382
               set(owned_locks) == set(self.node_secondary_ip))), \
8383
        ("Not owning the correct locks, early_release=%s, owned=%r" %
8384
         (self.early_release, owned_locks))
8385

    
8386
    return result
8387

    
8388
  def _CheckVolumeGroup(self, nodes):
8389
    self.lu.LogInfo("Checking volume groups")
8390

    
8391
    vgname = self.cfg.GetVGName()
8392

    
8393
    # Make sure volume group exists on all involved nodes
8394
    results = self.rpc.call_vg_list(nodes)
8395
    if not results:
8396
      raise errors.OpExecError("Can't list volume groups on the nodes")
8397

    
8398
    for node in nodes:
8399
      res = results[node]
8400
      res.Raise("Error checking node %s" % node)
8401
      if vgname not in res.payload:
8402
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8403
                                 (vgname, node))
8404

    
8405
  def _CheckDisksExistence(self, nodes):
8406
    # Check disk existence
8407
    for idx, dev in enumerate(self.instance.disks):
8408
      if idx not in self.disks:
8409
        continue
8410

    
8411
      for node in nodes:
8412
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8413
        self.cfg.SetDiskID(dev, node)
8414

    
8415
        result = self.rpc.call_blockdev_find(node, dev)
8416

    
8417
        msg = result.fail_msg
8418
        if msg or not result.payload:
8419
          if not msg:
8420
            msg = "disk not found"
8421
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8422
                                   (idx, node, msg))
8423

    
8424
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8425
    for idx, dev in enumerate(self.instance.disks):
8426
      if idx not in self.disks:
8427
        continue
8428

    
8429
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8430
                      (idx, node_name))
8431

    
8432
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8433
                                   ldisk=ldisk):
8434
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8435
                                 " replace disks for instance %s" %
8436
                                 (node_name, self.instance.name))
8437

    
8438
  def _CreateNewStorage(self, node_name):
8439
    """Create new storage on the primary or secondary node.
8440

8441
    This is only used for same-node replaces, not for changing the
8442
    secondary node, hence we don't want to modify the existing disk.
8443

8444
    """
8445
    iv_names = {}
8446

    
8447
    for idx, dev in enumerate(self.instance.disks):
8448
      if idx not in self.disks:
8449
        continue
8450

    
8451
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8452

    
8453
      self.cfg.SetDiskID(dev, node_name)
8454

    
8455
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8456
      names = _GenerateUniqueNames(self.lu, lv_names)
8457

    
8458
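      # The replacement LVs mirror the existing DRBD backing storage: a data
      # LV of the same size plus a small metadata LV, created in the same
      # volume groups as the old children.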
      vg_data = dev.children[0].logical_id[0]
8459
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8460
                             logical_id=(vg_data, names[0]))
8461
      vg_meta = dev.children[1].logical_id[0]
8462
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8463
                             logical_id=(vg_meta, names[1]))
8464

    
8465
      new_lvs = [lv_data, lv_meta]
8466
      old_lvs = [child.Copy() for child in dev.children]
8467
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8468

    
8469
      # we pass force_create=True to force the LVM creation
8470
      for new_lv in new_lvs:
8471
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8472
                        _GetInstanceInfoText(self.instance), False)
8473

    
8474
    return iv_names
8475

    
8476
  def _CheckDevices(self, node_name, iv_names):
8477
    for name, (dev, _, _) in iv_names.iteritems():
8478
      self.cfg.SetDiskID(dev, node_name)
8479

    
8480
      result = self.rpc.call_blockdev_find(node_name, dev)
8481

    
8482
      msg = result.fail_msg
8483
      if msg or not result.payload:
8484
        if not msg:
8485
          msg = "disk not found"
8486
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8487
                                 (name, msg))
8488

    
8489
      if result.payload.is_degraded:
8490
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8491

    
8492
  def _RemoveOldStorage(self, node_name, iv_names):
8493
    for name, (_, old_lvs, _) in iv_names.iteritems():
8494
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8495

    
8496
      for lv in old_lvs:
8497
        self.cfg.SetDiskID(lv, node_name)
8498

    
8499
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8500
        if msg:
8501
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8502
                             hint="remove unused LVs manually")
8503

    
8504
  def _ReleaseNodeLock(self, node_name):
8505
    """Releases the lock for a given node."""
8506
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8507

    
8508
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
8509
    """Replace a disk on the primary or secondary for DRBD 8.
8510

8511
    The algorithm for replace is quite complicated:
8512

8513
      1. for each disk to be replaced:
8514

8515
        1. create new LVs on the target node with unique names
8516
        1. detach old LVs from the drbd device
8517
        1. rename old LVs to name_replaced.<time_t>
8518
        1. rename new LVs to old LVs
8519
        1. attach the new LVs (with the old names now) to the drbd device
8520

8521
      1. wait for sync across all devices
8522

8523
      1. for each modified disk:
8524

8525
        1. remove old LVs (which have the name name_replaced.<time_t>)
8526

8527
    Failures are not very well handled.
8528

8529
    """
8530
    steps_total = 6
8531

    
8532
    # Step: check device activation
8533
    self.lu.LogStep(1, steps_total, "Check device existence")
8534
    self._CheckDisksExistence([self.other_node, self.target_node])
8535
    self._CheckVolumeGroup([self.target_node, self.other_node])
8536

    
8537
    # Step: check other node consistency
8538
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8539
    self._CheckDisksConsistency(self.other_node,
8540
                                self.other_node == self.instance.primary_node,
8541
                                False)
8542

    
8543
    # Step: create new storage
8544
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8545
    iv_names = self._CreateNewStorage(self.target_node)
8546

    
8547
    # Step: for each lv, detach+rename*2+attach
8548
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8549
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8550
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8551

    
8552
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8553
                                                     old_lvs)
8554
      result.Raise("Can't detach drbd from local storage on node"
8555
                   " %s for device %s" % (self.target_node, dev.iv_name))
8556
      #dev.children = []
8557
      #cfg.Update(instance)
8558

    
8559
      # ok, we created the new LVs, so now we know we have the needed
8560
      # storage; as such, we proceed on the target node to rename
8561
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8562
      # using the assumption that logical_id == physical_id (which in
8563
      # turn is the unique_id on that node)
8564

    
8565
      # FIXME(iustin): use a better name for the replaced LVs
8566
      temp_suffix = int(time.time())
8567
      ren_fn = lambda d, suff: (d.physical_id[0],
8568
                                d.physical_id[1] + "_replaced-%s" % suff)
8569

    
8570
      # Build the rename list based on what LVs exist on the node
8571
      rename_old_to_new = []
8572
      for to_ren in old_lvs:
8573
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8574
        if not result.fail_msg and result.payload:
8575
          # device exists
8576
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8577

    
8578
      self.lu.LogInfo("Renaming the old LVs on the target node")
8579
      result = self.rpc.call_blockdev_rename(self.target_node,
8580
                                             rename_old_to_new)
8581
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8582

    
8583
      # Now we rename the new LVs to the old LVs
8584
      self.lu.LogInfo("Renaming the new LVs on the target node")
8585
      rename_new_to_old = [(new, old.physical_id)
8586
                           for old, new in zip(old_lvs, new_lvs)]
8587
      result = self.rpc.call_blockdev_rename(self.target_node,
8588
                                             rename_new_to_old)
8589
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8590

    
8591
      # Intermediate steps of in memory modifications
8592
      for old, new in zip(old_lvs, new_lvs):
8593
        new.logical_id = old.logical_id
8594
        self.cfg.SetDiskID(new, self.target_node)
8595

    
8596
      # We need to modify old_lvs so that removal later removes the
8597
      # right LVs, not the newly added ones; note that old_lvs is a
8598
      # copy here
8599
      for disk in old_lvs:
8600
        disk.logical_id = ren_fn(disk, temp_suffix)
8601
        self.cfg.SetDiskID(disk, self.target_node)
8602

    
8603
      # Now that the new lvs have the old name, we can add them to the device
8604
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8605
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8606
                                                  new_lvs)
8607
      msg = result.fail_msg
8608
      if msg:
8609
        for new_lv in new_lvs:
8610
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8611
                                               new_lv).fail_msg
8612
          if msg2:
8613
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8614
                               hint=("cleanup manually the unused logical"
8615
                                     "volumes"))
8616
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8617

    
8618
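    # With early_release the old LVs are removed (and the node locks given up)
    # before waiting for the resync; otherwise removal only happens after the
    # devices have synced and been verified.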
    cstep = 5
8619
    if self.early_release:
8620
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8621
      cstep += 1
8622
      self._RemoveOldStorage(self.target_node, iv_names)
8623
      # WARNING: we release both node locks here, do not do other RPCs
8624
      # than WaitForSync to the primary node
8625
      self._ReleaseNodeLock([self.target_node, self.other_node])
8626

    
8627
    # Wait for sync
8628
    # This can fail as the old devices are degraded and _WaitForSync
8629
    # does a combined result over all disks, so we don't check its return value
8630
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8631
    cstep += 1
8632
    _WaitForSync(self.lu, self.instance)
8633

    
8634
    # Check all devices manually
8635
    self._CheckDevices(self.instance.primary_node, iv_names)
8636

    
8637
    # Step: remove old storage
8638
    if not self.early_release:
8639
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8640
      cstep += 1
8641
      self._RemoveOldStorage(self.target_node, iv_names)
8642

    
8643
  def _ExecDrbd8Secondary(self, feedback_fn):
8644
    """Replace the secondary node for DRBD 8.
8645

8646
    The algorithm for replace is quite complicated:
8647
      - for all disks of the instance:
8648
        - create new LVs on the new node with same names
8649
        - shutdown the drbd device on the old secondary
8650
        - disconnect the drbd network on the primary
8651
        - create the drbd device on the new secondary
8652
        - network attach the drbd on the primary, using an artifice:
8653
          the drbd code for Attach() will connect to the network if it
8654
          finds a device which is connected to the correct local disks but
8655
          not network enabled
8656
      - wait for sync across all devices
8657
      - remove all disks from the old secondary
8658

8659
    Failures are not very well handled.
8660

8661
    """
8662
    steps_total = 6
8663

    
8664
    # Step: check device activation
8665
    self.lu.LogStep(1, steps_total, "Check device existence")
8666
    self._CheckDisksExistence([self.instance.primary_node])
8667
    self._CheckVolumeGroup([self.instance.primary_node])
8668

    
8669
    # Step: check other node consistency
8670
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8671
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8672

    
8673
    # Step: create new storage
8674
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8675
    for idx, dev in enumerate(self.instance.disks):
8676
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8677
                      (self.new_node, idx))
8678
      # we pass force_create=True to force LVM creation
8679
      for new_lv in dev.children:
8680
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8681
                        _GetInstanceInfoText(self.instance), False)
8682

    
8683
    # Step 4: drbd minors and drbd setup changes
8684
    # after this, we must manually remove the drbd minors on both the
8685
    # error and the success paths
8686
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8687
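    # One new DRBD minor is reserved on the new node for every instance disk;
    # they are released again (ReleaseDRBDMinors) if device creation or the
    # network detach below fails.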
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8688
                                         for dev in self.instance.disks],
8689
                                        self.instance.name)
8690
    logging.debug("Allocated minors %r", minors)
8691

    
8692
    iv_names = {}
8693
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8694
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8695
                      (self.new_node, idx))
8696
      # create new devices on new_node; note that we create two IDs:
8697
      # one without port, so the drbd will be activated without
8698
      # networking information on the new node at this stage, and one
8699
      # with network, for the latter activation in step 4
8700
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8701
      if self.instance.primary_node == o_node1:
8702
        p_minor = o_minor1
8703
      else:
8704
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8705
        p_minor = o_minor2
8706

    
8707
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8708
                      p_minor, new_minor, o_secret)
8709
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8710
                    p_minor, new_minor, o_secret)
8711

    
8712
      iv_names[idx] = (dev, dev.children, new_net_id)
8713
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8714
                    new_net_id)
8715
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8716
                              logical_id=new_alone_id,
8717
                              children=dev.children,
8718
                              size=dev.size)
8719
      try:
8720
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8721
                              _GetInstanceInfoText(self.instance), False)
8722
      except errors.GenericError:
8723
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8724
        raise
8725

    
8726
    # We have new devices, shutdown the drbd on the old secondary
8727
    for idx, dev in enumerate(self.instance.disks):
8728
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8729
      self.cfg.SetDiskID(dev, self.target_node)
8730
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8731
      if msg:
8732
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8733
                           "node: %s" % (idx, msg),
8734
                           hint=("Please cleanup this device manually as"
8735
                                 " soon as possible"))
8736

    
8737
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8738
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8739
                                               self.node_secondary_ip,
8740
                                               self.instance.disks)\
8741
                                              [self.instance.primary_node]
8742

    
8743
    msg = result.fail_msg
8744
    if msg:
8745
      # detaches didn't succeed (unlikely)
8746
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8747
      raise errors.OpExecError("Can't detach the disks from the network on"
8748
                               " old node: %s" % (msg,))
8749

    
8750
    # if we managed to detach at least one, we update all the disks of
8751
    # the instance to point to the new secondary
8752
    self.lu.LogInfo("Updating instance configuration")
8753
    for dev, _, new_logical_id in iv_names.itervalues():
8754
      dev.logical_id = new_logical_id
8755
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8756

    
8757
    self.cfg.Update(self.instance, feedback_fn)
8758

    
8759
    # and now perform the drbd attach
8760
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8761
                    " (standalone => connected)")
8762
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8763
                                            self.new_node],
8764
                                           self.node_secondary_ip,
8765
                                           self.instance.disks,
8766
                                           self.instance.name,
8767
                                           False)
8768
    for to_node, to_result in result.items():
8769
      msg = to_result.fail_msg
8770
      if msg:
8771
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8772
                           to_node, msg,
8773
                           hint=("please do a gnt-instance info to see the"
8774
                                 " status of disks"))
8775
    cstep = 5
8776
    if self.early_release:
8777
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8778
      cstep += 1
8779
      self._RemoveOldStorage(self.target_node, iv_names)
8780
      # WARNING: we release all node locks here, do not do other RPCs
8781
      # than WaitForSync to the primary node
8782
      self._ReleaseNodeLock([self.instance.primary_node,
8783
                             self.target_node,
8784
                             self.new_node])
8785

    
8786
    # Wait for sync
8787
    # This can fail as the old devices are degraded and _WaitForSync
8788
    # does a combined result over all disks, so we don't check its return value
8789
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8790
    cstep += 1
8791
    _WaitForSync(self.lu, self.instance)
8792

    
8793
    # Check all devices manually
8794
    self._CheckDevices(self.instance.primary_node, iv_names)
8795

    
8796
    # Step: remove old storage
8797
    if not self.early_release:
8798
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8799
      self._RemoveOldStorage(self.target_node, iv_names)
8800

    
8801

    
8802
class LURepairNodeStorage(NoHooksLU):
8803
  """Repairs the volume group on a node.
8804

8805
  """
8806
  REQ_BGL = False
8807

    
8808
  def CheckArguments(self):
8809
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8810

    
8811
    storage_type = self.op.storage_type
8812

    
8813
    if (constants.SO_FIX_CONSISTENCY not in
8814
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8815
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8816
                                 " repaired" % storage_type,
8817
                                 errors.ECODE_INVAL)
8818

    
8819
  def ExpandNames(self):
8820
    self.needed_locks = {
8821
      locking.LEVEL_NODE: [self.op.node_name],
8822
      }
8823

    
8824
  def _CheckFaultyDisks(self, instance, node_name):
8825
    """Ensure faulty disks abort the opcode or at least warn."""
8826
    try:
8827
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8828
                                  node_name, True):
8829
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8830
                                   " node '%s'" % (instance.name, node_name),
8831
                                   errors.ECODE_STATE)
8832
    except errors.OpPrereqError, err:
8833
      if self.op.ignore_consistency:
8834
        self.proc.LogWarning(str(err.args[0]))
8835
      else:
8836
        raise
8837

    
8838
  def CheckPrereq(self):
8839
    """Check prerequisites.
8840

8841
    """
8842
    # Check whether any instance on this node has faulty disks
8843
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8844
      if not inst.admin_up:
8845
        continue
8846
      check_nodes = set(inst.all_nodes)
8847
      check_nodes.discard(self.op.node_name)
8848
      for inst_node_name in check_nodes:
8849
        self._CheckFaultyDisks(inst, inst_node_name)
8850

    
8851
  def Exec(self, feedback_fn):
8852
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8853
                (self.op.name, self.op.node_name))
8854

    
8855
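    # The actual repair is delegated to the node daemon through the generic
    # storage_execute RPC, using the SO_FIX_CONSISTENCY operation.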
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8856
    result = self.rpc.call_storage_execute(self.op.node_name,
8857
                                           self.op.storage_type, st_args,
8858
                                           self.op.name,
8859
                                           constants.SO_FIX_CONSISTENCY)
8860
    result.Raise("Failed to repair storage unit '%s' on %s" %
8861
                 (self.op.name, self.op.node_name))
8862

    
8863

    
8864
class LUNodeEvacStrategy(NoHooksLU):
8865
  """Computes the node evacuation strategy.
8866

8867
  """
8868
  REQ_BGL = False
8869

    
8870
  def CheckArguments(self):
8871
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8872

    
8873
  def ExpandNames(self):
8874
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8875
    self.needed_locks = locks = {}
8876
    if self.op.remote_node is None:
8877
      locks[locking.LEVEL_NODE] = locking.ALL_SET
8878
    else:
8879
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8880
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8881

    
8882
  def Exec(self, feedback_fn):
8883
    instances = []
8884
    for node in self.op.nodes:
8885
      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8886
    if not instances:
8887
      return []
8888

    
8889
    if self.op.remote_node is not None:
8890
      result = []
8891
      for i in instances:
8892
        if i.primary_node == self.op.remote_node:
8893
          raise errors.OpPrereqError("Node %s is the primary node of"
8894
                                     " instance %s, cannot use it as"
8895
                                     " secondary" %
8896
                                     (self.op.remote_node, i.name),
8897
                                     errors.ECODE_INVAL)
8898
        result.append([i.name, self.op.remote_node])
8899
    else:
8900
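      # No explicit target node was given, so ask the iallocator for a
      # multi-evacuation plan covering all affected instances.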
      ial = IAllocator(self.cfg, self.rpc,
8901
                       mode=constants.IALLOCATOR_MODE_MEVAC,
8902
                       evac_nodes=self.op.nodes)
8903
      ial.Run(self.op.iallocator, validate=True)
8904
      if not ial.success:
8905
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8906
                                 errors.ECODE_NORES)
8907
      result = ial.result
8908
    return result
8909

    
8910

    
8911
class LUInstanceGrowDisk(LogicalUnit):
8912
  """Grow a disk of an instance.
8913

8914
  """
8915
  HPATH = "disk-grow"
8916
  HTYPE = constants.HTYPE_INSTANCE
8917
  REQ_BGL = False
8918

    
8919
  def ExpandNames(self):
8920
    self._ExpandAndLockInstance()
8921
    self.needed_locks[locking.LEVEL_NODE] = []
8922
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8923

    
8924
  def DeclareLocks(self, level):
8925
    if level == locking.LEVEL_NODE:
8926
      self._LockInstancesNodes()
8927

    
8928
  def BuildHooksEnv(self):
8929
    """Build hooks env.
8930

8931
    This runs on the master, the primary and all the secondaries.
8932

8933
    """
8934
    env = {
8935
      "DISK": self.op.disk,
8936
      "AMOUNT": self.op.amount,
8937
      }
8938
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8939
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8940
    return env, nl, nl
8941

    
8942
  def CheckPrereq(self):
8943
    """Check prerequisites.
8944

8945
    This checks that the instance is in the cluster.
8946

8947
    """
8948
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8949
    assert instance is not None, \
8950
      "Cannot retrieve locked instance %s" % self.op.instance_name
8951
    nodenames = list(instance.all_nodes)
8952
    for node in nodenames:
8953
      _CheckNodeOnline(self, node)
8954

    
8955
    self.instance = instance
8956

    
8957
    if instance.disk_template not in constants.DTS_GROWABLE:
8958
      raise errors.OpPrereqError("Instance's disk layout does not support"
8959
                                 " growing.", errors.ECODE_INVAL)
8960

    
8961
    self.disk = instance.FindDisk(self.op.disk)
8962

    
8963
    if instance.disk_template != constants.DT_FILE:
8964
      # TODO: check the free disk space for file, when that feature
8965
      # will be supported
8966
      _CheckNodesFreeDiskPerVG(self, nodenames,
8967
                               self.disk.ComputeGrowth(self.op.amount))
8968

    
8969
  def Exec(self, feedback_fn):
8970
    """Execute disk grow.
8971

8972
    """
8973
    instance = self.instance
8974
    disk = self.disk
8975

    
8976
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8977
    if not disks_ok:
8978
      raise errors.OpExecError("Cannot activate block device to grow")
8979

    
8980
    for node in instance.all_nodes:
8981
      self.cfg.SetDiskID(disk, node)
8982
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8983
      result.Raise("Grow request failed to node %s" % node)
8984

    
8985
      # TODO: Rewrite code to work properly
8986
      # DRBD goes into sync mode for a short amount of time after executing the
8987
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8988
      # calling "resize" in sync mode fails. Sleeping for a short amount of
8989
      # time is a work-around.
8990
      time.sleep(5)
8991

    
8992
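    # All nodes accepted the grow request, so the new size is recorded in the
    # configuration before optionally waiting for the mirror to resync.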
    disk.RecordGrow(self.op.amount)
8993
    self.cfg.Update(instance, feedback_fn)
8994
    if self.op.wait_for_sync:
8995
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8996
      if disk_abort:
8997
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8998
                             " status.\nPlease check the instance.")
8999
      if not instance.admin_up:
9000
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9001
    elif not instance.admin_up:
9002
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9003
                           " not supposed to be running because no wait for"
9004
                           " sync mode was requested.")
9005

    
9006

    
9007
class LUInstanceQueryData(NoHooksLU):
9008
  """Query runtime instance data.
9009

9010
  """
9011
  REQ_BGL = False
9012

    
9013
  def ExpandNames(self):
9014
    self.needed_locks = {}
9015

    
9016
    # Use locking if requested or when non-static information is wanted
9017
    if not (self.op.static or self.op.use_locking):
9018
      self.LogWarning("Non-static data requested, locks need to be acquired")
9019
      self.op.use_locking = True
9020

    
9021
    if self.op.instances or not self.op.use_locking:
9022
      # Expand instance names right here
9023
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9024
    else:
9025
      # Will use acquired locks
9026
      self.wanted_names = None
9027

    
9028
    if self.op.use_locking:
9029
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9030

    
9031
      if self.wanted_names is None:
9032
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9033
      else:
9034
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9035

    
9036
      self.needed_locks[locking.LEVEL_NODE] = []
9037
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9038
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9039

    
9040
  def DeclareLocks(self, level):
9041
    if self.op.use_locking and level == locking.LEVEL_NODE:
9042
      self._LockInstancesNodes()
9043

    
9044
  def CheckPrereq(self):
9045
    """Check prerequisites.
9046

9047
    This only checks the optional instance list against the existing names.
9048

9049
    """
9050
    if self.wanted_names is None:
9051
      assert self.op.use_locking, "Locking was not used"
9052
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9053

    
9054
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9055
                             for name in self.wanted_names]
9056

    
9057
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9058
    """Returns the status of a block device
9059

9060
    """
9061
    if self.op.static or not node:
9062
      return None
9063

    
9064
    self.cfg.SetDiskID(dev, node)
9065

    
9066
    result = self.rpc.call_blockdev_find(node, dev)
9067
    if result.offline:
9068
      return None
9069

    
9070
    result.Raise("Can't compute disk status for %s" % instance_name)
9071

    
9072
    status = result.payload
9073
    if status is None:
9074
      return None
9075

    
9076
    return (status.dev_path, status.major, status.minor,
9077
            status.sync_percent, status.estimated_time,
9078
            status.is_degraded, status.ldisk_status)
9079

    
9080
  def _ComputeDiskStatus(self, instance, snode, dev):
9081
    """Compute block device status.
9082

9083
    """
9084
    if dev.dev_type in constants.LDS_DRBD:
9085
      # we change the snode then (otherwise we use the one passed in)
9086
      if dev.logical_id[0] == instance.primary_node:
9087
        snode = dev.logical_id[1]
9088
      else:
9089
        snode = dev.logical_id[0]
9090

    
9091
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9092
                                              instance.name, dev)
9093
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9094

    
9095
    if dev.children:
9096
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9097
                      for child in dev.children]
9098
    else:
9099
      dev_children = []
9100

    
9101
    return {
9102
      "iv_name": dev.iv_name,
9103
      "dev_type": dev.dev_type,
9104
      "logical_id": dev.logical_id,
9105
      "physical_id": dev.physical_id,
9106
      "pstatus": dev_pstatus,
9107
      "sstatus": dev_sstatus,
9108
      "children": dev_children,
9109
      "mode": dev.mode,
9110
      "size": dev.size,
9111
      }
9112

    
9113
  def Exec(self, feedback_fn):
9114
    """Gather and return data"""
9115
    result = {}
9116

    
9117
    cluster = self.cfg.GetClusterInfo()
9118

    
9119
    for instance in self.wanted_instances:
9120
      if not self.op.static:
9121
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9122
                                                  instance.name,
9123
                                                  instance.hypervisor)
9124
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9125
        remote_info = remote_info.payload
9126
        if remote_info and "state" in remote_info:
9127
          remote_state = "up"
9128
        else:
9129
          remote_state = "down"
9130
      else:
9131
        remote_state = None
9132
      if instance.admin_up:
9133
        config_state = "up"
9134
      else:
9135
        config_state = "down"
9136

    
9137
      disks = [self._ComputeDiskStatus(instance, None, device)
9138
               for device in instance.disks]
9139

    
9140
      result[instance.name] = {
9141
        "name": instance.name,
9142
        "config_state": config_state,
9143
        "run_state": remote_state,
9144
        "pnode": instance.primary_node,
9145
        "snodes": instance.secondary_nodes,
9146
        "os": instance.os,
9147
        # this happens to be the same format used for hooks
9148
        "nics": _NICListToTuple(self, instance.nics),
9149
        "disk_template": instance.disk_template,
9150
        "disks": disks,
9151
        "hypervisor": instance.hypervisor,
9152
        "network_port": instance.network_port,
9153
        "hv_instance": instance.hvparams,
9154
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9155
        "be_instance": instance.beparams,
9156
        "be_actual": cluster.FillBE(instance),
9157
        "os_instance": instance.osparams,
9158
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9159
        "serial_no": instance.serial_no,
9160
        "mtime": instance.mtime,
9161
        "ctime": instance.ctime,
9162
        "uuid": instance.uuid,
9163
        }
9164

    
9165
    return result
9166

    
9167

    
9168
class LUInstanceSetParams(LogicalUnit):
9169
  """Modifies an instances's parameters.
9170

9171
  """
9172
  HPATH = "instance-modify"
9173
  HTYPE = constants.HTYPE_INSTANCE
9174
  REQ_BGL = False
9175

    
9176
  def CheckArguments(self):
9177
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9178
            self.op.hvparams or self.op.beparams or self.op.os_name):
9179
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9180

    
9181
    if self.op.hvparams:
9182
      _CheckGlobalHvParams(self.op.hvparams)
9183

    
9184
    # Disk validation
9185
    disk_addremove = 0
9186
    for disk_op, disk_dict in self.op.disks:
9187
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9188
      if disk_op == constants.DDM_REMOVE:
9189
        disk_addremove += 1
9190
        continue
9191
      elif disk_op == constants.DDM_ADD:
9192
        disk_addremove += 1
9193
      else:
9194
        if not isinstance(disk_op, int):
9195
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9196
        if not isinstance(disk_dict, dict):
9197
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9198
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9199

    
9200
      if disk_op == constants.DDM_ADD:
9201
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9202
        if mode not in constants.DISK_ACCESS_SET:
9203
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9204
                                     errors.ECODE_INVAL)
9205
        size = disk_dict.get('size', None)
9206
        if size is None:
9207
          raise errors.OpPrereqError("Required disk parameter size missing",
9208
                                     errors.ECODE_INVAL)
9209
        try:
9210
          size = int(size)
9211
        except (TypeError, ValueError), err:
9212
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9213
                                     str(err), errors.ECODE_INVAL)
9214
        disk_dict['size'] = size
9215
      else:
9216
        # modification of disk
9217
        if 'size' in disk_dict:
9218
          raise errors.OpPrereqError("Disk size change not possible, use"
9219
                                     " grow-disk", errors.ECODE_INVAL)
9220

    
9221
    if disk_addremove > 1:
9222
      raise errors.OpPrereqError("Only one disk add or remove operation"
9223
                                 " supported at a time", errors.ECODE_INVAL)
9224

    
9225
    if self.op.disks and self.op.disk_template is not None:
9226
      raise errors.OpPrereqError("Disk template conversion and other disk"
9227
                                 " changes not supported at the same time",
9228
                                 errors.ECODE_INVAL)
9229

    
9230
    if (self.op.disk_template and
9231
        self.op.disk_template in constants.DTS_NET_MIRROR and
9232
        self.op.remote_node is None):
9233
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9234
                                 " one requires specifying a secondary node",
9235
                                 errors.ECODE_INVAL)
9236

    
9237
    # NIC validation
9238
    nic_addremove = 0
9239
    for nic_op, nic_dict in self.op.nics:
9240
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9241
      if nic_op == constants.DDM_REMOVE:
9242
        nic_addremove += 1
9243
        continue
9244
      elif nic_op == constants.DDM_ADD:
9245
        nic_addremove += 1
9246
      else:
9247
        if not isinstance(nic_op, int):
9248
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9249
        if not isinstance(nic_dict, dict):
9250
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9251
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9252

    
9253
      # nic_dict should be a dict
9254
      nic_ip = nic_dict.get('ip', None)
9255
      if nic_ip is not None:
9256
        if nic_ip.lower() == constants.VALUE_NONE:
9257
          nic_dict['ip'] = None
9258
        else:
9259
          if not netutils.IPAddress.IsValid(nic_ip):
9260
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9261
                                       errors.ECODE_INVAL)
9262

    
9263
      nic_bridge = nic_dict.get('bridge', None)
9264
      nic_link = nic_dict.get('link', None)
9265
      if nic_bridge and nic_link:
9266
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9267
                                   " at the same time", errors.ECODE_INVAL)
9268
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9269
        nic_dict['bridge'] = None
9270
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9271
        nic_dict['link'] = None
9272

    
9273
      if nic_op == constants.DDM_ADD:
9274
        nic_mac = nic_dict.get('mac', None)
9275
        if nic_mac is None:
9276
          nic_dict['mac'] = constants.VALUE_AUTO
9277

    
9278
      if 'mac' in nic_dict:
9279
        nic_mac = nic_dict['mac']
9280
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9281
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9282

    
9283
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9284
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9285
                                     " modifying an existing nic",
9286
                                     errors.ECODE_INVAL)
9287

    
9288
    if nic_addremove > 1:
9289
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9290
                                 " supported at a time", errors.ECODE_INVAL)
9291

    
9292
  def ExpandNames(self):
9293
    self._ExpandAndLockInstance()
9294
    self.needed_locks[locking.LEVEL_NODE] = []
9295
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9296

    
9297
  def DeclareLocks(self, level):
9298
    if level == locking.LEVEL_NODE:
9299
      self._LockInstancesNodes()
9300
      if self.op.disk_template and self.op.remote_node:
9301
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9302
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9303

    
9304
  def BuildHooksEnv(self):
9305
    """Build hooks env.
9306

9307
    This runs on the master, primary and secondaries.
9308

9309
    """
9310
    args = dict()
9311
    if constants.BE_MEMORY in self.be_new:
9312
      args['memory'] = self.be_new[constants.BE_MEMORY]
9313
    if constants.BE_VCPUS in self.be_new:
9314
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9315
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9316
    # information at all.
9317
    if self.op.nics:
9318
      args['nics'] = []
9319
      nic_override = dict(self.op.nics)
9320
      for idx, nic in enumerate(self.instance.nics):
9321
        if idx in nic_override:
9322
          this_nic_override = nic_override[idx]
9323
        else:
9324
          this_nic_override = {}
9325
        if 'ip' in this_nic_override:
9326
          ip = this_nic_override['ip']
9327
        else:
9328
          ip = nic.ip
9329
        if 'mac' in this_nic_override:
9330
          mac = this_nic_override['mac']
9331
        else:
9332
          mac = nic.mac
9333
        if idx in self.nic_pnew:
9334
          nicparams = self.nic_pnew[idx]
9335
        else:
9336
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9337
        mode = nicparams[constants.NIC_MODE]
9338
        link = nicparams[constants.NIC_LINK]
9339
        args['nics'].append((ip, mac, mode, link))
9340
      if constants.DDM_ADD in nic_override:
9341
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9342
        mac = nic_override[constants.DDM_ADD]['mac']
9343
        nicparams = self.nic_pnew[constants.DDM_ADD]
9344
        mode = nicparams[constants.NIC_MODE]
9345
        link = nicparams[constants.NIC_LINK]
9346
        args['nics'].append((ip, mac, mode, link))
9347
      elif constants.DDM_REMOVE in nic_override:
9348
        del args['nics'][-1]
9349

    
9350
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9351
    if self.op.disk_template:
9352
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9353
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9354
    return env, nl, nl
9355

    
9356
  def CheckPrereq(self):
9357
    """Check prerequisites.
9358

9359
    This only checks the instance list against the existing names.
9360

9361
    """
9362
    # checking the new params on the primary/secondary nodes
9363

    
9364
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9365
    cluster = self.cluster = self.cfg.GetClusterInfo()
9366
    assert self.instance is not None, \
9367
      "Cannot retrieve locked instance %s" % self.op.instance_name
9368
    pnode = instance.primary_node
9369
    nodelist = list(instance.all_nodes)
9370

    
9371
    # OS change
9372
    if self.op.os_name and not self.op.force:
9373
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9374
                      self.op.force_variant)
9375
      instance_os = self.op.os_name
9376
    else:
9377
      instance_os = instance.os
9378

    
9379
    if self.op.disk_template:
9380
      if instance.disk_template == self.op.disk_template:
9381
        raise errors.OpPrereqError("Instance already has disk template %s" %
9382
                                   instance.disk_template, errors.ECODE_INVAL)
9383

    
9384
      if (instance.disk_template,
9385
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9386
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9387
                                   " %s to %s" % (instance.disk_template,
9388
                                                  self.op.disk_template),
9389
                                   errors.ECODE_INVAL)
9390
      _CheckInstanceDown(self, instance, "cannot change disk template")
9391
      if self.op.disk_template in constants.DTS_NET_MIRROR:
9392
        if self.op.remote_node == pnode:
9393
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9394
                                     " as the primary node of the instance" %
9395
                                     self.op.remote_node, errors.ECODE_STATE)
9396
        _CheckNodeOnline(self, self.op.remote_node)
9397
        _CheckNodeNotDrained(self, self.op.remote_node)
9398
        # FIXME: here we assume that the old disk template is DT_PLAIN
9399
        assert instance.disk_template == constants.DT_PLAIN
9400
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9401
                 for d in instance.disks]
9402
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9403
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9404

    
9405
    # hvparams processing
9406
    if self.op.hvparams:
9407
      hv_type = instance.hypervisor
9408
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9409
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9410
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9411

    
9412
      # local check
9413
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9414
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9415
      self.hv_new = hv_new # the new actual values
9416
      self.hv_inst = i_hvdict # the new dict (without defaults)
9417
    else:
9418
      self.hv_new = self.hv_inst = {}
9419

    
9420
    # beparams processing
9421
    if self.op.beparams:
9422
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9423
                                   use_none=True)
9424
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9425
      be_new = cluster.SimpleFillBE(i_bedict)
9426
      self.be_new = be_new # the new actual values
9427
      self.be_inst = i_bedict # the new dict (without defaults)
9428
    else:
9429
      self.be_new = self.be_inst = {}
9430
    be_old = cluster.FillBE(instance)
9431

    
9432
    # osparams processing
9433
    if self.op.osparams:
9434
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9435
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9436
      self.os_inst = i_osdict # the new dict (without defaults)
9437
    else:
9438
      self.os_inst = {}
9439

    
9440
    self.warn = []
9441

    
9442
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
9443
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
9444
      mem_check_list = [pnode]
9445
      if be_new[constants.BE_AUTO_BALANCE]:
9446
        # either we changed auto_balance to yes or it was from before
9447
        mem_check_list.extend(instance.secondary_nodes)
9448
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9449
                                                  instance.hypervisor)
9450
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9451
                                         instance.hypervisor)
9452
      pninfo = nodeinfo[pnode]
9453
      msg = pninfo.fail_msg
9454
      if msg:
9455
        # Assume the primary node is unreachable and go ahead
9456
        self.warn.append("Can't get info from primary node %s: %s" %
9457
                         (pnode, msg))
9458
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9459
        self.warn.append("Node data from primary node %s doesn't contain"
9460
                         " free memory information" % pnode)
9461
      elif instance_info.fail_msg:
9462
        self.warn.append("Can't get instance runtime information: %s" %
9463
                        instance_info.fail_msg)
9464
      else:
9465
        if instance_info.payload:
9466
          current_mem = int(instance_info.payload['memory'])
9467
        else:
9468
          # Assume instance not running
9469
          # (there is a slight race condition here, but it's not very probable,
9470
          # and we have no other way to check)
9471
          current_mem = 0
9472
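        # The primary only needs to cover the increase over what the instance
        # currently uses; whatever its reported free memory cannot cover is
        # considered missing.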
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9473
                    pninfo.payload['memory_free'])
9474
        if miss_mem > 0:
9475
          raise errors.OpPrereqError("This change will prevent the instance"
9476
                                     " from starting, due to %d MB of memory"
9477
                                     " missing on its primary node" % miss_mem,
9478
                                     errors.ECODE_NORES)
9479

    
9480
      if be_new[constants.BE_AUTO_BALANCE]:
9481
        for node, nres in nodeinfo.items():
9482
          if node not in instance.secondary_nodes:
9483
            continue
9484
          nres.Raise("Can't get info from secondary node %s" % node,
9485
                     prereq=True, ecode=errors.ECODE_STATE)
9486
          if not isinstance(nres.payload.get('memory_free', None), int):
9487
            raise errors.OpPrereqError("Secondary node %s didn't return free"
9488
                                       " memory information" % node,
9489
                                       errors.ECODE_STATE)
9490
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9491
            raise errors.OpPrereqError("This change will prevent the instance"
9492
                                       " from failover to its secondary node"
9493
                                       " %s, due to not enough memory" % node,
9494
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return
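
  # The two helpers below switch an instance between the plain and drbd8
  # disk templates: _ConvertPlainToDrbd creates the missing DRBD data/meta
  # volumes, renames the existing LVs into place and waits for the initial
  # sync, while _ConvertDrbdToPlain keeps only the local data LVs and frees
  # the DRBD ports and secondary-node volumes.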

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode,
                  "vg": d.logical_id[0]} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
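    # Note: self.op.disks and self.op.nics are lists of (op, params) pairs,
    # where op is constants.DDM_ADD, constants.DDM_REMOVE or the index of an
    # existing device; for example (values purely illustrative):
    #   disks=[(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
    #   nics=[(0, {"ip": "198.51.100.10"})]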
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result
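
  # _DISK_CONVERSIONS maps an (old_template, new_template) pair to the helper
  # performing the conversion; Exec looks it up via
  # mode = (instance.disk_template, self.op.disk_template) above. Only the
  # plain<->drbd8 conversions are supported here.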

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
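    # For remote exports, LUBackupExport later verifies the "x509_key_name"
    # triple returned above: the HMAC is recomputed with the cluster domain
    # secret (utils.VerifySha1Hmac) and the signed CA is checked before any
    # disk data is sent.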


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES
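    # Rough flow: optionally shut the instance down, snapshot its disks,
    # restart it if needed, then stream the snapshots either to the target
    # node (local mode) or to the remote destinations (remote mode), and
    # finally clean up old exports and/or remove the instance if requested.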

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance, None, None)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.acquired_locks[locking.LEVEL_NODE]) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.acquired_locks[locking.LEVEL_NODEGROUP]
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_NET_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)
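    # Example: with nodes n1 (group G1) and n2 (group G2) and an instance
    # mirrored across n1/n2, moving n1 into G2 removes the split, whereas
    # moving only one node of a same-group pair into another group creates a
    # new split; changed_nodes above only records nodes whose group actually
    # changes.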

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_NET_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):

  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(self.op.names, self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]
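    # At least one of the values above must be given; the check below counts
    # the unset (None) entries to detect a no-op request early.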

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.old_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given old_name exists as a node group, and that
    new_name doesn't.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OLD_NAME": self.op.old_name,
      "NEW_NAME": self.op.new_name,
      }

    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetAllNodesInfo()
    run_nodes = [mn]
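    # Run the hooks on the master node and on every node that is currently
    # part of the group being renamed.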
    all_nodes.pop(mn, None)

    for node in all_nodes.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return env, run_nodes, run_nodes

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.old_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
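
# The tag operations below are built on TagsLU, which resolves the target
# object (cluster, node or instance) from self.op.kind and acquires the
# matching lock; LUTagsSearch is the exception and works cluster-wide
# without it.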


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
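    # Each match is reported as a (path, tag) pair, for example
    # ("/instances/instance1.example.com", "web") for a matching instance tag.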
10831
    for path, target in tgts:
10832
      for tag in target.GetTags():
10833
        if self.re.search(tag):
10834
          results.append((path, tag))
10835
    return results
10836

    
10837

    
10838
class LUTagsSet(TagsLU):
10839
  """Sets a tag on a given object.
10840

10841
  """
10842
  REQ_BGL = False
10843

    
10844
  def CheckPrereq(self):
10845
    """Check prerequisites.
10846

10847
    This checks the type and length of the tag name and value.
10848

10849
    """
10850
    TagsLU.CheckPrereq(self)
10851
    for tag in self.op.tags:
10852
      objects.TaggableObject.ValidateTag(tag)
10853

    
10854
  def Exec(self, feedback_fn):
10855
    """Sets the tag.
10856

10857
    """
10858
    try:
10859
      for tag in self.op.tags:
10860
        self.target.AddTag(tag)
10861
    except errors.TagError, err:
10862
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
10863
    self.cfg.Update(self.target, feedback_fn)
10864

    
10865

    
10866
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


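# Illustrative client-side sketch (not part of the original module): a test
# client for LUTestJqueue receives the socket path via the ELOG_JQUEUE_TEST
# log entry sent by _SendNotification, must connect within
# _CLIENT_CONNECT_TIMEOUT and then confirm, which simply means closing the
# connection so that the server-side conn.recv(1) returns:
#
#   import socket
#   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   sock.connect(sockname)  # "sockname" taken from the log entry
#   sock.close()            # unblocks conn.recv(1) in _NotifyUsingSocket
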
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global dynamic node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


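# For reference (illustrative, abridged): the text handed to the iallocator
# script by _BuildInputData above is the serialized form of a structure
# shaped roughly like
#
#   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
#    "enabled_hypervisors": [...], "nodegroups": {...}, "nodes": {...},
#    "instances": {...},
#    "request": {"type": <mode>, ...}}
#
# with the exact "request" keys depending on which of _AddNewInstance,
# _AddRelocateInstance or _AddEvacuateNodes built it.
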
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  }


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_OP_QUERY}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
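# Usage note (illustrative): _GetQueryImplementation(constants.QR_NODE)
# returns the _NodeQuery class registered in _QUERY_IMPL above, while an
# unknown resource name raises OpPrereqError with ECODE_INVAL.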