Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ b53874cb

History | View | Annotate | Download (475.7 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay to many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcode.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
137
    self.LogStep = processor.LogStep # pylint: disable=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensure
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separate is better because:
160

161
      - ExpandNames is left as as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possible
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods can no longer worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the unused argument and could
324
    # be a function warnings
325
    # pylint: disable=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    If should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we're really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @param use_default: boolean
672
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @param use_none: boolean
675
  @type use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
692

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
698
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
737

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
750

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
1028

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max with one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_stop_master(master, False)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @param instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436
  TCLUSTER = "cluster"
1437
  TNODE = "node"
1438
  TINSTANCE = "instance"
1439

    
1440
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452
  ENODEDRBD = (TNODE, "ENODEDRBD")
1453
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456
  ENODEHV = (TNODE, "ENODEHV")
1457
  ENODELVM = (TNODE, "ENODELVM")
1458
  ENODEN1 = (TNODE, "ENODEN1")
1459
  ENODENET = (TNODE, "ENODENET")
1460
  ENODEOS = (TNODE, "ENODEOS")
1461
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463
  ENODERPC = (TNODE, "ENODERPC")
1464
  ENODESSH = (TNODE, "ENODESSH")
1465
  ENODEVERSION = (TNODE, "ENODEVERSION")
1466
  ENODESETUP = (TNODE, "ENODESETUP")
1467
  ENODETIME = (TNODE, "ENODETIME")
1468
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469

    
1470
  ETYPE_FIELD = "code"
1471
  ETYPE_ERROR = "ERROR"
1472
  ETYPE_WARNING = "WARNING"
1473

    
1474
  def _Error(self, ecode, item, msg, *args, **kwargs):
1475
    """Format an error message.
1476

1477
    Based on the opcode's error_codes parameter, either format a
1478
    parseable error code, or a simpler error string.
1479

1480
    This must be called only from Exec and functions called from Exec.
1481

1482
    """
1483
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484
    itype, etxt = ecode
1485
    # first complete the msg
1486
    if args:
1487
      msg = msg % args
1488
    # then format the whole message
1489
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491
    else:
1492
      if item:
1493
        item = " " + item
1494
      else:
1495
        item = ""
1496
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497
    # and finally report it via the feedback_fn
1498
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1499

    
1500
  def _ErrorIf(self, cond, *args, **kwargs):
1501
    """Log an error message if the passed condition is True.
1502

1503
    """
1504
    cond = (bool(cond)
1505
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1506
    if cond:
1507
      self._Error(*args, **kwargs)
1508
    # do not mark the operation as failed for WARN cases only
1509
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510
      self.bad = self.bad or cond
1511

    
1512

    
1513
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
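
  # The depends tuples built above use Ganeti's relative job dependencies:
  # a negative job index such as -len(jobs) counts backwards from the job
  # being submitted, so every per-group OpClusterVerifyGroup job is made to
  # wait for the OpClusterVerifyConfig job queued first (when no specific
  # group was requested).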


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555
  """Verifies the cluster config.
1556

1557
  """
1558
  REQ_BGL = False
1559

    
1560
  def _VerifyHVP(self, hvp_data):
1561
    """Verifies locally the syntax of the hypervisor parameters.
1562

1563
    """
1564
    for item, hv_name, hv_params in hvp_data:
1565
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566
             (item, hv_name))
1567
      try:
1568
        hv_class = hypervisor.GetHypervisor(hv_name)
1569
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570
        hv_class.CheckParameterSyntax(hv_params)
1571
      except errors.GenericError, err:
1572
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573

    
1574
  def ExpandNames(self):
1575
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1576
    self.share_locks = _ShareAll()
1577

    
1578
  def CheckPrereq(self):
1579
    """Check prerequisites.
1580

1581
    """
1582
    # Retrieve all information
1583
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1584
    self.all_node_info = self.cfg.GetAllNodesInfo()
1585
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1586

    
1587
  def Exec(self, feedback_fn):
1588
    """Verify integrity of cluster, performing various test on nodes.
1589

1590
    """
1591
    self.bad = False
1592
    self._feedback_fn = feedback_fn
1593

    
1594
    feedback_fn("* Verifying cluster config")
1595

    
1596
    for msg in self.cfg.VerifyConfig():
1597
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1598

    
1599
    feedback_fn("* Verifying cluster certificate files")
1600

    
1601
    for cert_filename in constants.ALL_CERT_FILES:
1602
      (errcode, msg) = _VerifyCertificate(cert_filename)
1603
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1604

    
1605
    feedback_fn("* Verifying hypervisor parameters")
1606

    
1607
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1608
                                                self.all_inst_info.values()))
1609

    
1610
    feedback_fn("* Verifying all nodes belong to an existing group")
1611

    
1612
    # We do this verification here because, should this bogus circumstance
1613
    # occur, it would never be caught by VerifyGroup, which only acts on
1614
    # nodes/instances reachable from existing node groups.
1615

    
1616
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1617
                         if node.group not in self.all_group_info)
1618

    
1619
    dangling_instances = {}
1620
    no_node_instances = []
1621

    
1622
    for inst in self.all_inst_info.values():
1623
      if inst.primary_node in dangling_nodes:
1624
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1625
      elif inst.primary_node not in self.all_node_info:
1626
        no_node_instances.append(inst.name)
1627

    
1628
    pretty_dangling = [
1629
        "%s (%s)" %
1630
        (node.name,
1631
         utils.CommaJoin(dangling_instances.get(node.name,
1632
                                                ["no instances"])))
1633
        for node in dangling_nodes]
1634

    
1635
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1636
                  "the following nodes (and their instances) belong to a non"
1637
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1638

    
1639
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1640
                  "the following instances have a non-existing primary-node:"
1641
                  " %s", utils.CommaJoin(no_node_instances))
1642

    
1643
    return not self.bad
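
  # Example of the dangling-nodes report built above (hypothetical names):
  # a node "ghostnode" whose group no longer exists would be listed as
  # "ghostnode (inst1.example.com)" if that instance has it as primary
  # node, or "ghostnode (no instances)" otherwise.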
1644

    
1645

    
1646
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1647
  """Verifies the status of a node group.
1648

1649
  """
1650
  HPATH = "cluster-verify"
1651
  HTYPE = constants.HTYPE_CLUSTER
1652
  REQ_BGL = False
1653

    
1654
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1655

    
1656
  class NodeImage(object):
1657
    """A class representing the logical and physical status of a node.
1658

1659
    @type name: string
1660
    @ivar name: the node name to which this object refers
1661
    @ivar volumes: a structure as returned from
1662
        L{ganeti.backend.GetVolumeList} (runtime)
1663
    @ivar instances: a list of running instances (runtime)
1664
    @ivar pinst: list of configured primary instances (config)
1665
    @ivar sinst: list of configured secondary instances (config)
1666
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1667
        instances for which this node is secondary (config)
1668
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1669
    @ivar dfree: free disk, as reported by the node (runtime)
1670
    @ivar offline: the offline status (config)
1671
    @type rpc_fail: boolean
1672
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1673
        not whether the individual keys were correct) (runtime)
1674
    @type lvm_fail: boolean
1675
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1676
    @type hyp_fail: boolean
1677
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1678
    @type ghost: boolean
1679
    @ivar ghost: whether this is a known node or not (config)
1680
    @type os_fail: boolean
1681
    @ivar os_fail: whether the RPC call didn't return valid OS data
1682
    @type oslist: list
1683
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1684
    @type vm_capable: boolean
1685
    @ivar vm_capable: whether the node can host instances
1686

1687
    """
1688
    def __init__(self, offline=False, name=None, vm_capable=True):
1689
      self.name = name
1690
      self.volumes = {}
1691
      self.instances = []
1692
      self.pinst = []
1693
      self.sinst = []
1694
      self.sbp = {}
1695
      self.mfree = 0
1696
      self.dfree = 0
1697
      self.offline = offline
1698
      self.vm_capable = vm_capable
1699
      self.rpc_fail = False
1700
      self.lvm_fail = False
1701
      self.hyp_fail = False
1702
      self.ghost = False
1703
      self.os_fail = False
1704
      self.oslist = {}
1705

    
1706
  def ExpandNames(self):
1707
    # This raises errors.OpPrereqError on its own:
1708
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1709

    
1710
    # Get instances in node group; this is unsafe and needs verification later
1711
    inst_names = \
1712
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1713

    
1714
    self.needed_locks = {
1715
      locking.LEVEL_INSTANCE: inst_names,
1716
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1717
      locking.LEVEL_NODE: [],
1718
      }
1719

    
1720
    self.share_locks = _ShareAll()
1721

    
1722
  def DeclareLocks(self, level):
1723
    if level == locking.LEVEL_NODE:
1724
      # Get members of node group; this is unsafe and needs verification later
1725
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1726

    
1727
      all_inst_info = self.cfg.GetAllInstancesInfo()
1728

    
1729
      # In Exec(), we warn about mirrored instances that have primary and
1730
      # secondary living in separate node groups. To fully verify that
1731
      # volumes for these instances are healthy, we will need to do an
1732
      # extra call to their secondaries. We ensure here those nodes will
1733
      # be locked.
1734
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1735
        # Important: access only the instances whose lock is owned
1736
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1737
          nodes.update(all_inst_info[inst].secondary_nodes)
1738

    
1739
      self.needed_locks[locking.LEVEL_NODE] = nodes
1740

    
1741
  def CheckPrereq(self):
1742
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1743
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1744

    
1745
    group_nodes = set(self.group_info.members)
1746
    group_instances = \
1747
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1748

    
1749
    unlocked_nodes = \
1750
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1751

    
1752
    unlocked_instances = \
1753
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1754

    
1755
    if unlocked_nodes:
1756
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1757
                                 utils.CommaJoin(unlocked_nodes),
1758
                                 errors.ECODE_STATE)
1759

    
1760
    if unlocked_instances:
1761
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1762
                                 utils.CommaJoin(unlocked_instances),
1763
                                 errors.ECODE_STATE)
1764

    
1765
    self.all_node_info = self.cfg.GetAllNodesInfo()
1766
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1767

    
1768
    self.my_node_names = utils.NiceSort(group_nodes)
1769
    self.my_inst_names = utils.NiceSort(group_instances)
1770

    
1771
    self.my_node_info = dict((name, self.all_node_info[name])
1772
                             for name in self.my_node_names)
1773

    
1774
    self.my_inst_info = dict((name, self.all_inst_info[name])
1775
                             for name in self.my_inst_names)
1776

    
1777
    # We detect here the nodes that will need the extra RPC calls for verifying
1778
    # split LV volumes; they should be locked.
1779
    extra_lv_nodes = set()
1780

    
1781
    for inst in self.my_inst_info.values():
1782
      if inst.disk_template in constants.DTS_INT_MIRROR:
1783
        for nname in inst.all_nodes:
1784
          if self.all_node_info[nname].group != self.group_uuid:
1785
            extra_lv_nodes.add(nname)
1786

    
1787
    unlocked_lv_nodes = \
1788
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1789

    
1790
    if unlocked_lv_nodes:
1791
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1792
                                 utils.CommaJoin(unlocked_lv_nodes),
1793
                                 errors.ECODE_STATE)
1794
    self.extra_lv_nodes = list(extra_lv_nodes)
1795

    
1796
  def _VerifyNode(self, ninfo, nresult):
1797
    """Perform some basic validation on data returned from a node.
1798

1799
      - check the result data structure is well formed and has all the
1800
        mandatory fields
1801
      - check ganeti version
1802

1803
    @type ninfo: L{objects.Node}
1804
    @param ninfo: the node to check
1805
    @param nresult: the results from the node
1806
    @rtype: boolean
1807
    @return: whether overall this call was successful (and we can expect
1808
         reasonable values in the response)
1809

1810
    """
1811
    node = ninfo.name
1812
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1813

    
1814
    # main result, nresult should be a non-empty dict
1815
    test = not nresult or not isinstance(nresult, dict)
1816
    _ErrorIf(test, self.ENODERPC, node,
1817
                  "unable to verify node: no data returned")
1818
    if test:
1819
      return False
1820

    
1821
    # compares ganeti version
1822
    local_version = constants.PROTOCOL_VERSION
1823
    remote_version = nresult.get("version", None)
1824
    test = not (remote_version and
1825
                isinstance(remote_version, (list, tuple)) and
1826
                len(remote_version) == 2)
1827
    _ErrorIf(test, self.ENODERPC, node,
1828
             "connection to node returned invalid data")
1829
    if test:
1830
      return False
1831

    
1832
    test = local_version != remote_version[0]
1833
    _ErrorIf(test, self.ENODEVERSION, node,
1834
             "incompatible protocol versions: master %s,"
1835
             " node %s", local_version, remote_version[0])
1836
    if test:
1837
      return False
1838

    
1839
    # node seems compatible, we can actually try to look into its results
1840

    
1841
    # full package version
1842
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1843
                  self.ENODEVERSION, node,
1844
                  "software version mismatch: master %s, node %s",
1845
                  constants.RELEASE_VERSION, remote_version[1],
1846
                  code=self.ETYPE_WARNING)
1847

    
1848
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1849
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1850
      for hv_name, hv_result in hyp_result.iteritems():
1851
        test = hv_result is not None
1852
        _ErrorIf(test, self.ENODEHV, node,
1853
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1854

    
1855
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1856
    if ninfo.vm_capable and isinstance(hvp_result, list):
1857
      for item, hv_name, hv_result in hvp_result:
1858
        _ErrorIf(True, self.ENODEHV, node,
1859
                 "hypervisor %s parameter verify failure (source %s): %s",
1860
                 hv_name, item, hv_result)
1861

    
1862
    test = nresult.get(constants.NV_NODESETUP,
1863
                       ["Missing NODESETUP results"])
1864
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1865
             "; ".join(test))
1866

    
1867
    return True
1868

    
1869
  def _VerifyNodeTime(self, ninfo, nresult,
1870
                      nvinfo_starttime, nvinfo_endtime):
1871
    """Check the node time.
1872

1873
    @type ninfo: L{objects.Node}
1874
    @param ninfo: the node to check
1875
    @param nresult: the remote results for the node
1876
    @param nvinfo_starttime: the start time of the RPC call
1877
    @param nvinfo_endtime: the end time of the RPC call
1878

1879
    """
1880
    node = ninfo.name
1881
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1882

    
1883
    ntime = nresult.get(constants.NV_TIME, None)
1884
    try:
1885
      ntime_merged = utils.MergeTime(ntime)
1886
    except (ValueError, TypeError):
1887
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1888
      return
1889

    
1890
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1891
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1892
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1893
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1894
    else:
1895
      ntime_diff = None
1896

    
1897
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1898
             "Node time diverges by at least %s from master node time",
1899
             ntime_diff)
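
  # Worked example for the time check above: if the verify RPC ran in the
  # window [100.0, 102.0] and the node reported a merged time of 350.0,
  # then (assuming constants.NODE_MAX_CLOCK_SKEW is smaller than 248s) the
  # node is flagged with ENODETIME and a divergence of "248.0s" is reported.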
1900

    
1901
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1902
    """Check the node LVM results.
1903

1904
    @type ninfo: L{objects.Node}
1905
    @param ninfo: the node to check
1906
    @param nresult: the remote results for the node
1907
    @param vg_name: the configured VG name
1908

1909
    """
1910
    if vg_name is None:
1911
      return
1912

    
1913
    node = ninfo.name
1914
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1915

    
1916
    # checks vg existence and size > 20G
1917
    vglist = nresult.get(constants.NV_VGLIST, None)
1918
    test = not vglist
1919
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1920
    if not test:
1921
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1922
                                            constants.MIN_VG_SIZE)
1923
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1924

    
1925
    # check pv names
1926
    pvlist = nresult.get(constants.NV_PVLIST, None)
1927
    test = pvlist is None
1928
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1929
    if not test:
1930
      # check that ':' is not present in PV names, since it's a
1931
      # special character for lvcreate (denotes the range of PEs to
1932
      # use on the PV)
1933
      for _, pvname, owner_vg in pvlist:
1934
        test = ":" in pvname
1935
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1936
                 " '%s' of VG '%s'", pvname, owner_vg)
1937

    
1938
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1939
    """Check the node bridges.
1940

1941
    @type ninfo: L{objects.Node}
1942
    @param ninfo: the node to check
1943
    @param nresult: the remote results for the node
1944
    @param bridges: the expected list of bridges
1945

1946
    """
1947
    if not bridges:
1948
      return
1949

    
1950
    node = ninfo.name
1951
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1952

    
1953
    missing = nresult.get(constants.NV_BRIDGES, None)
1954
    test = not isinstance(missing, list)
1955
    _ErrorIf(test, self.ENODENET, node,
1956
             "did not return valid bridge information")
1957
    if not test:
1958
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1959
               utils.CommaJoin(sorted(missing)))
1960

    
1961
  def _VerifyNodeNetwork(self, ninfo, nresult):
1962
    """Check the node network connectivity results.
1963

1964
    @type ninfo: L{objects.Node}
1965
    @param ninfo: the node to check
1966
    @param nresult: the remote results for the node
1967

1968
    """
1969
    node = ninfo.name
1970
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1971

    
1972
    test = constants.NV_NODELIST not in nresult
1973
    _ErrorIf(test, self.ENODESSH, node,
1974
             "node hasn't returned node ssh connectivity data")
1975
    if not test:
1976
      if nresult[constants.NV_NODELIST]:
1977
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1978
          _ErrorIf(True, self.ENODESSH, node,
1979
                   "ssh communication with node '%s': %s", a_node, a_msg)
1980

    
1981
    test = constants.NV_NODENETTEST not in nresult
1982
    _ErrorIf(test, self.ENODENET, node,
1983
             "node hasn't returned node tcp connectivity data")
1984
    if not test:
1985
      if nresult[constants.NV_NODENETTEST]:
1986
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1987
        for anode in nlist:
1988
          _ErrorIf(True, self.ENODENET, node,
1989
                   "tcp communication with node '%s': %s",
1990
                   anode, nresult[constants.NV_NODENETTEST][anode])
1991

    
1992
    test = constants.NV_MASTERIP not in nresult
1993
    _ErrorIf(test, self.ENODENET, node,
1994
             "node hasn't returned node master IP reachability data")
1995
    if not test:
1996
      if not nresult[constants.NV_MASTERIP]:
1997
        if node == self.master_node:
1998
          msg = "the master node cannot reach the master IP (not configured?)"
1999
        else:
2000
          msg = "cannot reach the master IP"
2001
        _ErrorIf(True, self.ENODENET, node, msg)
2002

    
2003
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2004
                      diskstatus):
2005
    """Verify an instance.
2006

2007
    This function checks to see if the required block devices are
2008
    available on the instance's node.
2009

2010
    """
2011
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2012
    node_current = instanceconfig.primary_node
2013

    
2014
    node_vol_should = {}
2015
    instanceconfig.MapLVsByNode(node_vol_should)
2016

    
2017
    for node in node_vol_should:
2018
      n_img = node_image[node]
2019
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2020
        # ignore missing volumes on offline or broken nodes
2021
        continue
2022
      for volume in node_vol_should[node]:
2023
        test = volume not in n_img.volumes
2024
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2025
                 "volume %s missing on node %s", volume, node)
2026

    
2027
    if instanceconfig.admin_up:
2028
      pri_img = node_image[node_current]
2029
      test = instance not in pri_img.instances and not pri_img.offline
2030
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
2031
               "instance not running on its primary node %s",
2032
               node_current)
2033

    
2034
    diskdata = [(nname, success, status, idx)
2035
                for (nname, disks) in diskstatus.items()
2036
                for idx, (success, status) in enumerate(disks)]
2037

    
2038
    for nname, success, bdev_status, idx in diskdata:
2039
      # the 'ghost node' construction in Exec() ensures that we have a
2040
      # node here
2041
      snode = node_image[nname]
2042
      bad_snode = snode.ghost or snode.offline
2043
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2044
               self.EINSTANCEFAULTYDISK, instance,
2045
               "couldn't retrieve status for disk/%s on %s: %s",
2046
               idx, nname, bdev_status)
2047
      _ErrorIf((instanceconfig.admin_up and success and
2048
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2049
               self.EINSTANCEFAULTYDISK, instance,
2050
               "disk/%s on %s is faulty", idx, nname)
2051

    
2052
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2053
    """Verify if there are any unknown volumes in the cluster.
2054

2055
    The .os, .swap and backup volumes are ignored. All other volumes are
2056
    reported as unknown.
2057

2058
    @type reserved: L{ganeti.utils.FieldSet}
2059
    @param reserved: a FieldSet of reserved volume names
2060

2061
    """
2062
    for node, n_img in node_image.items():
2063
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2064
          self.all_node_info[node].group != self.group_uuid):
2065
        # skip non-healthy nodes
2066
        continue
2067
      for volume in n_img.volumes:
2068
        test = ((node not in node_vol_should or
2069
                volume not in node_vol_should[node]) and
2070
                not reserved.Matches(volume))
2071
        self._ErrorIf(test, self.ENODEORPHANLV, node,
2072
                      "volume %s is unknown", volume)
2073

    
2074
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2075
    """Verify N+1 Memory Resilience.
2076

2077
    Check that if one single node dies we can still start all the
2078
    instances it was primary for.
2079

2080
    """
2081
    cluster_info = self.cfg.GetClusterInfo()
2082
    for node, n_img in node_image.items():
2083
      # This code checks that every node which is now listed as
2084
      # secondary has enough memory to host all instances it is
2085
      # supposed to, should a single other node in the cluster fail.
2086
      # FIXME: not ready for failover to an arbitrary node
2087
      # FIXME: does not support file-backed instances
2088
      # WARNING: we currently take into account down instances as well
2089
      # as up ones, considering that even if they're down someone
2090
      # might want to start them even in the event of a node failure.
2091
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2092
        # we're skipping nodes marked offline and nodes in other groups from
2093
        # the N+1 warning, since most likely we don't have good memory
2094
        # information from them; we already list instances living on such
2095
        # nodes, and that's enough warning
2096
        continue
2097
      for prinode, instances in n_img.sbp.items():
2098
        needed_mem = 0
2099
        for instance in instances:
2100
          bep = cluster_info.FillBE(instance_cfg[instance])
2101
          if bep[constants.BE_AUTO_BALANCE]:
2102
            needed_mem += bep[constants.BE_MEMORY]
2103
        test = n_img.mfree < needed_mem
2104
        self._ErrorIf(test, self.ENODEN1, node,
2105
                      "not enough memory to accomodate instance failovers"
2106
                      " should node %s fail (%dMiB needed, %dMiB available)",
2107
                      prinode, needed_mem, n_img.mfree)
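
  # Worked example (made-up numbers): if node B is secondary for two
  # auto-balanced instances whose primary is node A, with BE_MEMORY of 2048
  # and 4096 MiB, then B must report at least 6144 MiB of free memory;
  # otherwise an ENODEN1 error is raised for B, citing a failure of node A.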
2108

    
2109
  @classmethod
2110
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2111
                   (files_all, files_all_opt, files_mc, files_vm)):
2112
    """Verifies file checksums collected from all nodes.
2113

2114
    @param errorif: Callback for reporting errors
2115
    @param nodeinfo: List of L{objects.Node} objects
2116
    @param master_node: Name of master node
2117
    @param all_nvinfo: RPC results
2118

2119
    """
2120
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2121
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2122
           "Found file listed in more than one file list"
2123

    
2124
    # Define functions determining which nodes to consider for a file
2125
    files2nodefn = [
2126
      (files_all, None),
2127
      (files_all_opt, None),
2128
      (files_mc, lambda node: (node.master_candidate or
2129
                               node.name == master_node)),
2130
      (files_vm, lambda node: node.vm_capable),
2131
      ]
2132

    
2133
    # Build mapping from filename to list of nodes which should have the file
2134
    nodefiles = {}
2135
    for (files, fn) in files2nodefn:
2136
      if fn is None:
2137
        filenodes = nodeinfo
2138
      else:
2139
        filenodes = filter(fn, nodeinfo)
2140
      nodefiles.update((filename,
2141
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2142
                       for filename in files)
2143

    
2144
    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2145

    
2146
    fileinfo = dict((filename, {}) for filename in nodefiles)
2147
    ignore_nodes = set()
2148

    
2149
    for node in nodeinfo:
2150
      if node.offline:
2151
        ignore_nodes.add(node.name)
2152
        continue
2153

    
2154
      nresult = all_nvinfo[node.name]
2155

    
2156
      if nresult.fail_msg or not nresult.payload:
2157
        node_files = None
2158
      else:
2159
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2160

    
2161
      test = not (node_files and isinstance(node_files, dict))
2162
      errorif(test, cls.ENODEFILECHECK, node.name,
2163
              "Node did not return file checksum data")
2164
      if test:
2165
        ignore_nodes.add(node.name)
2166
        continue
2167

    
2168
      # Build per-checksum mapping from filename to nodes having it
2169
      for (filename, checksum) in node_files.items():
2170
        assert filename in nodefiles
2171
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2172

    
2173
    for (filename, checksums) in fileinfo.items():
2174
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2175

    
2176
      # Nodes having the file
2177
      with_file = frozenset(node_name
2178
                            for nodes in fileinfo[filename].values()
2179
                            for node_name in nodes) - ignore_nodes
2180

    
2181
      expected_nodes = nodefiles[filename] - ignore_nodes
2182

    
2183
      # Nodes missing file
2184
      missing_file = expected_nodes - with_file
2185

    
2186
      if filename in files_all_opt:
2187
        # All or no nodes
2188
        errorif(missing_file and missing_file != expected_nodes,
2189
                cls.ECLUSTERFILECHECK, None,
2190
                "File %s is optional, but it must exist on all or no"
2191
                " nodes (not found on %s)",
2192
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2193
      else:
2194
        # Non-optional files
2195
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2196
                "File %s is missing from node(s) %s", filename,
2197
                utils.CommaJoin(utils.NiceSort(missing_file)))
2198

    
2199
        # Warn if a node has a file it shouldn't
2200
        unexpected = with_file - expected_nodes
2201
        errorif(unexpected,
2202
                cls.ECLUSTERFILECHECK, None,
2203
                "File %s should not exist on node(s) %s",
2204
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2205

    
2206
      # See if there are multiple versions of the file
2207
      test = len(checksums) > 1
2208
      if test:
2209
        variants = ["variant %s on %s" %
2210
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2211
                    for (idx, (checksum, nodes)) in
2212
                      enumerate(sorted(checksums.items()))]
2213
      else:
2214
        variants = []
2215

    
2216
      errorif(test, cls.ECLUSTERFILECHECK, None,
2217
              "File %s found with %s different checksums (%s)",
2218
              filename, len(checksums), "; ".join(variants))
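
  # Shape note: fileinfo maps each filename to a {checksum: set(node names)}
  # dict, so more than one checksum key for a file is exactly the "different
  # checksums" condition reported just above.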
2219

    
2220
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2221
                      drbd_map):
2222
    """Verifies and the node DRBD status.
2223

2224
    @type ninfo: L{objects.Node}
2225
    @param ninfo: the node to check
2226
    @param nresult: the remote results for the node
2227
    @param instanceinfo: the dict of instances
2228
    @param drbd_helper: the configured DRBD usermode helper
2229
    @param drbd_map: the DRBD map as returned by
2230
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2231

2232
    """
2233
    node = ninfo.name
2234
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2235

    
2236
    if drbd_helper:
2237
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2238
      test = (helper_result is None)
2239
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2240
               "no drbd usermode helper returned")
2241
      if helper_result:
2242
        status, payload = helper_result
2243
        test = not status
2244
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2245
                 "drbd usermode helper check unsuccessful: %s", payload)
2246
        test = status and (payload != drbd_helper)
2247
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2248
                 "wrong drbd usermode helper: %s", payload)
2249

    
2250
    # compute the DRBD minors
2251
    node_drbd = {}
2252
    for minor, instance in drbd_map[node].items():
2253
      test = instance not in instanceinfo
2254
      _ErrorIf(test, self.ECLUSTERCFG, None,
2255
               "ghost instance '%s' in temporary DRBD map", instance)
2256
        # ghost instance should not be running, but otherwise we
2257
        # don't give double warnings (both ghost instance and
2258
        # unallocated minor in use)
2259
      if test:
2260
        node_drbd[minor] = (instance, False)
2261
      else:
2262
        instance = instanceinfo[instance]
2263
        node_drbd[minor] = (instance.name, instance.admin_up)
2264

    
2265
    # and now check them
2266
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2267
    test = not isinstance(used_minors, (tuple, list))
2268
    _ErrorIf(test, self.ENODEDRBD, node,
2269
             "cannot parse drbd status file: %s", str(used_minors))
2270
    if test:
2271
      # we cannot check drbd status
2272
      return
2273

    
2274
    for minor, (iname, must_exist) in node_drbd.items():
2275
      test = minor not in used_minors and must_exist
2276
      _ErrorIf(test, self.ENODEDRBD, node,
2277
               "drbd minor %d of instance %s is not active", minor, iname)
2278
    for minor in used_minors:
2279
      test = minor not in node_drbd
2280
      _ErrorIf(test, self.ENODEDRBD, node,
2281
               "unallocated drbd minor %d is in use", minor)
2282

    
2283
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2284
    """Builds the node OS structures.
2285

2286
    @type ninfo: L{objects.Node}
2287
    @param ninfo: the node to check
2288
    @param nresult: the remote results for the node
2289
    @param nimg: the node image object
2290

2291
    """
2292
    node = ninfo.name
2293
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2294

    
2295
    remote_os = nresult.get(constants.NV_OSLIST, None)
2296
    test = (not isinstance(remote_os, list) or
2297
            not compat.all(isinstance(v, list) and len(v) == 7
2298
                           for v in remote_os))
2299

    
2300
    _ErrorIf(test, self.ENODEOS, node,
2301
             "node hasn't returned valid OS data")
2302

    
2303
    nimg.os_fail = test
2304

    
2305
    if test:
2306
      return
2307

    
2308
    os_dict = {}
2309

    
2310
    for (name, os_path, status, diagnose,
2311
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2312

    
2313
      if name not in os_dict:
2314
        os_dict[name] = []
2315

    
2316
      # parameters is a list of lists instead of list of tuples due to
2317
      # JSON lacking a real tuple type, fix it:
2318
      parameters = [tuple(v) for v in parameters]
2319
      os_dict[name].append((os_path, status, diagnose,
2320
                            set(variants), set(parameters), set(api_ver)))
2321

    
2322
    nimg.oslist = os_dict
2323

    
2324
  def _VerifyNodeOS(self, ninfo, nimg, base):
2325
    """Verifies the node OS list.
2326

2327
    @type ninfo: L{objects.Node}
2328
    @param ninfo: the node to check
2329
    @param nimg: the node image object
2330
    @param base: the 'template' node we match against (e.g. from the master)
2331

2332
    """
2333
    node = ninfo.name
2334
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2335

    
2336
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2337

    
2338
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2339
    for os_name, os_data in nimg.oslist.items():
2340
      assert os_data, "Empty OS status for OS %s?!" % os_name
2341
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2342
      _ErrorIf(not f_status, self.ENODEOS, node,
2343
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2344
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2345
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2346
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2347
      # comparisons with the 'base' image
2348
      test = os_name not in base.oslist
2349
      _ErrorIf(test, self.ENODEOS, node,
2350
               "Extra OS %s not present on reference node (%s)",
2351
               os_name, base.name)
2352
      if test:
2353
        continue
2354
      assert base.oslist[os_name], "Base node has empty OS status?"
2355
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2356
      if not b_status:
2357
        # base OS is invalid, skipping
2358
        continue
2359
      for kind, a, b in [("API version", f_api, b_api),
2360
                         ("variants list", f_var, b_var),
2361
                         ("parameters", beautify_params(f_param),
2362
                          beautify_params(b_param))]:
2363
        _ErrorIf(a != b, self.ENODEOS, node,
2364
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2365
                 kind, os_name, base.name,
2366
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2367

    
2368
    # check any missing OSes
2369
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2370
    _ErrorIf(missing, self.ENODEOS, node,
2371
             "OSes present on reference node %s but missing on this node: %s",
2372
             base.name, utils.CommaJoin(missing))
2373

    
2374
  def _VerifyOob(self, ninfo, nresult):
2375
    """Verifies out of band functionality of a node.
2376

2377
    @type ninfo: L{objects.Node}
2378
    @param ninfo: the node to check
2379
    @param nresult: the remote results for the node
2380

2381
    """
2382
    node = ninfo.name
2383
    # We just have to verify the paths on master and/or master candidates
2384
    # as the oob helper is invoked on the master
2385
    if ((ninfo.master_candidate or ninfo.master_capable) and
2386
        constants.NV_OOB_PATHS in nresult):
2387
      for path_result in nresult[constants.NV_OOB_PATHS]:
2388
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2389

    
2390
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2391
    """Verifies and updates the node volume data.
2392

2393
    This function will update a L{NodeImage}'s internal structures
2394
    with data from the remote call.
2395

2396
    @type ninfo: L{objects.Node}
2397
    @param ninfo: the node to check
2398
    @param nresult: the remote results for the node
2399
    @param nimg: the node image object
2400
    @param vg_name: the configured VG name
2401

2402
    """
2403
    node = ninfo.name
2404
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2405

    
2406
    nimg.lvm_fail = True
2407
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2408
    if vg_name is None:
2409
      pass
2410
    elif isinstance(lvdata, basestring):
2411
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2412
               utils.SafeEncode(lvdata))
2413
    elif not isinstance(lvdata, dict):
2414
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2415
    else:
2416
      nimg.volumes = lvdata
2417
      nimg.lvm_fail = False
2418

    
2419
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2420
    """Verifies and updates the node instance list.
2421

2422
    If the listing was successful, then updates this node's instance
2423
    list. Otherwise, it marks the RPC call as failed for the instance
2424
    list key.
2425

2426
    @type ninfo: L{objects.Node}
2427
    @param ninfo: the node to check
2428
    @param nresult: the remote results for the node
2429
    @param nimg: the node image object
2430

2431
    """
2432
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2433
    test = not isinstance(idata, list)
2434
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2435
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2436
    if test:
2437
      nimg.hyp_fail = True
2438
    else:
2439
      nimg.instances = idata
2440

    
2441
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2442
    """Verifies and computes a node information map
2443

2444
    @type ninfo: L{objects.Node}
2445
    @param ninfo: the node to check
2446
    @param nresult: the remote results for the node
2447
    @param nimg: the node image object
2448
    @param vg_name: the configured VG name
2449

2450
    """
2451
    node = ninfo.name
2452
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2453

    
2454
    # try to read free memory (from the hypervisor)
2455
    hv_info = nresult.get(constants.NV_HVINFO, None)
2456
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2457
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2458
    if not test:
2459
      try:
2460
        nimg.mfree = int(hv_info["memory_free"])
2461
      except (ValueError, TypeError):
2462
        _ErrorIf(True, self.ENODERPC, node,
2463
                 "node returned invalid nodeinfo, check hypervisor")
2464

    
2465
    # FIXME: devise a free space model for file based instances as well
2466
    if vg_name is not None:
2467
      test = (constants.NV_VGLIST not in nresult or
2468
              vg_name not in nresult[constants.NV_VGLIST])
2469
      _ErrorIf(test, self.ENODELVM, node,
2470
               "node didn't return data for the volume group '%s'"
2471
               " - it is either missing or broken", vg_name)
2472
      if not test:
2473
        try:
2474
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2475
        except (ValueError, TypeError):
2476
          _ErrorIf(True, self.ENODERPC, node,
2477
                   "node returned invalid LVM info, check LVM status")
2478

    
2479
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2480
    """Gets per-disk status information for all instances.
2481

2482
    @type nodelist: list of strings
2483
    @param nodelist: Node names
2484
    @type node_image: dict of (name, L{objects.Node})
2485
    @param node_image: Node objects
2486
    @type instanceinfo: dict of (name, L{objects.Instance})
2487
    @param instanceinfo: Instance objects
2488
    @rtype: {instance: {node: [(success, payload)]}}
2489
    @return: a dictionary of per-instance dictionaries with nodes as
2490
        keys and disk information as values; the disk information is a
2491
        list of tuples (success, payload)
2492

2493
    """
2494
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495

    
2496
    node_disks = {}
2497
    node_disks_devonly = {}
2498
    diskless_instances = set()
2499
    diskless = constants.DT_DISKLESS
2500

    
2501
    for nname in nodelist:
2502
      node_instances = list(itertools.chain(node_image[nname].pinst,
2503
                                            node_image[nname].sinst))
2504
      diskless_instances.update(inst for inst in node_instances
2505
                                if instanceinfo[inst].disk_template == diskless)
2506
      disks = [(inst, disk)
2507
               for inst in node_instances
2508
               for disk in instanceinfo[inst].disks]
2509

    
2510
      if not disks:
2511
        # No need to collect data
2512
        continue
2513

    
2514
      node_disks[nname] = disks
2515

    
2516
      # Creating copies as SetDiskID below will modify the objects and that can
2517
      # lead to incorrect data returned from nodes
2518
      devonly = [dev.Copy() for (_, dev) in disks]
2519

    
2520
      for dev in devonly:
2521
        self.cfg.SetDiskID(dev, nname)
2522

    
2523
      node_disks_devonly[nname] = devonly
2524

    
2525
    assert len(node_disks) == len(node_disks_devonly)
2526

    
2527
    # Collect data from all nodes with disks
2528
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2529
                                                          node_disks_devonly)
2530

    
2531
    assert len(result) == len(node_disks)
2532

    
2533
    instdisk = {}
2534

    
2535
    for (nname, nres) in result.items():
2536
      disks = node_disks[nname]
2537

    
2538
      if nres.offline:
2539
        # No data from this node
2540
        data = len(disks) * [(False, "node offline")]
2541
      else:
2542
        msg = nres.fail_msg
2543
        _ErrorIf(msg, self.ENODERPC, nname,
2544
                 "while getting disk information: %s", msg)
2545
        if msg:
2546
          # No data from this node
2547
          data = len(disks) * [(False, msg)]
2548
        else:
2549
          data = []
2550
          for idx, i in enumerate(nres.payload):
2551
            if isinstance(i, (tuple, list)) and len(i) == 2:
2552
              data.append(i)
2553
            else:
2554
              logging.warning("Invalid result from node %s, entry %d: %s",
2555
                              nname, idx, i)
2556
              data.append((False, "Invalid result from the remote node"))
2557

    
2558
      for ((inst, _), status) in zip(disks, data):
2559
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2560

    
2561
    # Add empty entries for diskless instances.
2562
    for inst in diskless_instances:
2563
      assert inst not in instdisk
2564
      instdisk[inst] = {}
2565

    
2566
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2567
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2568
                      compat.all(isinstance(s, (tuple, list)) and
2569
                                 len(s) == 2 for s in statuses)
2570
                      for inst, nnames in instdisk.items()
2571
                      for nname, statuses in nnames.items())
2572
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2573

    
2574
    return instdisk
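
  # Shape example (hypothetical names): the returned instdisk mapping looks
  # like {"inst1": {"nodeA": [(True, status0), (False, "msg")]}, "inst2": {}}
  # with one (success, payload) pair per disk of the instance on that node,
  # and an empty inner dict for diskless instances.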
2575

    
2576
  @staticmethod
2577
  def _SshNodeSelector(group_uuid, all_nodes):
2578
    """Create endless iterators for all potential SSH check hosts.
2579

2580
    """
2581
    nodes = [node for node in all_nodes
2582
             if (node.group != group_uuid and
2583
                 not node.offline)]
2584
    keyfunc = operator.attrgetter("group")
2585

    
2586
    return map(itertools.cycle,
2587
               [sorted(map(operator.attrgetter("name"), names))
2588
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2589
                                                  keyfunc)])
2590

    
2591
  @classmethod
2592
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2593
    """Choose which nodes should talk to which other nodes.
2594

2595
    We will make nodes contact all nodes in their group, and one node from
2596
    every other group.
2597

2598
    @warning: This algorithm has a known issue if one node group is much
2599
      smaller than others (e.g. just one node). In such a case all other
2600
      nodes will talk to the single node.
2601

2602
    """
2603
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2604
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2605

    
2606
    return (online_nodes,
2607
            dict((name, sorted([i.next() for i in sel]))
2608
                 for name in online_nodes))
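
  # Shape example (hypothetical names): with two other node groups present,
  # the second element of the result maps every online node of this group
  # to one rotating member of each other group, e.g.
  #   (["nodeA", "nodeB"], {"nodeA": ["other1", "remote1"],
  #                         "nodeB": ["other2", "remote2"]})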
2609

    
2610
  def BuildHooksEnv(self):
2611
    """Build hooks env.
2612

2613
    Cluster-Verify hooks are run in the post phase only; a hook failure causes
2614
    its output to be logged in the verify output and the verification to fail.
2615

2616
    """
2617
    env = {
2618
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2619
      }
2620

    
2621
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2622
               for node in self.my_node_info.values())
2623

    
2624
    return env
2625

    
2626
  def BuildHooksNodes(self):
2627
    """Build hooks nodes.
2628

2629
    """
2630
    return ([], self.my_node_names)
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Verify integrity of the node group, performing various test on nodes.
2634

2635
    """
2636
    # This method has too many local variables. pylint: disable=R0914
2637
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2638

    
2639
    if not self.my_node_names:
2640
      # empty node group
2641
      feedback_fn("* Empty node group, skipping verification")
2642
      return True
2643

    
2644
    self.bad = False
2645
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646
    verbose = self.op.verbose
2647
    self._feedback_fn = feedback_fn
2648

    
2649
    vg_name = self.cfg.GetVGName()
2650
    drbd_helper = self.cfg.GetDRBDHelper()
2651
    cluster = self.cfg.GetClusterInfo()
2652
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2653
    hypervisors = cluster.enabled_hypervisors
2654
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2655

    
2656
    i_non_redundant = [] # Non redundant instances
2657
    i_non_a_balanced = [] # Non auto-balanced instances
2658
    n_offline = 0 # Count of offline nodes
2659
    n_drained = 0 # Count of nodes being drained
2660
    node_vol_should = {}
2661

    
2662
    # FIXME: verify OS list
2663

    
2664
    # File verification
2665
    filemap = _ComputeAncillaryFiles(cluster, False)
2666

    
2667
    # do local checksums
2668
    master_node = self.master_node = self.cfg.GetMasterNode()
2669
    master_ip = self.cfg.GetMasterIP()
2670

    
2671
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2672

    
2673
    node_verify_param = {
2674
      constants.NV_FILELIST:
2675
        utils.UniqueSequence(filename
2676
                             for files in filemap
2677
                             for filename in files),
2678
      constants.NV_NODELIST:
2679
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2680
                                  self.all_node_info.values()),
2681
      constants.NV_HYPERVISOR: hypervisors,
2682
      constants.NV_HVPARAMS:
2683
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2684
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2685
                                 for node in node_data_list
2686
                                 if not node.offline],
2687
      constants.NV_INSTANCELIST: hypervisors,
2688
      constants.NV_VERSION: None,
2689
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2690
      constants.NV_NODESETUP: None,
2691
      constants.NV_TIME: None,
2692
      constants.NV_MASTERIP: (master_node, master_ip),
2693
      constants.NV_OSLIST: None,
2694
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2695
      }
2696

    
2697
    if vg_name is not None:
2698
      node_verify_param[constants.NV_VGLIST] = None
2699
      node_verify_param[constants.NV_LVLIST] = vg_name
2700
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2701
      node_verify_param[constants.NV_DRBDLIST] = None
2702

    
2703
    if drbd_helper:
2704
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2705

    
2706
    # bridge checks
2707
    # FIXME: this needs to be changed per node-group, not cluster-wide
2708
    bridges = set()
2709
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2710
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2711
      bridges.add(default_nicpp[constants.NIC_LINK])
2712
    for instance in self.my_inst_info.values():
2713
      for nic in instance.nics:
2714
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2715
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2716
          bridges.add(full_nic[constants.NIC_LINK])
2717

    
2718
    if bridges:
2719
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2720

    
2721
    # Build our expected cluster state
2722
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2723
                                                 name=node.name,
2724
                                                 vm_capable=node.vm_capable))
2725
                      for node in node_data_list)
2726

    
2727
    # Gather OOB paths
2728
    oob_paths = []
2729
    for node in self.all_node_info.values():
2730
      path = _SupportsOob(self.cfg, node)
2731
      if path and path not in oob_paths:
2732
        oob_paths.append(path)
2733

    
2734
    if oob_paths:
2735
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2736

    
2737
    for instance in self.my_inst_names:
2738
      inst_config = self.my_inst_info[instance]
2739

    
2740
      for nname in inst_config.all_nodes:
2741
        if nname not in node_image:
2742
          gnode = self.NodeImage(name=nname)
2743
          gnode.ghost = (nname not in self.all_node_info)
2744
          node_image[nname] = gnode
2745

    
2746
      inst_config.MapLVsByNode(node_vol_should)
2747

    
2748
      pnode = inst_config.primary_node
2749
      node_image[pnode].pinst.append(instance)
2750

    
2751
      for snode in inst_config.secondary_nodes:
2752
        nimg = node_image[snode]
2753
        nimg.sinst.append(instance)
2754
        if pnode not in nimg.sbp:
2755
          nimg.sbp[pnode] = []
2756
        nimg.sbp[pnode].append(instance)
2757

    
2758
    # At this point, we have the in-memory data structures complete,
2759
    # except for the runtime information, which we'll gather next
2760

    
2761
    # Due to the way our RPC system works, exact response times cannot be
2762
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2763
    # time before and after executing the request, we can at least have a time
2764
    # window.
2765
    nvinfo_starttime = time.time()
2766
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2767
                                           node_verify_param,
2768
                                           self.cfg.GetClusterName())
2769
    nvinfo_endtime = time.time()
2770

    
2771
    if self.extra_lv_nodes and vg_name is not None:
2772
      extra_lv_nvinfo = \
2773
          self.rpc.call_node_verify(self.extra_lv_nodes,
2774
                                    {constants.NV_LVLIST: vg_name},
2775
                                    self.cfg.GetClusterName())
2776
    else:
2777
      extra_lv_nvinfo = {}
2778

    
2779
    all_drbd_map = self.cfg.ComputeDRBDMap()
2780

    
2781
    feedback_fn("* Gathering disk information (%s nodes)" %
2782
                len(self.my_node_names))
2783
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2784
                                     self.my_inst_info)
2785

    
2786
    feedback_fn("* Verifying configuration file consistency")
2787

    
2788
    # If not all nodes are being checked, we need to make sure the master node
2789
    # and a non-checked vm_capable node are in the list.
2790
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2791
    if absent_nodes:
2792
      vf_nvinfo = all_nvinfo.copy()
2793
      vf_node_info = list(self.my_node_info.values())
2794
      additional_nodes = []
2795
      if master_node not in self.my_node_info:
2796
        additional_nodes.append(master_node)
2797
        vf_node_info.append(self.all_node_info[master_node])
2798
      # Add the first vm_capable node we find which is not included
2799
      for node in absent_nodes:
2800
        nodeinfo = self.all_node_info[node]
2801
        if nodeinfo.vm_capable and not nodeinfo.offline:
2802
          additional_nodes.append(node)
2803
          vf_node_info.append(self.all_node_info[node])
2804
          break
2805
      key = constants.NV_FILELIST
2806
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2807
                                                 {key: node_verify_param[key]},
2808
                                                 self.cfg.GetClusterName()))
2809
    else:
2810
      vf_nvinfo = all_nvinfo
2811
      vf_node_info = self.my_node_info.values()
2812

    
2813
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2814

    
2815
    feedback_fn("* Verifying node status")
2816

    
2817
    refos_img = None
2818

    
2819
    for node_i in node_data_list:
2820
      node = node_i.name
2821
      nimg = node_image[node]
2822

    
2823
      if node_i.offline:
2824
        if verbose:
2825
          feedback_fn("* Skipping offline node %s" % (node,))
2826
        n_offline += 1
2827
        continue
2828

    
2829
      if node == master_node:
2830
        ntype = "master"
2831
      elif node_i.master_candidate:
2832
        ntype = "master candidate"
2833
      elif node_i.drained:
2834
        ntype = "drained"
2835
        n_drained += 1
2836
      else:
2837
        ntype = "regular"
2838
      if verbose:
2839
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2840

    
2841
      msg = all_nvinfo[node].fail_msg
2842
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2843
      if msg:
2844
        nimg.rpc_fail = True
2845
        continue
2846

    
2847
      nresult = all_nvinfo[node].payload
2848

    
2849
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2850
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2851
      self._VerifyNodeNetwork(node_i, nresult)
2852
      self._VerifyOob(node_i, nresult)
2853

    
2854
      if nimg.vm_capable:
2855
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2856
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2857
                             all_drbd_map)
2858

    
2859
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2860
        self._UpdateNodeInstances(node_i, nresult, nimg)
2861
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2862
        self._UpdateNodeOS(node_i, nresult, nimg)
2863

    
2864
        if not nimg.os_fail:
2865
          if refos_img is None:
2866
            refos_img = nimg
2867
          self._VerifyNodeOS(node_i, nimg, refos_img)
2868
        self._VerifyNodeBridges(node_i, nresult, bridges)
2869

    
2870
        # Check whether all running instances are primary for the node. (This
2871
        # can no longer be done from _VerifyInstance below, since some of the
2872
        # wrong instances could be from other node groups.)
2873
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
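            # re-indent the failed script's output so it shows up nested
            # under the error message reported above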
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

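    # nv_dict maps each (node, LV name) pair to the owning instance; any
    # entries left over after the per-node LV lists below have been processed
    # are reported as missing volumes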
    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
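        # the node reports sizes in bytes, while disk.size is stored in MiB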
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

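    # changing the master netdev requires taking the master IP down on the
    # old device first and starting it again after the configuration update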
    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

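  # each entry pairs a target node list with the set of files to upload to it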
  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

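    # sleep for the smaller of 60 seconds and the longest estimated sync time
    # before polling again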
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

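          # when powering on several nodes, wait the configured delay between
          # consecutive commands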
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary
4172

4173
    @param rlist: a map with node names as keys and OS objects as values
4174

4175
    @rtype: dict
4176
    @return: a dictionary with osnames as keys and as value another
4177
        map, with nodes as keys and tuples of (path, status, diagnose,
4178
        variants, parameters, api_versions) as values, eg::
4179

4180
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4181
                                     (/srv/..., False, "invalid api")],
4182
                           "node2": [(/srv/..., True, "", [], [])]}
4183
          }
4184

4185
    """
4186
    all_os = {}
4187
    # we build here the list of nodes that didn't fail the RPC (at RPC
4188
    # level), so that nodes with a non-responding node daemon don't
4189
    # make all OSes invalid
4190
    good_nodes = [node_name for node_name in rlist
4191
                  if not rlist[node_name].fail_msg]
4192
    for node_name, nr in rlist.items():
4193
      if nr.fail_msg or not nr.payload:
4194
        continue
4195
      for (name, path, status, diagnose, variants,
4196
           params, api_versions) in nr.payload:
4197
        if name not in all_os:
4198
          # build a list of nodes for this os containing empty lists
4199
          # for each node in node_list
4200
          all_os[name] = {}
4201
          for nname in good_nodes:
4202
            all_os[name][nname] = []
4203
        # convert params from [name, help] to (name, help)
4204
        params = [tuple(v) for v in params]
4205
        all_os[name][node_name].append((path, status, diagnose,
4206
                                        variants, params, api_versions))
4207
    return all_os
4208

    
4209
  def _GetQueryData(self, lu):
4210
    """Computes the list of nodes and their attributes.
4211

4212
    """
4213
    # Locking is not used
4214
    assert not (compat.any(lu.glm.is_owned(level)
4215
                           for level in locking.LEVELS
4216
                           if level != locking.LEVEL_CLUSTER) or
4217
                self.do_locking or self.use_locking)
4218

    
4219
    valid_nodes = [node.name
4220
                   for node in lu.cfg.GetAllNodesInfo().values()
4221
                   if not node.offline and node.vm_capable]
4222
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4223
    cluster = lu.cfg.GetClusterInfo()
4224

    
4225
    data = {}
4226

    
4227
    for (os_name, os_data) in pol.items():
4228
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4229
                          hidden=(os_name in cluster.hidden_os),
4230
                          blacklisted=(os_name in cluster.blacklisted_os))
4231

    
4232
      variants = set()
4233
      parameters = set()
4234
      api_versions = set()
4235

    
4236
      for idx, osl in enumerate(os_data.values()):
4237
        info.valid = bool(info.valid and osl and osl[0][1])
4238
        if not info.valid:
4239
          break
4240

    
4241
        (node_variants, node_params, node_api) = osl[0][3:6]
4242
        if idx == 0:
4243
          # First entry
4244
          variants.update(node_variants)
4245
          parameters.update(node_params)
4246
          api_versions.update(node_api)
4247
        else:
4248
          # Filter out inconsistent values
4249
          variants.intersection_update(node_variants)
4250
          parameters.intersection_update(node_params)
4251
          api_versions.intersection_update(node_api)
4252

    
4253
      info.variants = list(variants)
4254
      info.parameters = list(parameters)
4255
      info.api_versions = list(api_versions)
4256

    
4257
      data[os_name] = info
4258

    
4259
    # Prepare data in requested order
4260
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4261
            if name in data]
4262

    
4263

    
4264
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

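  # Illustrative note (added for readability, not part of the upstream code):
  # when none of "hidden", "blacklisted" or "valid" are requested,
  # _BuildFilter combines the name filter with the status filter, yielding a
  # query filter roughly of the form
  #   [OP_AND, <name filter>, [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
  #                                    [OP_NOT, [OP_TRUE, "blacklisted"]],
  #                                    [OP_TRUE, "valid"]]]
  # so hidden, blacklisted and invalid OSes are skipped unless those fields
  # are explicitly asked for.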
  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

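  # Note (added for readability, not upstream): Exec below removes the node
  # in a deliberate order -- other nodes are promoted to master candidate if
  # needed and the node is dropped from the configuration, the post hooks run
  # while the node is still reachable, the node daemon is told to leave the
  # cluster, and finally /etc/hosts and the ancillary files are updated on
  # the remaining nodes.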
  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

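  # Illustrative note (not upstream): Exec below returns one row of strings
  # per logical volume, in the order of self.op.output_fields.  For example,
  # with output_fields ["node", "name", "size", "instance"] a row could look
  # like ["node1.example.com", "disk0", "10240", "instance1.example.com"]
  # (hypothetical names); "-" is used when a volume belongs to no instance.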
  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

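  # Note (added for readability, not upstream): _GetQueryData below only
  # gathers the expensive extra data when the corresponding flag is present
  # in self.requested_data -- IQ_LIVE/IQ_CONSOLE trigger the live-instance
  # RPC, IQ_DISKUSAGE computes per-instance disk usage, and IQ_NODES pulls
  # the node and node-group objects; everything else comes straight from
  # the configuration.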
  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    # check connectivity
    result = self.rpc.call_version([self.new_node.name])[self.new_node.name]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)

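  # Note (added for readability, not upstream): Exec below performs the
  # actual addition -- the node is assumed powered, re-added nodes get their
  # offline/drained flags reset, /etc/hosts is updated if the cluster manages
  # it, the secondary IP is verified, the master runs a node-verify RPC
  # against the new node, and finally the configuration and ancillary files
  # are redistributed.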
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

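  # Illustrative note (not upstream): _F2R maps the
  # (master_candidate, drained, offline) flag tuple to a single role, so
  # e.g. _F2R[(True, False, False)] is _ROLE_CANDIDATE, and _R2F is the
  # reverse lookup used in Exec to write the flags back after a role change;
  # at most one of the three flags may be True at any time.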
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

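  # Illustrative note (not upstream): the "os_hvp" entry built in Exec below
  # keeps only the hypervisors that are enabled on the cluster, so its shape
  # is {os_name: {hv_name: hv_params}} -- for instance (with hypothetical
  # names) {"debian-image": {"xen-pvm": {...}}} -- while per-OS overrides for
  # disabled hypervisors are silently dropped.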
  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


5570
class LUInstanceActivateDisks(NoHooksLU):
5571
  """Bring up an instance's disks.
5572

5573
  """
5574
  REQ_BGL = False
5575

    
5576
  def ExpandNames(self):
5577
    self._ExpandAndLockInstance()
5578
    self.needed_locks[locking.LEVEL_NODE] = []
5579
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5580

    
5581
  def DeclareLocks(self, level):
5582
    if level == locking.LEVEL_NODE:
5583
      self._LockInstancesNodes()
5584

    
5585
  def CheckPrereq(self):
5586
    """Check prerequisites.
5587

5588
    This checks that the instance is in the cluster.
5589

5590
    """
5591
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5592
    assert self.instance is not None, \
5593
      "Cannot retrieve locked instance %s" % self.op.instance_name
5594
    _CheckNodeOnline(self, self.instance.primary_node)
5595

    
5596
  def Exec(self, feedback_fn):
5597
    """Activate the disks.
5598

5599
    """
5600
    disks_ok, disks_info = \
5601
              _AssembleInstanceDisks(self, self.instance,
5602
                                     ignore_size=self.op.ignore_size)
5603
    if not disks_ok:
5604
      raise errors.OpExecError("Cannot activate block devices")
5605

    
5606
    return disks_info
5607

    
5608

    
5609
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5610
                           ignore_size=False):
5611
  """Prepare the block devices for an instance.
5612

5613
  This sets up the block devices on all nodes.
5614

5615
  @type lu: L{LogicalUnit}
5616
  @param lu: the logical unit on whose behalf we execute
5617
  @type instance: L{objects.Instance}
5618
  @param instance: the instance for whose disks we assemble
5619
  @type disks: list of L{objects.Disk} or None
5620
  @param disks: which disks to assemble (or all, if None)
5621
  @type ignore_secondaries: boolean
5622
  @param ignore_secondaries: if true, errors on secondary nodes
5623
      won't result in an error return from the function
5624
  @type ignore_size: boolean
5625
  @param ignore_size: if true, the current known size of the disk
5626
      will not be used during the disk activation, useful for cases
5627
      when the size is wrong
5628
  @return: a tuple of (disks_ok, device_info), where disks_ok is False if
      the operation failed and device_info is a list of
5629
      (host, instance_visible_name, node_visible_name)
5630
      with the mapping from node devices to instance devices
5631

5632
  """
5633
  device_info = []
5634
  disks_ok = True
5635
  iname = instance.name
5636
  disks = _ExpandCheckDisks(instance, disks)
5637

    
5638
  # With the two passes mechanism we try to reduce the window of
5639
  # opportunity for the race condition of switching DRBD to primary
5640
  # before handshaking occurred, but we do not eliminate it
5641

    
5642
  # The proper fix would be to wait (with some limits) until the
5643
  # connection has been made and drbd transitions from WFConnection
5644
  # into any other network-connected state (Connected, SyncTarget,
5645
  # SyncSource, etc.)
5646

    
5647
  # 1st pass, assemble on all nodes in secondary mode
5648
  for idx, inst_disk in enumerate(disks):
5649
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5650
      if ignore_size:
5651
        node_disk = node_disk.Copy()
5652
        node_disk.UnsetSize()
5653
      lu.cfg.SetDiskID(node_disk, node)
5654
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5655
      msg = result.fail_msg
5656
      if msg:
5657
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5658
                           " (is_primary=False, pass=1): %s",
5659
                           inst_disk.iv_name, node, msg)
5660
        if not ignore_secondaries:
5661
          disks_ok = False
5662

    
5663
  # FIXME: race condition on drbd migration to primary
5664

    
5665
  # 2nd pass, do only the primary node
5666
  for idx, inst_disk in enumerate(disks):
5667
    dev_path = None
5668

    
5669
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5670
      if node != instance.primary_node:
5671
        continue
5672
      if ignore_size:
5673
        node_disk = node_disk.Copy()
5674
        node_disk.UnsetSize()
5675
      lu.cfg.SetDiskID(node_disk, node)
5676
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5677
      msg = result.fail_msg
5678
      if msg:
5679
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5680
                           " (is_primary=True, pass=2): %s",
5681
                           inst_disk.iv_name, node, msg)
5682
        disks_ok = False
5683
      else:
5684
        dev_path = result.payload
5685

    
5686
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5687

    
5688
  # leave the disks configured for the primary node
5689
  # this is a workaround that would be fixed better by
5690
  # improving the logical/physical id handling
5691
  for disk in disks:
5692
    lu.cfg.SetDiskID(disk, instance.primary_node)
5693

    
5694
  return disks_ok, device_info
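

# Illustrative sketch, not part of the original module: how a caller can
# consume the (disks_ok, device_info) tuple returned above; the wrapper
# name is hypothetical and nothing calls it.
def _ExampleActivateDisks(lu, instance):
  """Assemble all disks of an instance and return the device mapping.

  """
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate disks for %s" % instance.name)
  # each entry is (node_name, instance_visible_name, node_device_path)
  return device_info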
5695

    
5696

    
5697
def _StartInstanceDisks(lu, instance, force):
5698
  """Start the disks of an instance.
5699

5700
  """
5701
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5702
                                           ignore_secondaries=force)
5703
  if not disks_ok:
5704
    _ShutdownInstanceDisks(lu, instance)
5705
    if force is not None and not force:
5706
      lu.proc.LogWarning("", hint="If the message above refers to a"
5707
                         " secondary node,"
5708
                         " you can retry the operation using '--force'.")
5709
    raise errors.OpExecError("Disk consistency error")
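

# Added commentary, not original text (inferred from the callers in this
# module): the "force" argument doubles as ignore_secondaries above, so
# True skips errors on secondary nodes, while None (used by reinstall and
# rename) behaves like False except that no "--force" hint is printed.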
5710

    
5711

    
5712
class LUInstanceDeactivateDisks(NoHooksLU):
5713
  """Shutdown an instance's disks.
5714

5715
  """
5716
  REQ_BGL = False
5717

    
5718
  def ExpandNames(self):
5719
    self._ExpandAndLockInstance()
5720
    self.needed_locks[locking.LEVEL_NODE] = []
5721
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5722

    
5723
  def DeclareLocks(self, level):
5724
    if level == locking.LEVEL_NODE:
5725
      self._LockInstancesNodes()
5726

    
5727
  def CheckPrereq(self):
5728
    """Check prerequisites.
5729

5730
    This checks that the instance is in the cluster.
5731

5732
    """
5733
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5734
    assert self.instance is not None, \
5735
      "Cannot retrieve locked instance %s" % self.op.instance_name
5736

    
5737
  def Exec(self, feedback_fn):
5738
    """Deactivate the disks.
5739

5740
    """
5741
    instance = self.instance
5742
    if self.op.force:
5743
      _ShutdownInstanceDisks(self, instance)
5744
    else:
5745
      _SafeShutdownInstanceDisks(self, instance)
5746

    
5747

    
5748
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5749
  """Shutdown block devices of an instance.
5750

5751
  This function checks that the instance is not running before calling
5752
  _ShutdownInstanceDisks.
5753

5754
  """
5755
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5756
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5757

    
5758

    
5759
def _ExpandCheckDisks(instance, disks):
5760
  """Return the instance disks selected by the disks list.
5761

5762
  @type disks: list of L{objects.Disk} or None
5763
  @param disks: selected disks
5764
  @rtype: list of L{objects.Disk}
5765
  @return: selected instance disks to act on
5766

5767
  """
5768
  if disks is None:
5769
    return instance.disks
5770
  else:
5771
    if not set(disks).issubset(instance.disks):
5772
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5773
                                   " target instance")
5774
    return disks
5775

    
5776

    
5777
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5778
  """Shutdown block devices of an instance.
5779

5780
  This does the shutdown on all nodes of the instance.
5781

5782
  If ignore_primary is false, errors on the primary node are not
5783
  ignored and make the function return failure.
5784

5785
  """
5786
  all_result = True
5787
  disks = _ExpandCheckDisks(instance, disks)
5788

    
5789
  for disk in disks:
5790
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5791
      lu.cfg.SetDiskID(top_disk, node)
5792
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5793
      msg = result.fail_msg
5794
      if msg:
5795
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5796
                      disk.iv_name, node, msg)
5797
        if ((node == instance.primary_node and not ignore_primary) or
5798
            (node != instance.primary_node and not result.offline)):
5799
          all_result = False
5800
  return all_result
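

# Illustrative sketch, not part of the original module: the "disks"
# argument of the shutdown helpers must be a subset of instance.disks;
# the hypothetical wrapper below shuts down only the first disk.
def _ExampleShutdownFirstDisk(lu, instance):
  """Shut down only the first disk of an instance.

  """
  return _ShutdownInstanceDisks(lu, instance, disks=instance.disks[:1])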
5801

    
5802

    
5803
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5804
  """Checks if a node has enough free memory.
5805

5806
  This function check if a given node has the needed amount of free
5807
  memory. In case the node has less memory or we cannot get the
5808
  information from the node, this function raise an OpPrereqError
5809
  exception.
5810

5811
  @type lu: C{LogicalUnit}
5812
  @param lu: a logical unit from which we get configuration data
5813
  @type node: C{str}
5814
  @param node: the node to check
5815
  @type reason: C{str}
5816
  @param reason: string to use in the error message
5817
  @type requested: C{int}
5818
  @param requested: the amount of memory in MiB to check for
5819
  @type hypervisor_name: C{str}
5820
  @param hypervisor_name: the hypervisor to ask for memory stats
5821
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5822
      we cannot check the node
5823

5824
  """
5825
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5826
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5827
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5828
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5829
  if not isinstance(free_mem, int):
5830
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5831
                               " was '%s'" % (node, free_mem),
5832
                               errors.ECODE_ENVIRON)
5833
  if requested > free_mem:
5834
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5835
                               " needed %s MiB, available %s MiB" %
5836
                               (node, reason, requested, free_mem),
5837
                               errors.ECODE_NORES)
5838

    
5839

    
5840
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5841
  """Checks if nodes have enough free disk space in all the VGs.
5842

5843
  This function checks if all given nodes have the needed amount of
5844
  free disk. In case any node has less disk or we cannot get the
5845
  information from the node, this function raises an OpPrereqError
5846
  exception.
5847

5848
  @type lu: C{LogicalUnit}
5849
  @param lu: a logical unit from which we get configuration data
5850
  @type nodenames: C{list}
5851
  @param nodenames: the list of node names to check
5852
  @type req_sizes: C{dict}
5853
  @param req_sizes: the hash of vg and corresponding amount of disk in
5854
      MiB to check for
5855
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5856
      or we cannot check the node
5857

5858
  """
5859
  for vg, req_size in req_sizes.items():
5860
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
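

# Illustrative sketch, not part of the original module: req_sizes maps a
# volume group name to the space needed on it in MiB; the VG names and
# sizes below are hypothetical example values.
def _ExampleCheckDiskSpace(lu, nodenames):
  """Check for 10 GiB on "xenvg" and 2 GiB on "ssdvg" on the given nodes.

  """
  req_sizes = {
    "xenvg": 10 * 1024,
    "ssdvg": 2 * 1024,
    }
  _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes)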
5861

    
5862

    
5863
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5864
  """Checks if nodes have enough free disk space in the specified VG.
5865

5866
  This function checks if all given nodes have the needed amount of
5867
  free disk. In case any node has less disk or we cannot get the
5868
  information from the node, this function raises an OpPrereqError
5869
  exception.
5870

5871
  @type lu: C{LogicalUnit}
5872
  @param lu: a logical unit from which we get configuration data
5873
  @type nodenames: C{list}
5874
  @param nodenames: the list of node names to check
5875
  @type vg: C{str}
5876
  @param vg: the volume group to check
5877
  @type requested: C{int}
5878
  @param requested: the amount of disk in MiB to check for
5879
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5880
      or we cannot check the node
5881

5882
  """
5883
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5884
  for node in nodenames:
5885
    info = nodeinfo[node]
5886
    info.Raise("Cannot get current information from node %s" % node,
5887
               prereq=True, ecode=errors.ECODE_ENVIRON)
5888
    vg_free = info.payload.get("vg_free", None)
5889
    if not isinstance(vg_free, int):
5890
      raise errors.OpPrereqError("Can't compute free disk space on node"
5891
                                 " %s for vg %s, result was '%s'" %
5892
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5893
    if requested > vg_free:
5894
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5895
                                 " vg %s: required %d MiB, available %d MiB" %
5896
                                 (node, vg, requested, vg_free),
5897
                                 errors.ECODE_NORES)
5898

    
5899

    
5900
class LUInstanceStartup(LogicalUnit):
5901
  """Starts an instance.
5902

5903
  """
5904
  HPATH = "instance-start"
5905
  HTYPE = constants.HTYPE_INSTANCE
5906
  REQ_BGL = False
5907

    
5908
  def CheckArguments(self):
5909
    # extra beparams
5910
    if self.op.beparams:
5911
      # fill the beparams dict
5912
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5913

    
5914
  def ExpandNames(self):
5915
    self._ExpandAndLockInstance()
5916

    
5917
  def BuildHooksEnv(self):
5918
    """Build hooks env.
5919

5920
    This runs on master, primary and secondary nodes of the instance.
5921

5922
    """
5923
    env = {
5924
      "FORCE": self.op.force,
5925
      }
5926

    
5927
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5928

    
5929
    return env
5930

    
5931
  def BuildHooksNodes(self):
5932
    """Build hooks nodes.
5933

5934
    """
5935
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5936
    return (nl, nl)
5937

    
5938
  def CheckPrereq(self):
5939
    """Check prerequisites.
5940

5941
    This checks that the instance is in the cluster.
5942

5943
    """
5944
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5945
    assert self.instance is not None, \
5946
      "Cannot retrieve locked instance %s" % self.op.instance_name
5947

    
5948
    # extra hvparams
5949
    if self.op.hvparams:
5950
      # check hypervisor parameter syntax (locally)
5951
      cluster = self.cfg.GetClusterInfo()
5952
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5953
      filled_hvp = cluster.FillHV(instance)
5954
      filled_hvp.update(self.op.hvparams)
5955
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5956
      hv_type.CheckParameterSyntax(filled_hvp)
5957
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5958

    
5959
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5960

    
5961
    if self.primary_offline and self.op.ignore_offline_nodes:
5962
      self.proc.LogWarning("Ignoring offline primary node")
5963

    
5964
      if self.op.hvparams or self.op.beparams:
5965
        self.proc.LogWarning("Overridden parameters are ignored")
5966
    else:
5967
      _CheckNodeOnline(self, instance.primary_node)
5968

    
5969
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5970

    
5971
      # check that the bridges exist
5972
      _CheckInstanceBridgesExist(self, instance)
5973

    
5974
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5975
                                                instance.name,
5976
                                                instance.hypervisor)
5977
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5978
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5979
      if not remote_info.payload: # not running already
5980
        _CheckNodeFreeMemory(self, instance.primary_node,
5981
                             "starting instance %s" % instance.name,
5982
                             bep[constants.BE_MEMORY], instance.hypervisor)
5983

    
5984
  def Exec(self, feedback_fn):
5985
    """Start the instance.
5986

5987
    """
5988
    instance = self.instance
5989
    force = self.op.force
5990

    
5991
    if not self.op.no_remember:
5992
      self.cfg.MarkInstanceUp(instance.name)
5993

    
5994
    if self.primary_offline:
5995
      assert self.op.ignore_offline_nodes
5996
      self.proc.LogInfo("Primary node offline, marked instance as started")
5997
    else:
5998
      node_current = instance.primary_node
5999

    
6000
      _StartInstanceDisks(self, instance, force)
6001

    
6002
      result = self.rpc.call_instance_start(node_current, instance,
6003
                                            self.op.hvparams, self.op.beparams,
6004
                                            self.op.startup_paused)
6005
      msg = result.fail_msg
6006
      if msg:
6007
        _ShutdownInstanceDisks(self, instance)
6008
        raise errors.OpExecError("Could not start instance: %s" % msg)
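

# Illustrative sketch, not part of the original module: the opcode-level
# counterpart of the LU above, starting an instance with a temporarily
# overridden memory size; the instance name and value are hypothetical
# and nothing calls this helper.
def _ExampleStartupOpcode():
  """Build an OpInstanceStartup with overridden backend parameters.

  """
  return opcodes.OpInstanceStartup(instance_name="inst1.example.com",
                                   beparams={constants.BE_MEMORY: 2048},
                                   force=False)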
6009

    
6010

    
6011
class LUInstanceReboot(LogicalUnit):
6012
  """Reboot an instance.
6013

6014
  """
6015
  HPATH = "instance-reboot"
6016
  HTYPE = constants.HTYPE_INSTANCE
6017
  REQ_BGL = False
6018

    
6019
  def ExpandNames(self):
6020
    self._ExpandAndLockInstance()
6021

    
6022
  def BuildHooksEnv(self):
6023
    """Build hooks env.
6024

6025
    This runs on master, primary and secondary nodes of the instance.
6026

6027
    """
6028
    env = {
6029
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6030
      "REBOOT_TYPE": self.op.reboot_type,
6031
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6032
      }
6033

    
6034
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6035

    
6036
    return env
6037

    
6038
  def BuildHooksNodes(self):
6039
    """Build hooks nodes.
6040

6041
    """
6042
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6043
    return (nl, nl)
6044

    
6045
  def CheckPrereq(self):
6046
    """Check prerequisites.
6047

6048
    This checks that the instance is in the cluster.
6049

6050
    """
6051
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6052
    assert self.instance is not None, \
6053
      "Cannot retrieve locked instance %s" % self.op.instance_name
6054

    
6055
    _CheckNodeOnline(self, instance.primary_node)
6056

    
6057
    # check that the bridges exist
6058
    _CheckInstanceBridgesExist(self, instance)
6059

    
6060
  def Exec(self, feedback_fn):
6061
    """Reboot the instance.
6062

6063
    """
6064
    instance = self.instance
6065
    ignore_secondaries = self.op.ignore_secondaries
6066
    reboot_type = self.op.reboot_type
6067

    
6068
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6069
                                              instance.name,
6070
                                              instance.hypervisor)
6071
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6072
    instance_running = bool(remote_info.payload)
6073

    
6074
    node_current = instance.primary_node
6075

    
6076
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6077
                                            constants.INSTANCE_REBOOT_HARD]:
6078
      for disk in instance.disks:
6079
        self.cfg.SetDiskID(disk, node_current)
6080
      result = self.rpc.call_instance_reboot(node_current, instance,
6081
                                             reboot_type,
6082
                                             self.op.shutdown_timeout)
6083
      result.Raise("Could not reboot instance")
6084
    else:
6085
      if instance_running:
6086
        result = self.rpc.call_instance_shutdown(node_current, instance,
6087
                                                 self.op.shutdown_timeout)
6088
        result.Raise("Could not shutdown instance for full reboot")
6089
        _ShutdownInstanceDisks(self, instance)
6090
      else:
6091
        self.LogInfo("Instance %s was already stopped, starting now",
6092
                     instance.name)
6093
      _StartInstanceDisks(self, instance, ignore_secondaries)
6094
      result = self.rpc.call_instance_start(node_current, instance,
6095
                                            None, None, False)
6096
      msg = result.fail_msg
6097
      if msg:
6098
        _ShutdownInstanceDisks(self, instance)
6099
        raise errors.OpExecError("Could not start instance for"
6100
                                 " full reboot: %s" % msg)
6101

    
6102
    self.cfg.MarkInstanceUp(instance.name)
6103

    
6104

    
6105
class LUInstanceShutdown(LogicalUnit):
6106
  """Shutdown an instance.
6107

6108
  """
6109
  HPATH = "instance-stop"
6110
  HTYPE = constants.HTYPE_INSTANCE
6111
  REQ_BGL = False
6112

    
6113
  def ExpandNames(self):
6114
    self._ExpandAndLockInstance()
6115

    
6116
  def BuildHooksEnv(self):
6117
    """Build hooks env.
6118

6119
    This runs on master, primary and secondary nodes of the instance.
6120

6121
    """
6122
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6123
    env["TIMEOUT"] = self.op.timeout
6124
    return env
6125

    
6126
  def BuildHooksNodes(self):
6127
    """Build hooks nodes.
6128

6129
    """
6130
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6131
    return (nl, nl)
6132

    
6133
  def CheckPrereq(self):
6134
    """Check prerequisites.
6135

6136
    This checks that the instance is in the cluster.
6137

6138
    """
6139
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6140
    assert self.instance is not None, \
6141
      "Cannot retrieve locked instance %s" % self.op.instance_name
6142

    
6143
    self.primary_offline = \
6144
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6145

    
6146
    if self.primary_offline and self.op.ignore_offline_nodes:
6147
      self.proc.LogWarning("Ignoring offline primary node")
6148
    else:
6149
      _CheckNodeOnline(self, self.instance.primary_node)
6150

    
6151
  def Exec(self, feedback_fn):
6152
    """Shutdown the instance.
6153

6154
    """
6155
    instance = self.instance
6156
    node_current = instance.primary_node
6157
    timeout = self.op.timeout
6158

    
6159
    if not self.op.no_remember:
6160
      self.cfg.MarkInstanceDown(instance.name)
6161

    
6162
    if self.primary_offline:
6163
      assert self.op.ignore_offline_nodes
6164
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6165
    else:
6166
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6167
      msg = result.fail_msg
6168
      if msg:
6169
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6170

    
6171
      _ShutdownInstanceDisks(self, instance)
6172

    
6173

    
6174
class LUInstanceReinstall(LogicalUnit):
6175
  """Reinstall an instance.
6176

6177
  """
6178
  HPATH = "instance-reinstall"
6179
  HTYPE = constants.HTYPE_INSTANCE
6180
  REQ_BGL = False
6181

    
6182
  def ExpandNames(self):
6183
    self._ExpandAndLockInstance()
6184

    
6185
  def BuildHooksEnv(self):
6186
    """Build hooks env.
6187

6188
    This runs on master, primary and secondary nodes of the instance.
6189

6190
    """
6191
    return _BuildInstanceHookEnvByObject(self, self.instance)
6192

    
6193
  def BuildHooksNodes(self):
6194
    """Build hooks nodes.
6195

6196
    """
6197
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6198
    return (nl, nl)
6199

    
6200
  def CheckPrereq(self):
6201
    """Check prerequisites.
6202

6203
    This checks that the instance is in the cluster and is not running.
6204

6205
    """
6206
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6207
    assert instance is not None, \
6208
      "Cannot retrieve locked instance %s" % self.op.instance_name
6209
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6210
                     " offline, cannot reinstall")
6211
    for node in instance.secondary_nodes:
6212
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6213
                       " cannot reinstall")
6214

    
6215
    if instance.disk_template == constants.DT_DISKLESS:
6216
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6217
                                 self.op.instance_name,
6218
                                 errors.ECODE_INVAL)
6219
    _CheckInstanceDown(self, instance, "cannot reinstall")
6220

    
6221
    if self.op.os_type is not None:
6222
      # OS verification
6223
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6224
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6225
      instance_os = self.op.os_type
6226
    else:
6227
      instance_os = instance.os
6228

    
6229
    nodelist = list(instance.all_nodes)
6230

    
6231
    if self.op.osparams:
6232
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6233
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6234
      self.os_inst = i_osdict # the new dict (without defaults)
6235
    else:
6236
      self.os_inst = None
6237

    
6238
    self.instance = instance
6239

    
6240
  def Exec(self, feedback_fn):
6241
    """Reinstall the instance.
6242

6243
    """
6244
    inst = self.instance
6245

    
6246
    if self.op.os_type is not None:
6247
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6248
      inst.os = self.op.os_type
6249
      # Write to configuration
6250
      self.cfg.Update(inst, feedback_fn)
6251

    
6252
    _StartInstanceDisks(self, inst, None)
6253
    try:
6254
      feedback_fn("Running the instance OS create scripts...")
6255
      # FIXME: pass debug option from opcode to backend
6256
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6257
                                             self.op.debug_level,
6258
                                             osparams=self.os_inst)
6259
      result.Raise("Could not install OS for instance %s on node %s" %
6260
                   (inst.name, inst.primary_node))
6261
    finally:
6262
      _ShutdownInstanceDisks(self, inst)
6263

    
6264

    
6265
class LUInstanceRecreateDisks(LogicalUnit):
6266
  """Recreate an instance's missing disks.
6267

6268
  """
6269
  HPATH = "instance-recreate-disks"
6270
  HTYPE = constants.HTYPE_INSTANCE
6271
  REQ_BGL = False
6272

    
6273
  def CheckArguments(self):
6274
    # normalise the disk list
6275
    self.op.disks = sorted(frozenset(self.op.disks))
6276

    
6277
  def ExpandNames(self):
6278
    self._ExpandAndLockInstance()
6279
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6280
    if self.op.nodes:
6281
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6282
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6283
    else:
6284
      self.needed_locks[locking.LEVEL_NODE] = []
6285

    
6286
  def DeclareLocks(self, level):
6287
    if level == locking.LEVEL_NODE:
6288
      # if we replace the nodes, we only need to lock the old primary,
6289
      # otherwise we need to lock all nodes for disk re-creation
6290
      primary_only = bool(self.op.nodes)
6291
      self._LockInstancesNodes(primary_only=primary_only)
6292

    
6293
  def BuildHooksEnv(self):
6294
    """Build hooks env.
6295

6296
    This runs on master, primary and secondary nodes of the instance.
6297

6298
    """
6299
    return _BuildInstanceHookEnvByObject(self, self.instance)
6300

    
6301
  def BuildHooksNodes(self):
6302
    """Build hooks nodes.
6303

6304
    """
6305
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6306
    return (nl, nl)
6307

    
6308
  def CheckPrereq(self):
6309
    """Check prerequisites.
6310

6311
    This checks that the instance is in the cluster and is not running.
6312

6313
    """
6314
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6315
    assert instance is not None, \
6316
      "Cannot retrieve locked instance %s" % self.op.instance_name
6317
    if self.op.nodes:
6318
      if len(self.op.nodes) != len(instance.all_nodes):
6319
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6320
                                   " %d replacement nodes were specified" %
6321
                                   (instance.name, len(instance.all_nodes),
6322
                                    len(self.op.nodes)),
6323
                                   errors.ECODE_INVAL)
6324
      assert instance.disk_template != constants.DT_DRBD8 or \
6325
          len(self.op.nodes) == 2
6326
      assert instance.disk_template != constants.DT_PLAIN or \
6327
          len(self.op.nodes) == 1
6328
      primary_node = self.op.nodes[0]
6329
    else:
6330
      primary_node = instance.primary_node
6331
    _CheckNodeOnline(self, primary_node)
6332

    
6333
    if instance.disk_template == constants.DT_DISKLESS:
6334
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6335
                                 self.op.instance_name, errors.ECODE_INVAL)
6336
    # if we replace nodes *and* the old primary is offline, we don't
6337
    # check
6338
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6339
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6340
    if not (self.op.nodes and old_pnode.offline):
6341
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6342

    
6343
    if not self.op.disks:
6344
      self.op.disks = range(len(instance.disks))
6345
    else:
6346
      for idx in self.op.disks:
6347
        if idx >= len(instance.disks):
6348
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6349
                                     errors.ECODE_INVAL)
6350
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6351
      raise errors.OpPrereqError("Can't recreate disks partially and"
6352
                                 " change the nodes at the same time",
6353
                                 errors.ECODE_INVAL)
6354
    self.instance = instance
6355

    
6356
  def Exec(self, feedback_fn):
6357
    """Recreate the disks.
6358

6359
    """
6360
    instance = self.instance
6361

    
6362
    to_skip = []
6363
    mods = [] # keeps track of needed logical_id changes
6364

    
6365
    for idx, disk in enumerate(instance.disks):
6366
      if idx not in self.op.disks: # disk idx has not been passed in
6367
        to_skip.append(idx)
6368
        continue
6369
      # update secondaries for disks, if needed
6370
      if self.op.nodes:
6371
        if disk.dev_type == constants.LD_DRBD8:
6372
          # need to update the nodes and minors
6373
          assert len(self.op.nodes) == 2
6374
          assert len(disk.logical_id) == 6 # otherwise disk internals
6375
                                           # have changed
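          # Added commentary, not original text: a DRBD8 logical_id is
          # assumed to be the 6-tuple (nodeA, nodeB, port, minorA, minorB,
          # secret), matching the unpacking below; only the nodes and
          # minors change when moving the disks to new nodes.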
6376
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6377
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6378
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6379
                    new_minors[0], new_minors[1], old_secret)
6380
          assert len(disk.logical_id) == len(new_id)
6381
          mods.append((idx, new_id))
6382

    
6383
    # now that we have passed all asserts above, we can apply the mods
6384
    # in a single run (to avoid partial changes)
6385
    for idx, new_id in mods:
6386
      instance.disks[idx].logical_id = new_id
6387

    
6388
    # change primary node, if needed
6389
    if self.op.nodes:
6390
      instance.primary_node = self.op.nodes[0]
6391
      self.LogWarning("Changing the instance's nodes, you will have to"
6392
                      " remove any disks left on the older nodes manually")
6393

    
6394
    if self.op.nodes:
6395
      self.cfg.Update(instance, feedback_fn)
6396

    
6397
    _CreateDisks(self, instance, to_skip=to_skip)
6398

    
6399

    
6400
class LUInstanceRename(LogicalUnit):
6401
  """Rename an instance.
6402

6403
  """
6404
  HPATH = "instance-rename"
6405
  HTYPE = constants.HTYPE_INSTANCE
6406

    
6407
  def CheckArguments(self):
6408
    """Check arguments.
6409

6410
    """
6411
    if self.op.ip_check and not self.op.name_check:
6412
      # TODO: make the ip check more flexible and not depend on the name check
6413
      raise errors.OpPrereqError("IP address check requires a name check",
6414
                                 errors.ECODE_INVAL)
6415

    
6416
  def BuildHooksEnv(self):
6417
    """Build hooks env.
6418

6419
    This runs on master, primary and secondary nodes of the instance.
6420

6421
    """
6422
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6423
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6424
    return env
6425

    
6426
  def BuildHooksNodes(self):
6427
    """Build hooks nodes.
6428

6429
    """
6430
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6431
    return (nl, nl)
6432

    
6433
  def CheckPrereq(self):
6434
    """Check prerequisites.
6435

6436
    This checks that the instance is in the cluster and is not running.
6437

6438
    """
6439
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6440
                                                self.op.instance_name)
6441
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6442
    assert instance is not None
6443
    _CheckNodeOnline(self, instance.primary_node)
6444
    _CheckInstanceDown(self, instance, "cannot rename")
6445
    self.instance = instance
6446

    
6447
    new_name = self.op.new_name
6448
    if self.op.name_check:
6449
      hostname = netutils.GetHostname(name=new_name)
6450
      if hostname.name != new_name:
6451
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6452
                     hostname.name)
6453
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6454
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6455
                                    " same as given hostname '%s'") %
6456
                                    (hostname.name, self.op.new_name),
6457
                                    errors.ECODE_INVAL)
6458
      new_name = self.op.new_name = hostname.name
6459
      if (self.op.ip_check and
6460
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6461
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6462
                                   (hostname.ip, new_name),
6463
                                   errors.ECODE_NOTUNIQUE)
6464

    
6465
    instance_list = self.cfg.GetInstanceList()
6466
    if new_name in instance_list and new_name != instance.name:
6467
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6468
                                 new_name, errors.ECODE_EXISTS)
6469

    
6470
  def Exec(self, feedback_fn):
6471
    """Rename the instance.
6472

6473
    """
6474
    inst = self.instance
6475
    old_name = inst.name
6476

    
6477
    rename_file_storage = False
6478
    if (inst.disk_template in constants.DTS_FILEBASED and
6479
        self.op.new_name != inst.name):
6480
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6481
      rename_file_storage = True
6482

    
6483
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6484
    # Change the instance lock. This is definitely safe while we hold the BGL.
6485
    # Otherwise the new lock would have to be added in acquired mode.
6486
    assert self.REQ_BGL
6487
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6488
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6489

    
6490
    # re-read the instance from the configuration after rename
6491
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6492

    
6493
    if rename_file_storage:
6494
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6495
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6496
                                                     old_file_storage_dir,
6497
                                                     new_file_storage_dir)
6498
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6499
                   " (but the instance has been renamed in Ganeti)" %
6500
                   (inst.primary_node, old_file_storage_dir,
6501
                    new_file_storage_dir))
6502

    
6503
    _StartInstanceDisks(self, inst, None)
6504
    try:
6505
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6506
                                                 old_name, self.op.debug_level)
6507
      msg = result.fail_msg
6508
      if msg:
6509
        msg = ("Could not run OS rename script for instance %s on node %s"
6510
               " (but the instance has been renamed in Ganeti): %s" %
6511
               (inst.name, inst.primary_node, msg))
6512
        self.proc.LogWarning(msg)
6513
    finally:
6514
      _ShutdownInstanceDisks(self, inst)
6515

    
6516
    return inst.name
6517

    
6518

    
6519
class LUInstanceRemove(LogicalUnit):
6520
  """Remove an instance.
6521

6522
  """
6523
  HPATH = "instance-remove"
6524
  HTYPE = constants.HTYPE_INSTANCE
6525
  REQ_BGL = False
6526

    
6527
  def ExpandNames(self):
6528
    self._ExpandAndLockInstance()
6529
    self.needed_locks[locking.LEVEL_NODE] = []
6530
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6531

    
6532
  def DeclareLocks(self, level):
6533
    if level == locking.LEVEL_NODE:
6534
      self._LockInstancesNodes()
6535

    
6536
  def BuildHooksEnv(self):
6537
    """Build hooks env.
6538

6539
    This runs on master, primary and secondary nodes of the instance.
6540

6541
    """
6542
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6543
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6544
    return env
6545

    
6546
  def BuildHooksNodes(self):
6547
    """Build hooks nodes.
6548

6549
    """
6550
    nl = [self.cfg.GetMasterNode()]
6551
    nl_post = list(self.instance.all_nodes) + nl
6552
    return (nl, nl_post)
6553

    
6554
  def CheckPrereq(self):
6555
    """Check prerequisites.
6556

6557
    This checks that the instance is in the cluster.
6558

6559
    """
6560
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6561
    assert self.instance is not None, \
6562
      "Cannot retrieve locked instance %s" % self.op.instance_name
6563

    
6564
  def Exec(self, feedback_fn):
6565
    """Remove the instance.
6566

6567
    """
6568
    instance = self.instance
6569
    logging.info("Shutting down instance %s on node %s",
6570
                 instance.name, instance.primary_node)
6571

    
6572
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6573
                                             self.op.shutdown_timeout)
6574
    msg = result.fail_msg
6575
    if msg:
6576
      if self.op.ignore_failures:
6577
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6578
      else:
6579
        raise errors.OpExecError("Could not shutdown instance %s on"
6580
                                 " node %s: %s" %
6581
                                 (instance.name, instance.primary_node, msg))
6582

    
6583
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6584

    
6585

    
6586
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6587
  """Utility function to remove an instance.
6588

6589
  """
6590
  logging.info("Removing block devices for instance %s", instance.name)
6591

    
6592
  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
6593
    if not ignore_failures:
6594
      raise errors.OpExecError("Can't remove instance's disks")
6595
    feedback_fn("Warning: can't remove instance's disks")
6596

    
6597
  logging.info("Removing instance %s out of cluster config", instance.name)
6598

    
6599
  lu.cfg.RemoveInstance(instance.name)
6600

    
6601
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6602
    "Instance lock removal conflict"
6603

    
6604
  # Remove lock for the instance
6605
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6606

    
6607

    
6608
class LUInstanceQuery(NoHooksLU):
6609
  """Logical unit for querying instances.
6610

6611
  """
6612
  # pylint: disable=W0142
6613
  REQ_BGL = False
6614

    
6615
  def CheckArguments(self):
6616
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6617
                             self.op.output_fields, self.op.use_locking)
6618

    
6619
  def ExpandNames(self):
6620
    self.iq.ExpandNames(self)
6621

    
6622
  def DeclareLocks(self, level):
6623
    self.iq.DeclareLocks(self, level)
6624

    
6625
  def Exec(self, feedback_fn):
6626
    return self.iq.OldStyleQuery(self)
6627

    
6628

    
6629
class LUInstanceFailover(LogicalUnit):
6630
  """Failover an instance.
6631

6632
  """
6633
  HPATH = "instance-failover"
6634
  HTYPE = constants.HTYPE_INSTANCE
6635
  REQ_BGL = False
6636

    
6637
  def CheckArguments(self):
6638
    """Check the arguments.
6639

6640
    """
6641
    self.iallocator = getattr(self.op, "iallocator", None)
6642
    self.target_node = getattr(self.op, "target_node", None)
6643

    
6644
  def ExpandNames(self):
6645
    self._ExpandAndLockInstance()
6646

    
6647
    if self.op.target_node is not None:
6648
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6649

    
6650
    self.needed_locks[locking.LEVEL_NODE] = []
6651
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6652

    
6653
    ignore_consistency = self.op.ignore_consistency
6654
    shutdown_timeout = self.op.shutdown_timeout
6655
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6656
                                       cleanup=False,
6657
                                       failover=True,
6658
                                       ignore_consistency=ignore_consistency,
6659
                                       shutdown_timeout=shutdown_timeout)
6660
    self.tasklets = [self._migrater]
6661

    
6662
  def DeclareLocks(self, level):
6663
    if level == locking.LEVEL_NODE:
6664
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6665
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6666
        if self.op.target_node is None:
6667
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6668
        else:
6669
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6670
                                                   self.op.target_node]
6671
        del self.recalculate_locks[locking.LEVEL_NODE]
6672
      else:
6673
        self._LockInstancesNodes()
6674

    
6675
  def BuildHooksEnv(self):
6676
    """Build hooks env.
6677

6678
    This runs on master, primary and secondary nodes of the instance.
6679

6680
    """
6681
    instance = self._migrater.instance
6682
    source_node = instance.primary_node
6683
    target_node = self.op.target_node
6684
    env = {
6685
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6686
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6687
      "OLD_PRIMARY": source_node,
6688
      "NEW_PRIMARY": target_node,
6689
      }
6690

    
6691
    if instance.disk_template in constants.DTS_INT_MIRROR:
6692
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6693
      env["NEW_SECONDARY"] = source_node
6694
    else:
6695
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6696

    
6697
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6698

    
6699
    return env
6700

    
6701
  def BuildHooksNodes(self):
6702
    """Build hooks nodes.
6703

6704
    """
6705
    instance = self._migrater.instance
6706
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6707
    return (nl, nl + [instance.primary_node])
6708

    
6709

    
6710
class LUInstanceMigrate(LogicalUnit):
6711
  """Migrate an instance.
6712

6713
  This is migration without shutting down, compared to the failover,
6714
  which is done with shutdown.
6715

6716
  """
6717
  HPATH = "instance-migrate"
6718
  HTYPE = constants.HTYPE_INSTANCE
6719
  REQ_BGL = False
6720

    
6721
  def ExpandNames(self):
6722
    self._ExpandAndLockInstance()
6723

    
6724
    if self.op.target_node is not None:
6725
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6726

    
6727
    self.needed_locks[locking.LEVEL_NODE] = []
6728
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6729

    
6730
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6731
                                       cleanup=self.op.cleanup,
6732
                                       failover=False,
6733
                                       fallback=self.op.allow_failover)
6734
    self.tasklets = [self._migrater]
6735

    
6736
  def DeclareLocks(self, level):
6737
    if level == locking.LEVEL_NODE:
6738
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6739
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6740
        if self.op.target_node is None:
6741
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6742
        else:
6743
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6744
                                                   self.op.target_node]
6745
        del self.recalculate_locks[locking.LEVEL_NODE]
6746
      else:
6747
        self._LockInstancesNodes()
6748

    
6749
  def BuildHooksEnv(self):
6750
    """Build hooks env.
6751

6752
    This runs on master, primary and secondary nodes of the instance.
6753

6754
    """
6755
    instance = self._migrater.instance
6756
    source_node = instance.primary_node
6757
    target_node = self.op.target_node
6758
    env = _BuildInstanceHookEnvByObject(self, instance)
6759
    env.update({
6760
      "MIGRATE_LIVE": self._migrater.live,
6761
      "MIGRATE_CLEANUP": self.op.cleanup,
6762
      "OLD_PRIMARY": source_node,
6763
      "NEW_PRIMARY": target_node,
6764
      })
6765

    
6766
    if instance.disk_template in constants.DTS_INT_MIRROR:
6767
      env["OLD_SECONDARY"] = target_node
6768
      env["NEW_SECONDARY"] = source_node
6769
    else:
6770
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6771

    
6772
    return env
6773

    
6774
  def BuildHooksNodes(self):
6775
    """Build hooks nodes.
6776

6777
    """
6778
    instance = self._migrater.instance
6779
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6780
    return (nl, nl + [instance.primary_node])
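

# Illustrative sketch, not part of the original module: a live migration
# submitted at the opcode level that may fall back to failover, the
# combination handled by TLMigrateInstance further below; the instance
# name is hypothetical and nothing calls this helper.
def _ExampleMigrateOpcode():
  """Build an OpInstanceMigrate that may fall back to failover.

  """
  return opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
                                   allow_failover=True)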
6781

    
6782

    
6783
class LUInstanceMove(LogicalUnit):
6784
  """Move an instance by data-copying.
6785

6786
  """
6787
  HPATH = "instance-move"
6788
  HTYPE = constants.HTYPE_INSTANCE
6789
  REQ_BGL = False
6790

    
6791
  def ExpandNames(self):
6792
    self._ExpandAndLockInstance()
6793
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6794
    self.op.target_node = target_node
6795
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6796
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6797

    
6798
  def DeclareLocks(self, level):
6799
    if level == locking.LEVEL_NODE:
6800
      self._LockInstancesNodes(primary_only=True)
6801

    
6802
  def BuildHooksEnv(self):
6803
    """Build hooks env.
6804

6805
    This runs on master, primary and secondary nodes of the instance.
6806

6807
    """
6808
    env = {
6809
      "TARGET_NODE": self.op.target_node,
6810
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6811
      }
6812
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6813
    return env
6814

    
6815
  def BuildHooksNodes(self):
6816
    """Build hooks nodes.
6817

6818
    """
6819
    nl = [
6820
      self.cfg.GetMasterNode(),
6821
      self.instance.primary_node,
6822
      self.op.target_node,
6823
      ]
6824
    return (nl, nl)
6825

    
6826
  def CheckPrereq(self):
6827
    """Check prerequisites.
6828

6829
    This checks that the instance is in the cluster.
6830

6831
    """
6832
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6833
    assert self.instance is not None, \
6834
      "Cannot retrieve locked instance %s" % self.op.instance_name
6835

    
6836
    node = self.cfg.GetNodeInfo(self.op.target_node)
6837
    assert node is not None, \
6838
      "Cannot retrieve locked node %s" % self.op.target_node
6839

    
6840
    self.target_node = target_node = node.name
6841

    
6842
    if target_node == instance.primary_node:
6843
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6844
                                 (instance.name, target_node),
6845
                                 errors.ECODE_STATE)
6846

    
6847
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6848

    
6849
    for idx, dsk in enumerate(instance.disks):
6850
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6851
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6852
                                   " cannot copy" % idx, errors.ECODE_STATE)
6853

    
6854
    _CheckNodeOnline(self, target_node)
6855
    _CheckNodeNotDrained(self, target_node)
6856
    _CheckNodeVmCapable(self, target_node)
6857

    
6858
    if instance.admin_up:
6859
      # check memory requirements on the target node
6860
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6861
                           instance.name, bep[constants.BE_MEMORY],
6862
                           instance.hypervisor)
6863
    else:
6864
      self.LogInfo("Not checking memory on the secondary node as"
6865
                   " instance will not be started")
6866

    
6867
    # check bridge existence
6868
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6869

    
6870
  def Exec(self, feedback_fn):
6871
    """Move an instance.
6872

6873
    The move is done by shutting it down on its present node, copying
6874
    the data over (slow) and starting it on the new node.
6875

6876
    """
6877
    instance = self.instance
6878

    
6879
    source_node = instance.primary_node
6880
    target_node = self.target_node
6881

    
6882
    self.LogInfo("Shutting down instance %s on source node %s",
6883
                 instance.name, source_node)
6884

    
6885
    result = self.rpc.call_instance_shutdown(source_node, instance,
6886
                                             self.op.shutdown_timeout)
6887
    msg = result.fail_msg
6888
    if msg:
6889
      if self.op.ignore_consistency:
6890
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6891
                             " Proceeding anyway. Please make sure node"
6892
                             " %s is down. Error details: %s",
6893
                             instance.name, source_node, source_node, msg)
6894
      else:
6895
        raise errors.OpExecError("Could not shutdown instance %s on"
6896
                                 " node %s: %s" %
6897
                                 (instance.name, source_node, msg))
6898

    
6899
    # create the target disks
6900
    try:
6901
      _CreateDisks(self, instance, target_node=target_node)
6902
    except errors.OpExecError:
6903
      self.LogWarning("Device creation failed, reverting...")
6904
      try:
6905
        _RemoveDisks(self, instance, target_node=target_node)
6906
      finally:
6907
        self.cfg.ReleaseDRBDMinors(instance.name)
6908
        raise
6909

    
6910
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6911

    
6912
    errs = []
6913
    # activate, get path, copy the data over
6914
    for idx, disk in enumerate(instance.disks):
6915
      self.LogInfo("Copying data for disk %d", idx)
6916
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6917
                                               instance.name, True, idx)
6918
      if result.fail_msg:
6919
        self.LogWarning("Can't assemble newly created disk %d: %s",
6920
                        idx, result.fail_msg)
6921
        errs.append(result.fail_msg)
6922
        break
6923
      dev_path = result.payload
6924
      result = self.rpc.call_blockdev_export(source_node, disk,
6925
                                             target_node, dev_path,
6926
                                             cluster_name)
6927
      if result.fail_msg:
6928
        self.LogWarning("Can't copy data over for disk %d: %s",
6929
                        idx, result.fail_msg)
6930
        errs.append(result.fail_msg)
6931
        break
6932

    
6933
    if errs:
6934
      self.LogWarning("Some disks failed to copy, aborting")
6935
      try:
6936
        _RemoveDisks(self, instance, target_node=target_node)
6937
      finally:
6938
        self.cfg.ReleaseDRBDMinors(instance.name)
6939
        raise errors.OpExecError("Errors during disk copy: %s" %
6940
                                 (",".join(errs),))
6941

    
6942
    instance.primary_node = target_node
6943
    self.cfg.Update(instance, feedback_fn)
6944

    
6945
    self.LogInfo("Removing the disks on the original node")
6946
    _RemoveDisks(self, instance, target_node=source_node)
6947

    
6948
    # Only start the instance if it's marked as up
6949
    if instance.admin_up:
6950
      self.LogInfo("Starting instance %s on node %s",
6951
                   instance.name, target_node)
6952

    
6953
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6954
                                           ignore_secondaries=True)
6955
      if not disks_ok:
6956
        _ShutdownInstanceDisks(self, instance)
6957
        raise errors.OpExecError("Can't activate the instance's disks")
6958

    
6959
      result = self.rpc.call_instance_start(target_node, instance,
6960
                                            None, None, False)
6961
      msg = result.fail_msg
6962
      if msg:
6963
        _ShutdownInstanceDisks(self, instance)
6964
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6965
                                 (instance.name, target_node, msg))
6966

    
6967

    
6968
class LUNodeMigrate(LogicalUnit):
6969
  """Migrate all instances from a node.
6970

6971
  """
6972
  HPATH = "node-migrate"
6973
  HTYPE = constants.HTYPE_NODE
6974
  REQ_BGL = False
6975

    
6976
  def CheckArguments(self):
6977
    pass
6978

    
6979
  def ExpandNames(self):
6980
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6981

    
6982
    self.share_locks = _ShareAll()
6983
    self.needed_locks = {
6984
      locking.LEVEL_NODE: [self.op.node_name],
6985
      }
6986

    
6987
  def BuildHooksEnv(self):
6988
    """Build hooks env.
6989

6990
    This runs on the master, the primary and all the secondaries.
6991

6992
    """
6993
    return {
6994
      "NODE_NAME": self.op.node_name,
6995
      }
6996

    
6997
  def BuildHooksNodes(self):
6998
    """Build hooks nodes.
6999

7000
    """
7001
    nl = [self.cfg.GetMasterNode()]
7002
    return (nl, nl)
7003

    
7004
  def CheckPrereq(self):
7005
    pass
7006

    
7007
  def Exec(self, feedback_fn):
7008
    # Prepare jobs for migration instances
7009
    jobs = [
7010
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7011
                                 mode=self.op.mode,
7012
                                 live=self.op.live,
7013
                                 iallocator=self.op.iallocator,
7014
                                 target_node=self.op.target_node)]
7015
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7016
      ]
7017

    
7018
    # TODO: Run iallocator in this opcode and pass correct placement options to
7019
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7020
    # running the iallocator and the actual migration, a good consistency model
7021
    # will have to be found.
7022

    
7023
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7024
            frozenset([self.op.node_name]))
7025

    
7026
    return ResultWithJobs(jobs)
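

# Illustrative sketch, not part of the original module: the "jobs" value
# built above is a list of jobs, each itself a list of opcodes; here every
# job holds a single migration opcode. The helper name is hypothetical and
# nothing calls it.
def _ExamplePerInstanceJobs(instance_names):
  """Build one single-opcode migration job per given instance name.

  """
  jobs = [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in instance_names]
  return ResultWithJobs(jobs)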
7027

    
7028

    
7029
class TLMigrateInstance(Tasklet):
7030
  """Tasklet class for instance migration.
7031

7032
  @type live: boolean
7033
  @ivar live: whether the migration will be done live or non-live;
7034
      this variable is initialized only after CheckPrereq has run
7035
  @type cleanup: boolean
7036
  @ivar cleanup: Whether we clean up from a failed migration
7037
  @type iallocator: string
7038
  @ivar iallocator: The iallocator used to determine target_node
7039
  @type target_node: string
7040
  @ivar target_node: If given, the target_node to reallocate the instance to
7041
  @type failover: boolean
7042
  @ivar failover: Whether operation results in failover or migration
7043
  @type fallback: boolean
7044
  @ivar fallback: Whether fallback to failover is allowed if migration not
7045
                  possible
7046
  @type ignore_consistency: boolean
7047
  @ivar ignore_consistency: Wheter we should ignore consistency between source
7048
                            and target node
7049
  @type shutdown_timeout: int
7050
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
7051

7052
  """
7053
  def __init__(self, lu, instance_name, cleanup=False,
7054
               failover=False, fallback=False,
7055
               ignore_consistency=False,
7056
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7057
    """Initializes this class.
7058

7059
    """
7060
    Tasklet.__init__(self, lu)
7061

    
7062
    # Parameters
7063
    self.instance_name = instance_name
7064
    self.cleanup = cleanup
7065
    self.live = False # will be overridden later
7066
    self.failover = failover
7067
    self.fallback = fallback
7068
    self.ignore_consistency = ignore_consistency
7069
    self.shutdown_timeout = shutdown_timeout
7070

    
7071
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.cleanup and (not self.failover or instance.admin_up):
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters is accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, instance, None, None,
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device whose
      CreateOnSecondary() method returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


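# Illustration for _GenerateUniqueNames below: for exts == [".disk0", ".disk1"]
# it returns ["<unique-id>.disk0", "<unique-id>.disk1"], where <unique-id> is a
# fresh ID drawn from the cluster configuration; the results are later used as
# LV names.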
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate one unique logical volume name for each of the
  given extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


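# Sketch of the objects.Disk tree built by _GenerateDRBD8Branch below: the
# returned LD_DRBD8 device carries logical_id (primary, secondary, port,
# p_minor, s_minor, shared_secret) and has two LD_LV children, the data LV
# of the requested size and a fixed 128 MB metadata LV.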
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


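# Naming conventions visible in _GenerateDiskTemplate below: every disk gets
# an iv_name of the form "disk/<index>"; for DRBD8 each unique name is
# expanded into a "<name>_data" and a "<name>_meta" LV pair, and two DRBD
# minors are allocated per disk (one for the primary, one for the secondary
# node).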
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  # TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


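# The text computed by _GetInstanceInfoText below records which instance owns
# a disk, e.g. an instance named "inst1.example.com" (illustrative) yields
# "originstname+inst1.example.com"; callers attach it to the block devices
# they create (as an LVM tag for LVM-based disks).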
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


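# Worked example for _CalcEta below (illustrative numbers): if 4096 MB out of
# 20480 MB were written in 120 seconds, avg_time is 120 / 4096.0 seconds per
# MB and the returned ETA is (20480 - 4096) * (120 / 4096.0) = 480 seconds.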
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


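# Illustration of the chunk-size rule in _WipeDisks below (numbers are
# examples, the real limits are defined in constants.py): assuming a
# MIN_WIPE_CHUNK_PERCENT of 10, a 102400 MB disk would give a 10240 MB chunk,
# which is then capped at MAX_WIPE_CHUNK if it exceeds it; smaller disks
# simply use the percentage-based value.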
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)


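# Note on _CreateDisks below: the device tree is created on all of the
# instance's nodes, but f_create (passed as both force_create and force_open
# to _CreateBlockDev) is True only on the primary node, so secondary nodes
# only get the components flagged by CreateOnSecondary().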
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


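# Example for _ComputeDiskSizePerVG below (illustrative values): for a DRBD8
# instance with disks [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg",
# "size": 2048}] the result is {"xenvg": (1024 + 128) + (2048 + 128)}, i.e.
# sizes are accumulated per volume group, plus 128 MB of DRBD metadata per
# disk.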
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # accumulate the requested size (plus any per-disk overhead) per VG
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


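# Example for _ComputeDiskSize below (illustrative values): two disks of
# 1024 MB and 2048 MB need 3072 MB with the plain template and
# 3072 + 2 * 128 = 3328 MB with DRBD8; the diskless and file templates
# return None, while shared-file and block return 0.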
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements according to the disk template.

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
8286
    """ExpandNames for CreateInstance.
8287

8288
    Figure out the right locks for instance creation.
8289

8290
    """
8291
    self.needed_locks = {}
8292

    
8293
    instance_name = self.op.instance_name
8294
    # this is just a preventive check, but someone might still add this
8295
    # instance in the meantime, and creation will fail at lock-add time
8296
    if instance_name in self.cfg.GetInstanceList():
8297
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8298
                                 instance_name, errors.ECODE_EXISTS)
8299

    
8300
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8301

    
8302
    if self.op.iallocator:
8303
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8304
    else:
8305
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8306
      nodelist = [self.op.pnode]
8307
      if self.op.snode is not None:
8308
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8309
        nodelist.append(self.op.snode)
8310
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8311

    
8312
    # in case of import lock the source node too
8313
    if self.op.mode == constants.INSTANCE_IMPORT:
8314
      src_node = self.op.src_node
8315
      src_path = self.op.src_path
8316

    
8317
      if src_path is None:
8318
        self.op.src_path = src_path = self.op.instance_name
8319

    
8320
      if src_node is None:
8321
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8322
        self.op.src_node = None
8323
        if os.path.isabs(src_path):
8324
          raise errors.OpPrereqError("Importing an instance from a path"
8325
                                     " requires a source node option",
8326
                                     errors.ECODE_INVAL)
8327
      else:
8328
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8329
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8330
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8331
        if not os.path.isabs(src_path):
8332
          self.op.src_path = src_path = \
8333
            utils.PathJoin(constants.EXPORT_DIR, src_path)
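    # Summary sketch (derived from the branches above, not new behaviour) of
    # the node locks this method ends up requesting:
    #   - iallocator given:        LEVEL_NODE = locking.ALL_SET
    #   - explicit pnode/snode:    LEVEL_NODE = [pnode] (+ [snode] if given)
    #   - import without src_node: LEVEL_NODE = locking.ALL_SET
    #   - import with src_node:    src_node is appended unless ALL_SET is
    #                              already requested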
8334

    
8335
  def _RunAllocator(self):
8336
    """Run the allocator based on input opcode.
8337

8338
    """
8339
    nics = [n.ToDict() for n in self.nics]
8340
    ial = IAllocator(self.cfg, self.rpc,
8341
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8342
                     name=self.op.instance_name,
8343
                     disk_template=self.op.disk_template,
8344
                     tags=self.op.tags,
8345
                     os=self.op.os_type,
8346
                     vcpus=self.be_full[constants.BE_VCPUS],
8347
                     memory=self.be_full[constants.BE_MEMORY],
8348
                     disks=self.disks,
8349
                     nics=nics,
8350
                     hypervisor=self.op.hypervisor,
8351
                     )
8352

    
8353
    ial.Run(self.op.iallocator)
8354

    
8355
    if not ial.success:
8356
      raise errors.OpPrereqError("Can't compute nodes using"
8357
                                 " iallocator '%s': %s" %
8358
                                 (self.op.iallocator, ial.info),
8359
                                 errors.ECODE_NORES)
8360
    if len(ial.result) != ial.required_nodes:
8361
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8362
                                 " of nodes (%s), required %s" %
8363
                                 (self.op.iallocator, len(ial.result),
8364
                                  ial.required_nodes), errors.ECODE_FAULT)
8365
    self.op.pnode = ial.result[0]
8366
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8367
                 self.op.instance_name, self.op.iallocator,
8368
                 utils.CommaJoin(ial.result))
8369
    if ial.required_nodes == 2:
8370
      self.op.snode = ial.result[1]
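    # For reference (derived from the checks above, hostnames invented):
    # ial.result is a list of node names whose length must equal
    # ial.required_nodes, e.g. ["node1.example.com", "node2.example.com"]
    # for a template that needs both a primary and a secondary node.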
8371

    
8372
  def BuildHooksEnv(self):
8373
    """Build hooks env.
8374

8375
    This runs on master, primary and secondary nodes of the instance.
8376

8377
    """
8378
    env = {
8379
      "ADD_MODE": self.op.mode,
8380
      }
8381
    if self.op.mode == constants.INSTANCE_IMPORT:
8382
      env["SRC_NODE"] = self.op.src_node
8383
      env["SRC_PATH"] = self.op.src_path
8384
      env["SRC_IMAGES"] = self.src_images
8385

    
8386
    env.update(_BuildInstanceHookEnv(
8387
      name=self.op.instance_name,
8388
      primary_node=self.op.pnode,
8389
      secondary_nodes=self.secondaries,
8390
      status=self.op.start,
8391
      os_type=self.op.os_type,
8392
      memory=self.be_full[constants.BE_MEMORY],
8393
      vcpus=self.be_full[constants.BE_VCPUS],
8394
      nics=_NICListToTuple(self, self.nics),
8395
      disk_template=self.op.disk_template,
8396
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8397
             for d in self.disks],
8398
      bep=self.be_full,
8399
      hvp=self.hv_full,
8400
      hypervisor_name=self.op.hypervisor,
8401
      tags=self.op.tags,
8402
    ))
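    # Hypothetical (import-mode) example of the keys set directly in this
    # method; everything else is merged in from _BuildInstanceHookEnv above
    # (hostname and path below are invented):
    #
    #   {"ADD_MODE": constants.INSTANCE_IMPORT,
    #    "SRC_NODE": "node3.example.com",
    #    "SRC_PATH": "/srv/ganeti/export/inst1.example.com",
    #    "SRC_IMAGES": self.src_images}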
8403

    
8404
    return env
8405

    
8406
  def BuildHooksNodes(self):
8407
    """Build hooks nodes.
8408

8409
    """
8410
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8411
    return nl, nl
8412

    
8413
  def _ReadExportInfo(self):
8414
    """Reads the export information from disk.
8415

8416
    It will override the opcode source node and path with the actual
8417
    information, if these two were not specified before.
8418

8419
    @return: the export information
8420

8421
    """
8422
    assert self.op.mode == constants.INSTANCE_IMPORT
8423

    
8424
    src_node = self.op.src_node
8425
    src_path = self.op.src_path
8426

    
8427
    if src_node is None:
8428
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8429
      exp_list = self.rpc.call_export_list(locked_nodes)
8430
      found = False
8431
      for node in exp_list:
8432
        if exp_list[node].fail_msg:
8433
          continue
8434
        if src_path in exp_list[node].payload:
8435
          found = True
8436
          self.op.src_node = src_node = node
8437
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8438
                                                       src_path)
8439
          break
8440
      if not found:
8441
        raise errors.OpPrereqError("No export found for relative path %s" %
8442
                                    src_path, errors.ECODE_INVAL)
8443

    
8444
    _CheckNodeOnline(self, src_node)
8445
    result = self.rpc.call_export_info(src_node, src_path)
8446
    result.Raise("No export or invalid export found in dir %s" % src_path)
8447

    
8448
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8449
    if not export_info.has_section(constants.INISECT_EXP):
8450
      raise errors.ProgrammerError("Corrupted export config",
8451
                                   errors.ECODE_ENVIRON)
8452

    
8453
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8454
    if int(ei_version) != constants.EXPORT_VERSION:
8455
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8456
                                 (ei_version, constants.EXPORT_VERSION),
8457
                                 errors.ECODE_ENVIRON)
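    # Rough sketch of the export config consumed here and in _ReadExportParams
    # (option names taken from the readers in this LU, section names shown
    # symbolically, values invented):
    #
    #   [INISECT_EXP]              [INISECT_INS]
    #   version = EXPORT_VERSION   name = inst1.example.com
    #                              os = debootstrap+default
    #                              disk_count = 1
    #                              disk0_size = 10240
    #                              disk0_dump = ...
    #                              nic_count = 1
    #                              nic0_mac = aa:00:00:fa:3a:3f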
8458
    return export_info
8459

    
8460
  def _ReadExportParams(self, einfo):
8461
    """Use export parameters as defaults.
8462

8463
    If the opcode does not specify (i.e. override) some instance
8464
    parameters, try to take them from the export information, if
8465
    the export declares them.
8466

8467
    """
8468
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8469

    
8470
    if self.op.disk_template is None:
8471
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8472
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8473
                                          "disk_template")
8474
      else:
8475
        raise errors.OpPrereqError("No disk template specified and the export"
8476
                                   " is missing the disk_template information",
8477
                                   errors.ECODE_INVAL)
8478

    
8479
    if not self.op.disks:
8480
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8481
        disks = []
8482
        # TODO: import the disk iv_name too
8483
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8484
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8485
          disks.append({constants.IDISK_SIZE: disk_sz})
8486
        self.op.disks = disks
8487
      else:
8488
        raise errors.OpPrereqError("No disk info specified and the export"
8489
                                   " is missing the disk information",
8490
                                   errors.ECODE_INVAL)
8491

    
8492
    if (not self.op.nics and
8493
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8494
      nics = []
8495
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8496
        ndict = {}
8497
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8498
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8499
          ndict[name] = v
8500
        nics.append(ndict)
8501
      self.op.nics = nics
8502

    
8503
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8504
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8505

    
8506
    if (self.op.hypervisor is None and
8507
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8508
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8509

    
8510
    if einfo.has_section(constants.INISECT_HYP):
8511
      # use the export parameters but do not override the ones
8512
      # specified by the user
8513
      for name, value in einfo.items(constants.INISECT_HYP):
8514
        if name not in self.op.hvparams:
8515
          self.op.hvparams[name] = value
8516

    
8517
    if einfo.has_section(constants.INISECT_BEP):
8518
      # use the parameters, without overriding
8519
      for name, value in einfo.items(constants.INISECT_BEP):
8520
        if name not in self.op.beparams:
8521
          self.op.beparams[name] = value
8522
    else:
8523
      # try to read the parameters old style, from the main section
8524
      for name in constants.BES_PARAMETERS:
8525
        if (name not in self.op.beparams and
8526
            einfo.has_option(constants.INISECT_INS, name)):
8527
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8528

    
8529
    if einfo.has_section(constants.INISECT_OSP):
8530
      # use the parameters, without overriding
8531
      for name, value in einfo.items(constants.INISECT_OSP):
8532
        if name not in self.op.osparams:
8533
          self.op.osparams[name] = value
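    # Net effect (a summary, not new behaviour): for each parameter family the
    # precedence is
    #   explicit opcode value > value stored in the export > cluster default,
    # with the cluster defaults applied later through the SimpleFill* helpers
    # in CheckPrereq.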
8534

    
8535
  def _RevertToDefaults(self, cluster):
8536
    """Revert the instance parameters to the default values.
8537

8538
    """
8539
    # hvparams
8540
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8541
    for name in self.op.hvparams.keys():
8542
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8543
        del self.op.hvparams[name]
8544
    # beparams
8545
    be_defs = cluster.SimpleFillBE({})
8546
    for name in self.op.beparams.keys():
8547
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8548
        del self.op.beparams[name]
8549
    # nic params
8550
    nic_defs = cluster.SimpleFillNIC({})
8551
    for nic in self.op.nics:
8552
      for name in constants.NICS_PARAMETERS:
8553
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8554
          del nic[name]
8555
    # osparams
8556
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8557
    for name in self.op.osparams.keys():
8558
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8559
        del self.op.osparams[name]
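    # Worked example (values invented): if the cluster default for BE_MEMORY
    # is 128 and the imported parameters also say 128, the entry is deleted
    # above, so the new instance keeps tracking the cluster default instead
    # of pinning the value at creation time.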
8560

    
8561
  def _CalculateFileStorageDir(self):
8562
    """Calculate final instance file storage dir.
8563

8564
    """
8565
    # file storage dir calculation/check
8566
    self.instance_file_storage_dir = None
8567
    if self.op.disk_template in constants.DTS_FILEBASED:
8568
      # build the full file storage dir path
8569
      joinargs = []
8570

    
8571
      if self.op.disk_template == constants.DT_SHARED_FILE:
8572
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8573
      else:
8574
        get_fsd_fn = self.cfg.GetFileStorageDir
8575

    
8576
      cfg_storagedir = get_fsd_fn()
8577
      if not cfg_storagedir:
8578
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8579
      joinargs.append(cfg_storagedir)
8580

    
8581
      if self.op.file_storage_dir is not None:
8582
        joinargs.append(self.op.file_storage_dir)
8583

    
8584
      joinargs.append(self.op.instance_name)
8585

    
8586
      # pylint: disable=W0142
8587
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
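      # Example of the resulting layout (paths invented for illustration):
      #   cluster file storage dir:  /srv/ganeti/file-storage
      #   op.file_storage_dir:       customers/acme      (optional)
      #   instance name:             inst1.example.com
      #   => /srv/ganeti/file-storage/customers/acme/inst1.example.com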
8588

    
8589
  def CheckPrereq(self):
8590
    """Check prerequisites.
8591

8592
    """
8593
    self._CalculateFileStorageDir()
8594

    
8595
    if self.op.mode == constants.INSTANCE_IMPORT:
8596
      export_info = self._ReadExportInfo()
8597
      self._ReadExportParams(export_info)
8598

    
8599
    if (not self.cfg.GetVGName() and
8600
        self.op.disk_template not in constants.DTS_NOT_LVM):
8601
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8602
                                 " instances", errors.ECODE_STATE)
8603

    
8604
    if self.op.hypervisor is None:
8605
      self.op.hypervisor = self.cfg.GetHypervisorType()
8606

    
8607
    cluster = self.cfg.GetClusterInfo()
8608
    enabled_hvs = cluster.enabled_hypervisors
8609
    if self.op.hypervisor not in enabled_hvs:
8610
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8611
                                 " cluster (%s)" % (self.op.hypervisor,
8612
                                  ",".join(enabled_hvs)),
8613
                                 errors.ECODE_STATE)
8614

    
8615
    # Check tag validity
8616
    for tag in self.op.tags:
8617
      objects.TaggableObject.ValidateTag(tag)
8618

    
8619
    # check hypervisor parameter syntax (locally)
8620
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8621
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8622
                                      self.op.hvparams)
8623
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8624
    hv_type.CheckParameterSyntax(filled_hvp)
8625
    self.hv_full = filled_hvp
8626
    # check that we don't specify global parameters on an instance
8627
    _CheckGlobalHvParams(self.op.hvparams)
8628

    
8629
    # fill and remember the beparams dict
8630
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8631
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8632

    
8633
    # build os parameters
8634
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8635

    
8636
    # now that hvp/bep are in final format, let's reset to defaults,
8637
    # if told to do so
8638
    if self.op.identify_defaults:
8639
      self._RevertToDefaults(cluster)
8640

    
8641
    # NIC buildup
8642
    self.nics = []
8643
    for idx, nic in enumerate(self.op.nics):
8644
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8645
      nic_mode = nic_mode_req
8646
      if nic_mode is None:
8647
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8648

    
8649
      # in routed mode, for the first nic, the default ip is 'auto'
8650
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8651
        default_ip_mode = constants.VALUE_AUTO
8652
      else:
8653
        default_ip_mode = constants.VALUE_NONE
8654

    
8655
      # ip validity checks
8656
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8657
      if ip is None or ip.lower() == constants.VALUE_NONE:
8658
        nic_ip = None
8659
      elif ip.lower() == constants.VALUE_AUTO:
8660
        if not self.op.name_check:
8661
          raise errors.OpPrereqError("IP address set to auto but name checks"
8662
                                     " have been skipped",
8663
                                     errors.ECODE_INVAL)
8664
        nic_ip = self.hostname1.ip
8665
      else:
8666
        if not netutils.IPAddress.IsValid(ip):
8667
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8668
                                     errors.ECODE_INVAL)
8669
        nic_ip = ip
8670

    
8671
      # TODO: check the ip address for uniqueness
8672
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8673
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8674
                                   errors.ECODE_INVAL)
8675

    
8676
      # MAC address verification
8677
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8678
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8679
        mac = utils.NormalizeAndValidateMac(mac)
8680

    
8681
        try:
8682
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8683
        except errors.ReservationError:
8684
          raise errors.OpPrereqError("MAC address %s already in use"
8685
                                     " in cluster" % mac,
8686
                                     errors.ECODE_NOTUNIQUE)
8687

    
8688
      #  Build nic parameters
8689
      link = nic.get(constants.INIC_LINK, None)
8690
      nicparams = {}
8691
      if nic_mode_req:
8692
        nicparams[constants.NIC_MODE] = nic_mode_req
8693
      if link:
8694
        nicparams[constants.NIC_LINK] = link
8695

    
8696
      check_params = cluster.SimpleFillNIC(nicparams)
8697
      objects.NIC.CheckParameterSyntax(check_params)
8698
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
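      # At this point each entry is an objects.NIC with a concrete IP (or
      # None), a MAC that is either validated or still "auto"/"generate"
      # (resolved further down), and only the explicitly requested nicparams;
      # SimpleFillNIC is used above purely to syntax-check the merged values.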
8699

    
8700
    # disk checks/pre-build
8701
    default_vg = self.cfg.GetVGName()
8702
    self.disks = []
8703
    for disk in self.op.disks:
8704
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8705
      if mode not in constants.DISK_ACCESS_SET:
8706
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8707
                                   mode, errors.ECODE_INVAL)
8708
      size = disk.get(constants.IDISK_SIZE, None)
8709
      if size is None:
8710
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8711
      try:
8712
        size = int(size)
8713
      except (TypeError, ValueError):
8714
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8715
                                   errors.ECODE_INVAL)
8716

    
8717
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8718
      new_disk = {
8719
        constants.IDISK_SIZE: size,
8720
        constants.IDISK_MODE: mode,
8721
        constants.IDISK_VG: data_vg,
8722
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8723
        }
8724
      if constants.IDISK_ADOPT in disk:
8725
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8726
      self.disks.append(new_disk)
8727

    
8728
    if self.op.mode == constants.INSTANCE_IMPORT:
8729

    
8730
      # Check that the new instance doesn't have less disks than the export
8731
      instance_disks = len(self.disks)
8732
      export_disks = export_info.getint(constants.INISECT_INS, "disk_count")
8733
      if instance_disks < export_disks:
8734
        raise errors.OpPrereqError("Not enough disks to import."
8735
                                   " (instance: %d, export: %d)" %
8736
                                   (instance_disks, export_disks),
8737
                                   errors.ECODE_INVAL)
8738

    
8739
      disk_images = []
8740
      for idx in range(export_disks):
8741
        option = "disk%d_dump" % idx
8742
        if export_info.has_option(constants.INISECT_INS, option):
8743
          # FIXME: are the old os-es, disk sizes, etc. useful?
8744
          export_name = export_info.get(constants.INISECT_INS, option)
8745
          image = utils.PathJoin(self.op.src_path, export_name)
8746
          disk_images.append(image)
8747
        else:
8748
          disk_images.append(False)
8749

    
8750
      self.src_images = disk_images
8751

    
8752
      old_name = export_info.get(constants.INISECT_INS, "name")
8753
      try:
8754
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8755
      except (TypeError, ValueError), err:
8756
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8757
                                   " an integer: %s" % str(err),
8758
                                   errors.ECODE_STATE)
8759
      if self.op.instance_name == old_name:
8760
        for idx, nic in enumerate(self.nics):
8761
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8762
            nic_mac_ini = "nic%d_mac" % idx
8763
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8764

    
8765
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8766

    
8767
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8768
    if self.op.ip_check:
8769
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8770
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8771
                                   (self.check_ip, self.op.instance_name),
8772
                                   errors.ECODE_NOTUNIQUE)
8773

    
8774
    #### mac address generation
8775
    # By generating the mac address here, both the allocator and the hooks get
8776
    # the real final mac address rather than the 'auto' or 'generate' value.
8777
    # There is a race condition between the generation and the instance object
8778
    # creation, which means that we know the mac is valid now, but we're not
8779
    # sure it will be when we actually add the instance. If things go bad
8780
    # adding the instance will abort because of a duplicate mac, and the
8781
    # creation job will fail.
8782
    for nic in self.nics:
8783
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8784
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8785

    
8786
    #### allocator run
8787

    
8788
    if self.op.iallocator is not None:
8789
      self._RunAllocator()
8790

    
8791
    #### node related checks
8792

    
8793
    # check primary node
8794
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8795
    assert self.pnode is not None, \
8796
      "Cannot retrieve locked node %s" % self.op.pnode
8797
    if pnode.offline:
8798
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8799
                                 pnode.name, errors.ECODE_STATE)
8800
    if pnode.drained:
8801
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8802
                                 pnode.name, errors.ECODE_STATE)
8803
    if not pnode.vm_capable:
8804
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8805
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8806

    
8807
    self.secondaries = []
8808

    
8809
    # mirror node verification
8810
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8811
      if self.op.snode == pnode.name:
8812
        raise errors.OpPrereqError("The secondary node cannot be the"
8813
                                   " primary node", errors.ECODE_INVAL)
8814
      _CheckNodeOnline(self, self.op.snode)
8815
      _CheckNodeNotDrained(self, self.op.snode)
8816
      _CheckNodeVmCapable(self, self.op.snode)
8817
      self.secondaries.append(self.op.snode)
8818

    
8819
    nodenames = [pnode.name] + self.secondaries
8820

    
8821
    if not self.adopt_disks:
8822
      # Check lv size requirements, if not adopting
8823
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8824
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8825

    
8826
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8827
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8828
                                disk[constants.IDISK_ADOPT])
8829
                     for disk in self.disks])
8830
      if len(all_lvs) != len(self.disks):
8831
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8832
                                   errors.ECODE_INVAL)
8833
      for lv_name in all_lvs:
8834
        try:
8835
          # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
8836
          # to ReserveLV use the same syntax
8837
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8838
        except errors.ReservationError:
8839
          raise errors.OpPrereqError("LV named %s used by another instance" %
8840
                                     lv_name, errors.ECODE_NOTUNIQUE)
8841

    
8842
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8843
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8844

    
8845
      node_lvs = self.rpc.call_lv_list([pnode.name],
8846
                                       vg_names.payload.keys())[pnode.name]
8847
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8848
      node_lvs = node_lvs.payload
8849

    
8850
      delta = all_lvs.difference(node_lvs.keys())
8851
      if delta:
8852
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8853
                                   utils.CommaJoin(delta),
8854
                                   errors.ECODE_INVAL)
8855
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8856
      if online_lvs:
8857
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8858
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8859
                                   errors.ECODE_STATE)
8860
      # update the size of disk based on what is found
8861
      for dsk in self.disks:
8862
        dsk[constants.IDISK_SIZE] = \
8863
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8864
                                        dsk[constants.IDISK_ADOPT])][0]))
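      # For reference: node_lvs is keyed by "vg/lv" strings and, as used here
      # and in the online check above, each value is a sequence whose first
      # element is the size copied into IDISK_SIZE and whose third element is
      # the "online" (in use) flag.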
8865

    
8866
    elif self.op.disk_template == constants.DT_BLOCK:
8867
      # Normalize and de-duplicate device paths
8868
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8869
                       for disk in self.disks])
8870
      if len(all_disks) != len(self.disks):
8871
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8872
                                   errors.ECODE_INVAL)
8873
      baddisks = [d for d in all_disks
8874
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8875
      if baddisks:
8876
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8877
                                   " cannot be adopted" %
8878
                                   (", ".join(baddisks),
8879
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8880
                                   errors.ECODE_INVAL)
8881

    
8882
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8883
                                            list(all_disks))[pnode.name]
8884
      node_disks.Raise("Cannot get block device information from node %s" %
8885
                       pnode.name)
8886
      node_disks = node_disks.payload
8887
      delta = all_disks.difference(node_disks.keys())
8888
      if delta:
8889
        raise errors.OpPrereqError("Missing block device(s): %s" %
8890
                                   utils.CommaJoin(delta),
8891
                                   errors.ECODE_INVAL)
8892
      for dsk in self.disks:
8893
        dsk[constants.IDISK_SIZE] = \
8894
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8895

    
8896
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8897

    
8898
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8899
    # check OS parameters (remotely)
8900
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8901

    
8902
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8903

    
8904
    # memory check on primary node
8905
    if self.op.start:
8906
      _CheckNodeFreeMemory(self, self.pnode.name,
8907
                           "creating instance %s" % self.op.instance_name,
8908
                           self.be_full[constants.BE_MEMORY],
8909
                           self.op.hypervisor)
8910

    
8911
    self.dry_run_result = list(nodenames)
8912

    
8913
  def Exec(self, feedback_fn):
8914
    """Create and add the instance to the cluster.
8915

8916
    """
8917
    instance = self.op.instance_name
8918
    pnode_name = self.pnode.name
8919

    
8920
    ht_kind = self.op.hypervisor
8921
    if ht_kind in constants.HTS_REQ_PORT:
8922
      network_port = self.cfg.AllocatePort()
8923
    else:
8924
      network_port = None
8925

    
8926
    disks = _GenerateDiskTemplate(self,
8927
                                  self.op.disk_template,
8928
                                  instance, pnode_name,
8929
                                  self.secondaries,
8930
                                  self.disks,
8931
                                  self.instance_file_storage_dir,
8932
                                  self.op.file_driver,
8933
                                  0,
8934
                                  feedback_fn)
8935

    
8936
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8937
                            primary_node=pnode_name,
8938
                            nics=self.nics, disks=disks,
8939
                            disk_template=self.op.disk_template,
8940
                            admin_up=False,
8941
                            network_port=network_port,
8942
                            beparams=self.op.beparams,
8943
                            hvparams=self.op.hvparams,
8944
                            hypervisor=self.op.hypervisor,
8945
                            osparams=self.op.osparams,
8946
                            )
8947

    
8948
    if self.op.tags:
8949
      for tag in self.op.tags:
8950
        iobj.AddTag(tag)
8951

    
8952
    if self.adopt_disks:
8953
      if self.op.disk_template == constants.DT_PLAIN:
8954
        # rename LVs to the newly-generated names; we need to construct
8955
        # 'fake' LV disks with the old data, plus the new unique_id
8956
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8957
        rename_to = []
8958
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8959
          rename_to.append(t_dsk.logical_id)
8960
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8961
          self.cfg.SetDiskID(t_dsk, pnode_name)
8962
        result = self.rpc.call_blockdev_rename(pnode_name,
8963
                                               zip(tmp_disks, rename_to))
8964
        result.Raise("Failed to rename adoped LVs")
8965
    else:
8966
      feedback_fn("* creating instance disks...")
8967
      try:
8968
        _CreateDisks(self, iobj)
8969
      except errors.OpExecError:
8970
        self.LogWarning("Device creation failed, reverting...")
8971
        try:
8972
          _RemoveDisks(self, iobj)
8973
        finally:
8974
          self.cfg.ReleaseDRBDMinors(instance)
8975
          raise
8976

    
8977
    feedback_fn("adding instance %s to cluster config" % instance)
8978

    
8979
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8980

    
8981
    # Declare that we don't want to remove the instance lock anymore, as we've
8982
    # added the instance to the config
8983
    del self.remove_locks[locking.LEVEL_INSTANCE]
8984

    
8985
    if self.op.mode == constants.INSTANCE_IMPORT:
8986
      # Release unused nodes
8987
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8988
    else:
8989
      # Release all nodes
8990
      _ReleaseLocks(self, locking.LEVEL_NODE)
8991

    
8992
    disk_abort = False
8993
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8994
      feedback_fn("* wiping instance disks...")
8995
      try:
8996
        _WipeDisks(self, iobj)
8997
      except errors.OpExecError, err:
8998
        logging.exception("Wiping disks failed")
8999
        self.LogWarning("Wiping instance disks failed (%s)", err)
9000
        disk_abort = True
9001

    
9002
    if disk_abort:
9003
      # Something is already wrong with the disks, don't do anything else
9004
      pass
9005
    elif self.op.wait_for_sync:
9006
      disk_abort = not _WaitForSync(self, iobj)
9007
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9008
      # make sure the disks are not degraded (still sync-ing is ok)
9009
      feedback_fn("* checking mirrors status")
9010
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9011
    else:
9012
      disk_abort = False
9013

    
9014
    if disk_abort:
9015
      _RemoveDisks(self, iobj)
9016
      self.cfg.RemoveInstance(iobj.name)
9017
      # Make sure the instance lock gets removed
9018
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9019
      raise errors.OpExecError("There are some degraded disks for"
9020
                               " this instance")
9021

    
9022
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9023
      if self.op.mode == constants.INSTANCE_CREATE:
9024
        if not self.op.no_install:
9025
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9026
                        not self.op.wait_for_sync)
9027
          if pause_sync:
9028
            feedback_fn("* pausing disk sync to install instance OS")
9029
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9030
                                                              iobj.disks, True)
9031
            for idx, success in enumerate(result.payload):
9032
              if not success:
9033
                logging.warn("pause-sync of instance %s for disk %d failed",
9034
                             instance, idx)
9035

    
9036
          feedback_fn("* running the instance OS create scripts...")
9037
          # FIXME: pass debug option from opcode to backend
9038
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9039
                                                 self.op.debug_level)
9040
          if pause_sync:
9041
            feedback_fn("* resuming disk sync")
9042
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9043
                                                              iobj.disks, False)
9044
            for idx, success in enumerate(result.payload):
9045
              if not success:
9046
                logging.warn("resume-sync of instance %s for disk %d failed",
9047
                             instance, idx)
9048

    
9049
          result.Raise("Could not add os for instance %s"
9050
                       " on node %s" % (instance, pnode_name))
9051

    
9052
      elif self.op.mode == constants.INSTANCE_IMPORT:
9053
        feedback_fn("* running the instance OS import scripts...")
9054

    
9055
        transfers = []
9056

    
9057
        for idx, image in enumerate(self.src_images):
9058
          if not image:
9059
            continue
9060

    
9061
          # FIXME: pass debug option from opcode to backend
9062
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9063
                                             constants.IEIO_FILE, (image, ),
9064
                                             constants.IEIO_SCRIPT,
9065
                                             (iobj.disks[idx], idx),
9066
                                             None)
9067
          transfers.append(dt)
9068

    
9069
        import_result = \
9070
          masterd.instance.TransferInstanceData(self, feedback_fn,
9071
                                                self.op.src_node, pnode_name,
9072
                                                self.pnode.secondary_ip,
9073
                                                iobj, transfers)
9074
        if not compat.all(import_result):
9075
          self.LogWarning("Some disks for instance %s on node %s were not"
9076
                          " imported successfully" % (instance, pnode_name))
9077

    
9078
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9079
        feedback_fn("* preparing remote import...")
9080
        # The source cluster will stop the instance before attempting to make a
9081
        # connection. In some cases stopping an instance can take a long time,
9082
        # hence the shutdown timeout is added to the connection timeout.
9083
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9084
                           self.op.source_shutdown_timeout)
9085
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9086

    
9087
        assert iobj.primary_node == self.pnode.name
9088
        disk_results = \
9089
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9090
                                        self.source_x509_ca,
9091
                                        self._cds, timeouts)
9092
        if not compat.all(disk_results):
9093
          # TODO: Should the instance still be started, even if some disks
9094
          # failed to import (valid for local imports, too)?
9095
          self.LogWarning("Some disks for instance %s on node %s were not"
9096
                          " imported successfully" % (instance, pnode_name))
9097

    
9098
        # Run rename script on newly imported instance
9099
        assert iobj.name == instance
9100
        feedback_fn("Running rename script for %s" % instance)
9101
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9102
                                                   self.source_instance_name,
9103
                                                   self.op.debug_level)
9104
        if result.fail_msg:
9105
          self.LogWarning("Failed to run rename script for %s on node"
9106
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9107

    
9108
      else:
9109
        # also checked in the prereq part
9110
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9111
                                     % self.op.mode)
9112

    
9113
    if self.op.start:
9114
      iobj.admin_up = True
9115
      self.cfg.Update(iobj, feedback_fn)
9116
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9117
      feedback_fn("* starting instance...")
9118
      result = self.rpc.call_instance_start(pnode_name, iobj,
9119
                                            None, None, False)
9120
      result.Raise("Could not start instance")
9121

    
9122
    return list(iobj.all_nodes)
9123

    
9124

    
9125
class LUInstanceConsole(NoHooksLU):
9126
  """Connect to an instance's console.
9127

9128
  This is somewhat special in that it returns the command line that
9129
  you need to run on the master node in order to connect to the
9130
  console.
9131

9132
  """
9133
  REQ_BGL = False
9134

    
9135
  def ExpandNames(self):
9136
    self._ExpandAndLockInstance()
9137

    
9138
  def CheckPrereq(self):
9139
    """Check prerequisites.
9140

9141
    This checks that the instance is in the cluster.
9142

9143
    """
9144
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9145
    assert self.instance is not None, \
9146
      "Cannot retrieve locked instance %s" % self.op.instance_name
9147
    _CheckNodeOnline(self, self.instance.primary_node)
9148

    
9149
  def Exec(self, feedback_fn):
9150
    """Connect to the console of an instance
9151

9152
    """
9153
    instance = self.instance
9154
    node = instance.primary_node
9155

    
9156
    node_insts = self.rpc.call_instance_list([node],
9157
                                             [instance.hypervisor])[node]
9158
    node_insts.Raise("Can't get node information from %s" % node)
9159

    
9160
    if instance.name not in node_insts.payload:
9161
      if instance.admin_up:
9162
        state = constants.INSTST_ERRORDOWN
9163
      else:
9164
        state = constants.INSTST_ADMINDOWN
9165
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9166
                               (instance.name, state))
9167

    
9168
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9169

    
9170
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9171

    
9172

    
9173
def _GetInstanceConsole(cluster, instance):
9174
  """Returns console information for an instance.
9175

9176
  @type cluster: L{objects.Cluster}
9177
  @type instance: L{objects.Instance}
9178
  @rtype: dict
9179

9180
  """
9181
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9182
  # beparams and hvparams are passed separately, to avoid editing the
9183
  # instance and then saving the defaults in the instance itself.
9184
  hvparams = cluster.FillHV(instance)
9185
  beparams = cluster.FillBE(instance)
9186
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9187

    
9188
  assert console.instance == instance.name
9189
  assert console.Validate()
9190

    
9191
  return console.ToDict()
9192

    
9193

    
9194
class LUInstanceReplaceDisks(LogicalUnit):
9195
  """Replace the disks of an instance.
9196

9197
  """
9198
  HPATH = "mirrors-replace"
9199
  HTYPE = constants.HTYPE_INSTANCE
9200
  REQ_BGL = False
9201

    
9202
  def CheckArguments(self):
9203
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9204
                                  self.op.iallocator)
9205

    
9206
  def ExpandNames(self):
9207
    self._ExpandAndLockInstance()
9208

    
9209
    assert locking.LEVEL_NODE not in self.needed_locks
9210
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9211

    
9212
    assert self.op.iallocator is None or self.op.remote_node is None, \
9213
      "Conflicting options"
9214

    
9215
    if self.op.remote_node is not None:
9216
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9217

    
9218
      # Warning: do not remove the locking of the new secondary here
9219
      # unless DRBD8.AddChildren is changed to work in parallel;
9220
      # currently it doesn't since parallel invocations of
9221
      # FindUnusedMinor will conflict
9222
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9223
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9224
    else:
9225
      self.needed_locks[locking.LEVEL_NODE] = []
9226
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9227

    
9228
      if self.op.iallocator is not None:
9229
        # iallocator will select a new node in the same group
9230
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9231

    
9232
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9233
                                   self.op.iallocator, self.op.remote_node,
9234
                                   self.op.disks, False, self.op.early_release)
9235

    
9236
    self.tasklets = [self.replacer]
9237

    
9238
  def DeclareLocks(self, level):
9239
    if level == locking.LEVEL_NODEGROUP:
9240
      assert self.op.remote_node is None
9241
      assert self.op.iallocator is not None
9242
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9243

    
9244
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9245
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9246
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9247

    
9248
    elif level == locking.LEVEL_NODE:
9249
      if self.op.iallocator is not None:
9250
        assert self.op.remote_node is None
9251
        assert not self.needed_locks[locking.LEVEL_NODE]
9252

    
9253
        # Lock member nodes of all locked groups
9254
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9255
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9256
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9257
      else:
9258
        self._LockInstancesNodes()
9259

    
9260
  def BuildHooksEnv(self):
9261
    """Build hooks env.
9262

9263
    This runs on the master, the primary and all the secondaries.
9264

9265
    """
9266
    instance = self.replacer.instance
9267
    env = {
9268
      "MODE": self.op.mode,
9269
      "NEW_SECONDARY": self.op.remote_node,
9270
      "OLD_SECONDARY": instance.secondary_nodes[0],
9271
      }
9272
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9273
    return env
9274

    
9275
  def BuildHooksNodes(self):
9276
    """Build hooks nodes.
9277

9278
    """
9279
    instance = self.replacer.instance
9280
    nl = [
9281
      self.cfg.GetMasterNode(),
9282
      instance.primary_node,
9283
      ]
9284
    if self.op.remote_node is not None:
9285
      nl.append(self.op.remote_node)
9286
    return nl, nl
9287

    
9288
  def CheckPrereq(self):
9289
    """Check prerequisites.
9290

9291
    """
9292
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9293
            self.op.iallocator is None)
9294

    
9295
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9296
    if owned_groups:
9297
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9298

    
9299
    return LogicalUnit.CheckPrereq(self)
9300

    
9301

    
9302
class TLReplaceDisks(Tasklet):
9303
  """Replaces disks for an instance.
9304

9305
  Note: Locking is not within the scope of this class.
9306

9307
  """
9308
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9309
               disks, delay_iallocator, early_release):
9310
    """Initializes this class.
9311

9312
    """
9313
    Tasklet.__init__(self, lu)
9314

    
9315
    # Parameters
9316
    self.instance_name = instance_name
9317
    self.mode = mode
9318
    self.iallocator_name = iallocator_name
9319
    self.remote_node = remote_node
9320
    self.disks = disks
9321
    self.delay_iallocator = delay_iallocator
9322
    self.early_release = early_release
9323

    
9324
    # Runtime data
9325
    self.instance = None
9326
    self.new_node = None
9327
    self.target_node = None
9328
    self.other_node = None
9329
    self.remote_node_info = None
9330
    self.node_secondary_ip = None
9331

    
9332
  @staticmethod
9333
  def CheckArguments(mode, remote_node, iallocator):
9334
    """Helper function for users of this class.
9335

9336
    """
9337
    # check for valid parameter combination
9338
    if mode == constants.REPLACE_DISK_CHG:
9339
      if remote_node is None and iallocator is None:
9340
        raise errors.OpPrereqError("When changing the secondary either an"
9341
                                   " iallocator script must be used or the"
9342
                                   " new node given", errors.ECODE_INVAL)
9343

    
9344
      if remote_node is not None and iallocator is not None:
9345
        raise errors.OpPrereqError("Give either the iallocator or the new"
9346
                                   " secondary, not both", errors.ECODE_INVAL)
9347

    
9348
    elif remote_node is not None or iallocator is not None:
9349
      # Not replacing the secondary
9350
      raise errors.OpPrereqError("The iallocator and new node options can"
9351
                                 " only be used when changing the"
9352
                                 " secondary node", errors.ECODE_INVAL)
9353

    
9354
  @staticmethod
9355
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9356
    """Compute a new secondary node using an IAllocator.
9357

9358
    """
9359
    ial = IAllocator(lu.cfg, lu.rpc,
9360
                     mode=constants.IALLOCATOR_MODE_RELOC,
9361
                     name=instance_name,
9362
                     relocate_from=list(relocate_from))
9363

    
9364
    ial.Run(iallocator_name)
9365

    
9366
    if not ial.success:
9367
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9368
                                 " %s" % (iallocator_name, ial.info),
9369
                                 errors.ECODE_NORES)
9370

    
9371
    if len(ial.result) != ial.required_nodes:
9372
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9373
                                 " of nodes (%s), required %s" %
9374
                                 (iallocator_name,
9375
                                  len(ial.result), ial.required_nodes),
9376
                                 errors.ECODE_FAULT)
9377

    
9378
    remote_node_name = ial.result[0]
9379

    
9380
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9381
               instance_name, remote_node_name)
9382

    
9383
    return remote_node_name
9384

    
9385
  def _FindFaultyDisks(self, node_name):
9386
    """Wrapper for L{_FindFaultyInstanceDisks}.
9387

9388
    """
9389
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9390
                                    node_name, True)
9391

    
9392
  def _CheckDisksActivated(self, instance):
9393
    """Checks if the instance disks are activated.
9394

9395
    @param instance: The instance to check disks
9396
    @return: True if they are activated, False otherwise
9397

9398
    """
9399
    nodes = instance.all_nodes
9400

    
9401
    for idx, dev in enumerate(instance.disks):
9402
      for node in nodes:
9403
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9404
        self.cfg.SetDiskID(dev, node)
9405

    
9406
        result = self.rpc.call_blockdev_find(node, dev)
9407

    
9408
        if result.offline:
9409
          continue
9410
        elif result.fail_msg or not result.payload:
9411
          return False
9412

    
9413
    return True
9414

    
9415
  def CheckPrereq(self):
9416
    """Check prerequisites.
9417

9418
    This checks that the instance is in the cluster.
9419

9420
    """
9421
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9422
    assert instance is not None, \
9423
      "Cannot retrieve locked instance %s" % self.instance_name
9424

    
9425
    if instance.disk_template != constants.DT_DRBD8:
9426
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9427
                                 " instances", errors.ECODE_INVAL)
9428

    
9429
    if len(instance.secondary_nodes) != 1:
9430
      raise errors.OpPrereqError("The instance has a strange layout,"
9431
                                 " expected one secondary but found %d" %
9432
                                 len(instance.secondary_nodes),
9433
                                 errors.ECODE_FAULT)
9434

    
9435
    if not self.delay_iallocator:
9436
      self._CheckPrereq2()
9437

    
9438
  def _CheckPrereq2(self):
9439
    """Check prerequisites, second part.
9440

9441
    This function should always be part of CheckPrereq. It was separated and is
9442
    now called from Exec because during node evacuation iallocator was only
9443
    called with an unmodified cluster model, not taking planned changes into
9444
    account.
9445

9446
    """
9447
    instance = self.instance
9448
    secondary_node = instance.secondary_nodes[0]
9449

    
9450
    if self.iallocator_name is None:
9451
      remote_node = self.remote_node
9452
    else:
9453
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9454
                                       instance.name, instance.secondary_nodes)
9455

    
9456
    if remote_node is None:
9457
      self.remote_node_info = None
9458
    else:
9459
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9460
             "Remote node '%s' is not locked" % remote_node
9461

    
9462
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9463
      assert self.remote_node_info is not None, \
9464
        "Cannot retrieve locked node %s" % remote_node
9465

    
9466
    if remote_node == self.instance.primary_node:
9467
      raise errors.OpPrereqError("The specified node is the primary node of"
9468
                                 " the instance", errors.ECODE_INVAL)
9469

    
9470
    if remote_node == secondary_node:
9471
      raise errors.OpPrereqError("The specified node is already the"
9472
                                 " secondary node of the instance",
9473
                                 errors.ECODE_INVAL)
9474

    
9475
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9476
                                    constants.REPLACE_DISK_CHG):
9477
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9478
                                 errors.ECODE_INVAL)
9479

    
9480
    if self.mode == constants.REPLACE_DISK_AUTO:
9481
      if not self._CheckDisksActivated(instance):
9482
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9483
                                   " first" % self.instance_name,
9484
                                   errors.ECODE_STATE)
9485
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9486
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9487

    
9488
      if faulty_primary and faulty_secondary:
9489
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9490
                                   " one node and can not be repaired"
9491
                                   " automatically" % self.instance_name,
9492
                                   errors.ECODE_STATE)
9493

    
9494
      if faulty_primary:
9495
        self.disks = faulty_primary
9496
        self.target_node = instance.primary_node
9497
        self.other_node = secondary_node
9498
        check_nodes = [self.target_node, self.other_node]
9499
      elif faulty_secondary:
9500
        self.disks = faulty_secondary
9501
        self.target_node = secondary_node
9502
        self.other_node = instance.primary_node
9503
        check_nodes = [self.target_node, self.other_node]
9504
      else:
9505
        self.disks = []
9506
        check_nodes = []
9507

    
9508
    else:
9509
      # Non-automatic modes
9510
      if self.mode == constants.REPLACE_DISK_PRI:
9511
        self.target_node = instance.primary_node
9512
        self.other_node = secondary_node
9513
        check_nodes = [self.target_node, self.other_node]
9514

    
9515
      elif self.mode == constants.REPLACE_DISK_SEC:
9516
        self.target_node = secondary_node
9517
        self.other_node = instance.primary_node
9518
        check_nodes = [self.target_node, self.other_node]
9519

    
9520
      elif self.mode == constants.REPLACE_DISK_CHG:
9521
        self.new_node = remote_node
9522
        self.other_node = instance.primary_node
9523
        self.target_node = secondary_node
9524
        check_nodes = [self.new_node, self.other_node]
9525

    
9526
        _CheckNodeNotDrained(self.lu, remote_node)
9527
        _CheckNodeVmCapable(self.lu, remote_node)
9528

    
9529
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9530
        assert old_node_info is not None
9531
        if old_node_info.offline and not self.early_release:
9532
          # doesn't make sense to delay the release
9533
          self.early_release = True
9534
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9535
                          " early-release mode", secondary_node)
9536

    
9537
      else:
9538
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9539
                                     self.mode)
9540

    
9541
      # If not specified all disks should be replaced
9542
      if not self.disks:
9543
        self.disks = range(len(self.instance.disks))
9544

    
9545
    for node in check_nodes:
9546
      _CheckNodeOnline(self.lu, node)
9547

    
9548
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9549
                                                          self.other_node,
9550
                                                          self.target_node]
9551
                              if node_name is not None)
9552

    
9553
    # Release unneeded node locks
9554
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9555

    
9556
    # Release any owned node group
9557
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9558
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9559

    
9560
    # Check whether disks are valid
9561
    for disk_idx in self.disks:
9562
      instance.FindDisk(disk_idx)
9563

    
9564
    # Get secondary node IP addresses
9565
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9566
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9567

    
9568
  def Exec(self, feedback_fn):
9569
    """Execute disk replacement.
9570

9571
    This dispatches the disk replacement to the appropriate handler.
9572

9573
    """
9574
    if self.delay_iallocator:
9575
      self._CheckPrereq2()
9576

    
9577
    if __debug__:
9578
      # Verify owned locks before starting operation
9579
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9580
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9581
          ("Incorrect node locks, owning %s, expected %s" %
9582
           (owned_nodes, self.node_secondary_ip.keys()))
9583

    
9584
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9585
      assert list(owned_instances) == [self.instance_name], \
9586
          "Instance '%s' not locked" % self.instance_name
9587

    
9588
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9589
          "Should not own any node group lock at this point"
9590

    
9591
    if not self.disks:
9592
      feedback_fn("No disks need replacement")
9593
      return
9594

    
9595
    feedback_fn("Replacing disk(s) %s for %s" %
9596
                (utils.CommaJoin(self.disks), self.instance.name))
9597

    
9598
    activate_disks = (not self.instance.admin_up)
9599

    
9600
    # Activate the instance disks if we're replacing them on a down instance
9601
    if activate_disks:
9602
      _StartInstanceDisks(self.lu, self.instance, True)
9603

    
9604
    try:
9605
      # Should we replace the secondary node?
9606
      if self.new_node is not None:
9607
        fn = self._ExecDrbd8Secondary
9608
      else:
9609
        fn = self._ExecDrbd8DiskOnly
9610

    
9611
      result = fn(feedback_fn)
9612
    finally:
9613
      # Deactivate the instance disks if we're replacing them on a
9614
      # down instance
9615
      if activate_disks:
9616
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9617

    
9618
    if __debug__:
9619
      # Verify owned locks
9620
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9621
      nodes = frozenset(self.node_secondary_ip)
9622
      assert ((self.early_release and not owned_nodes) or
9623
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9624
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9625
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9626

    
9627
    return result
9628

    
9629
  def _CheckVolumeGroup(self, nodes):
9630
    self.lu.LogInfo("Checking volume groups")
9631

    
9632
    vgname = self.cfg.GetVGName()
9633

    
9634
    # Make sure volume group exists on all involved nodes
9635
    results = self.rpc.call_vg_list(nodes)
9636
    if not results:
9637
      raise errors.OpExecError("Can't list volume groups on the nodes")
9638

    
9639
    for node in nodes:
9640
      res = results[node]
9641
      res.Raise("Error checking node %s" % node)
9642
      if vgname not in res.payload:
9643
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9644
                                 (vgname, node))
9645

    
9646
  def _CheckDisksExistence(self, nodes):
9647
    # Check disk existence
9648
    for idx, dev in enumerate(self.instance.disks):
9649
      if idx not in self.disks:
9650
        continue
9651

    
9652
      for node in nodes:
9653
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9654
        self.cfg.SetDiskID(dev, node)
9655

    
9656
        result = self.rpc.call_blockdev_find(node, dev)
9657

    
9658
        msg = result.fail_msg
9659
        if msg or not result.payload:
9660
          if not msg:
9661
            msg = "disk not found"
9662
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9663
                                   (idx, node, msg))
9664

    
9665
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9666
    for idx, dev in enumerate(self.instance.disks):
9667
      if idx not in self.disks:
9668
        continue
9669

    
9670
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9671
                      (idx, node_name))
9672

    
9673
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9674
                                   ldisk=ldisk):
9675
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9676
                                 " replace disks for instance %s" %
9677
                                 (node_name, self.instance.name))
9678

    
9679
  def _CreateNewStorage(self, node_name):
9680
    """Create new storage on the primary or secondary node.
9681

9682
    This is only used for same-node replaces, not for changing the
9683
    secondary node, hence we don't want to modify the existing disk.
9684

9685
    """
9686
    iv_names = {}
9687

    
9688
    for idx, dev in enumerate(self.instance.disks):
9689
      if idx not in self.disks:
9690
        continue
9691

    
9692
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9693

    
9694
      self.cfg.SetDiskID(dev, node_name)
9695

    
9696
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9697
      names = _GenerateUniqueNames(self.lu, lv_names)
9698

    
9699
      vg_data = dev.children[0].logical_id[0]
9700
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9701
                             logical_id=(vg_data, names[0]))
9702
      vg_meta = dev.children[1].logical_id[0]
9703
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9704
                             logical_id=(vg_meta, names[1]))
9705

    
9706
      new_lvs = [lv_data, lv_meta]
9707
      old_lvs = [child.Copy() for child in dev.children]
9708
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9709

    
9710
      # we pass force_create=True to force the LVM creation
9711
      for new_lv in new_lvs:
9712
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9713
                        _GetInstanceInfoText(self.instance), False)
9714

    
9715
    return iv_names
9716

    
9717
  def _CheckDevices(self, node_name, iv_names):
9718
    for name, (dev, _, _) in iv_names.iteritems():
9719
      self.cfg.SetDiskID(dev, node_name)
9720

    
9721
      result = self.rpc.call_blockdev_find(node_name, dev)
9722

    
9723
      msg = result.fail_msg
9724
      if msg or not result.payload:
9725
        if not msg:
9726
          msg = "disk not found"
9727
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9728
                                 (name, msg))
9729

    
9730
      if result.payload.is_degraded:
9731
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9732

    
9733
  def _RemoveOldStorage(self, node_name, iv_names):
9734
    for name, (_, old_lvs, _) in iv_names.iteritems():
9735
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9736

    
9737
      for lv in old_lvs:
9738
        self.cfg.SetDiskID(lv, node_name)
9739

    
9740
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9741
        if msg:
9742
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9743
                             hint="remove unused LVs manually")
9744

    
9745
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9746
    """Replace a disk on the primary or secondary for DRBD 8.
9747

9748
    The algorithm for replace is quite complicated:
9749

9750
      1. for each disk to be replaced:
9751

9752
        1. create new LVs on the target node with unique names
9753
        1. detach old LVs from the drbd device
9754
        1. rename old LVs to name_replaced.<time_t>
9755
        1. rename new LVs to old LVs
9756
        1. attach the new LVs (with the old names now) to the drbd device
9757

9758
      1. wait for sync across all devices
9759

9760
      1. for each modified disk:
9761

9762
        1. remove old LVs (which have the name name_replaces.<time_t>)
9763

9764
    Failures are not very well handled.
9765

9766
    """
9767
    steps_total = 6
9768

    
9769
    # Step: check device activation
9770
    self.lu.LogStep(1, steps_total, "Check device existence")
9771
    self._CheckDisksExistence([self.other_node, self.target_node])
9772
    self._CheckVolumeGroup([self.target_node, self.other_node])
9773

    
9774
    # Step: check other node consistency
9775
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9776
    self._CheckDisksConsistency(self.other_node,
9777
                                self.other_node == self.instance.primary_node,
9778
                                False)
9779

    
9780
    # Step: create new storage
9781
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9782
    iv_names = self._CreateNewStorage(self.target_node)
9783

    
9784
    # Step: for each lv, detach+rename*2+attach
9785
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9786
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9787
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9788

    
9789
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9790
                                                     old_lvs)
9791
      result.Raise("Can't detach drbd from local storage on node"
9792
                   " %s for device %s" % (self.target_node, dev.iv_name))
9793
      #dev.children = []
9794
      #cfg.Update(instance)
9795

    
9796
      # ok, we created the new LVs, so now we know we have the needed
9797
      # storage; as such, we proceed on the target node to rename
9798
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9799
      # using the assumption that logical_id == physical_id (which in
9800
      # turn is the unique_id on that node)
9801

    
9802
      # FIXME(iustin): use a better name for the replaced LVs
9803
      temp_suffix = int(time.time())
9804
      ren_fn = lambda d, suff: (d.physical_id[0],
9805
                                d.physical_id[1] + "_replaced-%s" % suff)
9806

    
9807
      # Build the rename list based on what LVs exist on the node
9808
      rename_old_to_new = []
9809
      for to_ren in old_lvs:
9810
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9811
        if not result.fail_msg and result.payload:
9812
          # device exists
9813
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9814

    
9815
      self.lu.LogInfo("Renaming the old LVs on the target node")
9816
      result = self.rpc.call_blockdev_rename(self.target_node,
9817
                                             rename_old_to_new)
9818
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9819

    
9820
      # Now we rename the new LVs to the old LVs
9821
      self.lu.LogInfo("Renaming the new LVs on the target node")
9822
      rename_new_to_old = [(new, old.physical_id)
9823
                           for old, new in zip(old_lvs, new_lvs)]
9824
      result = self.rpc.call_blockdev_rename(self.target_node,
9825
                                             rename_new_to_old)
9826
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9827

    
9828
      # Intermediate steps of in memory modifications
9829
      for old, new in zip(old_lvs, new_lvs):
9830
        new.logical_id = old.logical_id
9831
        self.cfg.SetDiskID(new, self.target_node)
9832

    
9833
      # We need to modify old_lvs so that removal later removes the
9834
      # right LVs, not the newly added ones; note that old_lvs is a
9835
      # copy here
9836
      for disk in old_lvs:
9837
        disk.logical_id = ren_fn(disk, temp_suffix)
9838
        self.cfg.SetDiskID(disk, self.target_node)
9839

    
9840
      # Now that the new lvs have the old name, we can add them to the device
9841
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9842
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9843
                                                  new_lvs)
9844
      msg = result.fail_msg
9845
      if msg:
9846
        for new_lv in new_lvs:
9847
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9848
                                               new_lv).fail_msg
9849
          if msg2:
9850
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9851
                               hint=("cleanup manually the unused logical"
9852
                                     "volumes"))
9853
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9854

    
9855
    cstep = 5
9856
    if self.early_release:
9857
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9858
      cstep += 1
9859
      self._RemoveOldStorage(self.target_node, iv_names)
9860
      # WARNING: we release both node locks here, do not do other RPCs
9861
      # than WaitForSync to the primary node
9862
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9863
                    names=[self.target_node, self.other_node])
9864

    
9865
    # Wait for sync
9866
    # This can fail as the old devices are degraded and _WaitForSync
9867
    # does a combined result over all disks, so we don't check its return value
9868
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9869
    cstep += 1
9870
    _WaitForSync(self.lu, self.instance)
9871

    
9872
    # Check all devices manually
9873
    self._CheckDevices(self.instance.primary_node, iv_names)
9874

    
9875
    # Step: remove old storage
9876
    if not self.early_release:
9877
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9878
      cstep += 1
9879
      self._RemoveOldStorage(self.target_node, iv_names)
9880

    
9881
  def _ExecDrbd8Secondary(self, feedback_fn):
9882
    """Replace the secondary node for DRBD 8.
9883

9884
    The algorithm for replace is quite complicated:
9885
      - for all disks of the instance:
9886
        - create new LVs on the new node with same names
9887
        - shutdown the drbd device on the old secondary
9888
        - disconnect the drbd network on the primary
9889
        - create the drbd device on the new secondary
9890
        - network attach the drbd on the primary, using an artifice:
9891
          the drbd code for Attach() will connect to the network if it
9892
          finds a device which is connected to the good local disks but
9893
          not network enabled
9894
      - wait for sync across all devices
9895
      - remove all disks from the old secondary
9896

9897
    Failures are not very well handled.
9898

9899
    """
9900
    steps_total = 6
9901

    
9902
    pnode = self.instance.primary_node
9903

    
9904
    # Step: check device activation
9905
    self.lu.LogStep(1, steps_total, "Check device existence")
9906
    self._CheckDisksExistence([self.instance.primary_node])
9907
    self._CheckVolumeGroup([self.instance.primary_node])
9908

    
9909
    # Step: check other node consistency
9910
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9911
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9912

    
9913
    # Step: create new storage
9914
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9915
    for idx, dev in enumerate(self.instance.disks):
9916
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9917
                      (self.new_node, idx))
9918
      # we pass force_create=True to force LVM creation
9919
      for new_lv in dev.children:
9920
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9921
                        _GetInstanceInfoText(self.instance), False)
9922

    
9923
    # Step 4: drbd minors and drbd setup changes
9924
    # after this, we must manually remove the drbd minors on both the
9925
    # error and the success paths
9926
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9927
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9928
                                         for dev in self.instance.disks],
9929
                                        self.instance.name)
9930
    logging.debug("Allocated minors %r", minors)
9931

    
9932
    iv_names = {}
9933
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9934
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9935
                      (self.new_node, idx))
9936
      # create new devices on new_node; note that we create two IDs:
9937
      # one without port, so the drbd will be activated without
9938
      # networking information on the new node at this stage, and one
9939
      # with network, for the latter activation in step 4
9940
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9941
      if self.instance.primary_node == o_node1:
9942
        p_minor = o_minor1
9943
      else:
9944
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9945
        p_minor = o_minor2
9946

    
9947
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9948
                      p_minor, new_minor, o_secret)
9949
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9950
                    p_minor, new_minor, o_secret)
9951

    
9952
      iv_names[idx] = (dev, dev.children, new_net_id)
9953
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9954
                    new_net_id)
9955
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9956
                              logical_id=new_alone_id,
9957
                              children=dev.children,
9958
                              size=dev.size)
9959
      try:
9960
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9961
                              _GetInstanceInfoText(self.instance), False)
9962
      except errors.GenericError:
9963
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9964
        raise
9965

    
9966
    # We have new devices, shutdown the drbd on the old secondary
9967
    for idx, dev in enumerate(self.instance.disks):
9968
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9969
      self.cfg.SetDiskID(dev, self.target_node)
9970
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9971
      if msg:
9972
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9973
                           "node: %s" % (idx, msg),
9974
                           hint=("Please cleanup this device manually as"
9975
                                 " soon as possible"))
9976

    
9977
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9978
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9979
                                               self.instance.disks)[pnode]
9980

    
9981
    msg = result.fail_msg
9982
    if msg:
9983
      # detaches didn't succeed (unlikely)
9984
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9985
      raise errors.OpExecError("Can't detach the disks from the network on"
9986
                               " old node: %s" % (msg,))
9987

    
9988
    # if we managed to detach at least one, we update all the disks of
9989
    # the instance to point to the new secondary
9990
    self.lu.LogInfo("Updating instance configuration")
9991
    for dev, _, new_logical_id in iv_names.itervalues():
9992
      dev.logical_id = new_logical_id
9993
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9994

    
9995
    self.cfg.Update(self.instance, feedback_fn)
9996

    
9997
    # and now perform the drbd attach
9998
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9999
                    " (standalone => connected)")
10000
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10001
                                            self.new_node],
10002
                                           self.node_secondary_ip,
10003
                                           self.instance.disks,
10004
                                           self.instance.name,
10005
                                           False)
10006
    for to_node, to_result in result.items():
10007
      msg = to_result.fail_msg
10008
      if msg:
10009
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10010
                           to_node, msg,
10011
                           hint=("please do a gnt-instance info to see the"
10012
                                 " status of disks"))
10013
    cstep = 5
10014
    if self.early_release:
10015
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10016
      cstep += 1
10017
      self._RemoveOldStorage(self.target_node, iv_names)
10018
      # WARNING: we release all node locks here, do not do other RPCs
10019
      # than WaitForSync to the primary node
10020
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10021
                    names=[self.instance.primary_node,
10022
                           self.target_node,
10023
                           self.new_node])
10024

    
10025
    # Wait for sync
10026
    # This can fail as the old devices are degraded and _WaitForSync
10027
    # does a combined result over all disks, so we don't check its return value
10028
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10029
    cstep += 1
10030
    _WaitForSync(self.lu, self.instance)
10031

    
10032
    # Check all devices manually
10033
    self._CheckDevices(self.instance.primary_node, iv_names)
10034

    
10035
    # Step: remove old storage
10036
    if not self.early_release:
10037
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10038
      self._RemoveOldStorage(self.target_node, iv_names)
10039

    
10040

    
10041
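# Illustrative sketch (not part of cmdlib): the two rename passes performed
# by TLReplaceDisks._ExecDrbd8DiskOnly above in step 4, reduced to plain
# (vg, lv_name) tuples.  The helper name and the data layout are invented for
# this example only; the real code works on objects.Disk instances and issues
# the renames through rpc.call_blockdev_rename.
def _ExampleLvRenamePlan(old_lvs, new_lvs, temp_suffix):
  """Return the (old->temp, new->old) rename lists for an LV swap.

  @param old_lvs: list of (vg, name) tuples currently attached to the DRBD
  @param new_lvs: list of (vg, name) tuples holding the freshly created data
  @param temp_suffix: value used to build the temporary "_replaced-" names

  """
  # First pass: move the old LVs out of the way under a unique name
  rename_old_to_new = [((vg, name),
                        (vg, "%s_replaced-%s" % (name, temp_suffix)))
                       for (vg, name) in old_lvs]
  # Second pass: give the new LVs the names the DRBD device expects
  rename_new_to_old = [(new, old) for (old, new) in zip(old_lvs, new_lvs)]
  return (rename_old_to_new, rename_new_to_old)

# Example (hypothetical names):
#   _ExampleLvRenamePlan([("xenvg", ".disk0_data")],
#                        [("xenvg", ".disk0_data_new")], 1234567890)
#   -> ([(("xenvg", ".disk0_data"),
#         ("xenvg", ".disk0_data_replaced-1234567890"))],
#       [(("xenvg", ".disk0_data_new"), ("xenvg", ".disk0_data"))])

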
class LURepairNodeStorage(NoHooksLU):
10042
  """Repairs the volume group on a node.
10043

10044
  """
10045
  REQ_BGL = False
10046

    
10047
  def CheckArguments(self):
10048
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10049

    
10050
    storage_type = self.op.storage_type
10051

    
10052
    if (constants.SO_FIX_CONSISTENCY not in
10053
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10054
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10055
                                 " repaired" % storage_type,
10056
                                 errors.ECODE_INVAL)
10057

    
10058
  def ExpandNames(self):
10059
    self.needed_locks = {
10060
      locking.LEVEL_NODE: [self.op.node_name],
10061
      }
10062

    
10063
  def _CheckFaultyDisks(self, instance, node_name):
10064
    """Ensure faulty disks abort the opcode or at least warn."""
10065
    try:
10066
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10067
                                  node_name, True):
10068
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10069
                                   " node '%s'" % (instance.name, node_name),
10070
                                   errors.ECODE_STATE)
10071
    except errors.OpPrereqError, err:
10072
      if self.op.ignore_consistency:
10073
        self.proc.LogWarning(str(err.args[0]))
10074
      else:
10075
        raise
10076

    
10077
  def CheckPrereq(self):
10078
    """Check prerequisites.
10079

10080
    """
10081
    # Check whether any instance on this node has faulty disks
10082
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10083
      if not inst.admin_up:
10084
        continue
10085
      check_nodes = set(inst.all_nodes)
10086
      check_nodes.discard(self.op.node_name)
10087
      for inst_node_name in check_nodes:
10088
        self._CheckFaultyDisks(inst, inst_node_name)
10089

    
10090
  def Exec(self, feedback_fn):
10091
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10092
                (self.op.name, self.op.node_name))
10093

    
10094
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10095
    result = self.rpc.call_storage_execute(self.op.node_name,
10096
                                           self.op.storage_type, st_args,
10097
                                           self.op.name,
10098
                                           constants.SO_FIX_CONSISTENCY)
10099
    result.Raise("Failed to repair storage unit '%s' on %s" %
10100
                 (self.op.name, self.op.node_name))
10101

    
10102

    
10103
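# Minimal sketch of the whitelist check done in
# LURepairNodeStorage.CheckArguments below: an operation is only accepted if
# it is listed for the given storage type.  The table and names here are
# stand-ins invented for this example; the real mapping lives in
# constants.VALID_STORAGE_OPERATIONS and constants.SO_FIX_CONSISTENCY.
_EXAMPLE_VALID_STORAGE_OPS = {
  "lvm-vg": ["fix-consistency"],    # hypothetical storage type / op names
  "file": [],
  }

def _ExampleCanRepair(storage_type):
  """Tell whether a storage type supports the repair operation."""
  return ("fix-consistency" in
          _EXAMPLE_VALID_STORAGE_OPS.get(storage_type, []))

# _ExampleCanRepair("lvm-vg") -> True; _ExampleCanRepair("file") -> False

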
class LUNodeEvacuate(NoHooksLU):
10104
  """Evacuates instances off a list of nodes.
10105

10106
  """
10107
  REQ_BGL = False
10108

    
10109
  def CheckArguments(self):
10110
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10111

    
10112
  def ExpandNames(self):
10113
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10114

    
10115
    if self.op.remote_node is not None:
10116
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10117
      assert self.op.remote_node
10118

    
10119
      if self.op.remote_node == self.op.node_name:
10120
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10121
                                   " secondary node", errors.ECODE_INVAL)
10122

    
10123
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10124
        raise errors.OpPrereqError("Without the use of an iallocator only"
10125
                                   " secondary instances can be evacuated",
10126
                                   errors.ECODE_INVAL)
10127

    
10128
    # Declare locks
10129
    self.share_locks = _ShareAll()
10130
    self.needed_locks = {
10131
      locking.LEVEL_INSTANCE: [],
10132
      locking.LEVEL_NODEGROUP: [],
10133
      locking.LEVEL_NODE: [],
10134
      }
10135

    
10136
    # Determine nodes (via group) optimistically, needs verification once locks
10137
    # have been acquired
10138
    self.lock_nodes = self._DetermineNodes()
10139

    
10140
  def _DetermineNodes(self):
10141
    """Gets the list of nodes to operate on.
10142

10143
    """
10144
    if self.op.remote_node is None:
10145
      # Iallocator will choose any node(s) in the same group
10146
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10147
    else:
10148
      group_nodes = frozenset([self.op.remote_node])
10149

    
10150
    # Determine nodes to be locked
10151
    return set([self.op.node_name]) | group_nodes
10152

    
10153
  def _DetermineInstances(self):
10154
    """Builds list of instances to operate on.
10155

10156
    """
10157
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10158

    
10159
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10160
      # Primary instances only
10161
      inst_fn = _GetNodePrimaryInstances
10162
      assert self.op.remote_node is None, \
10163
        "Evacuating primary instances requires iallocator"
10164
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10165
      # Secondary instances only
10166
      inst_fn = _GetNodeSecondaryInstances
10167
    else:
10168
      # All instances
10169
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10170
      inst_fn = _GetNodeInstances
10171
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10172
      # per instance
10173
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10174
                                 " interface it is not possible to evacuate"
10175
                                 " all instances at once; specify explicitly"
10176
                                 " whether to evacuate primary or secondary"
10177
                                 " instances",
10178
                                 errors.ECODE_INVAL)
10179

    
10180
    return inst_fn(self.cfg, self.op.node_name)
10181

    
10182
  def DeclareLocks(self, level):
10183
    if level == locking.LEVEL_INSTANCE:
10184
      # Lock instances optimistically, needs verification once node and group
10185
      # locks have been acquired
10186
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10187
        set(i.name for i in self._DetermineInstances())
10188

    
10189
    elif level == locking.LEVEL_NODEGROUP:
10190
      # Lock node groups for all potential target nodes optimistically, needs
10191
      # verification once nodes have been acquired
10192
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10193
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10194

    
10195
    elif level == locking.LEVEL_NODE:
10196
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10197

    
10198
  def CheckPrereq(self):
10199
    # Verify locks
10200
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10201
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10202
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10203

    
10204
    need_nodes = self._DetermineNodes()
10205

    
10206
    if not owned_nodes.issuperset(need_nodes):
10207
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10208
                                 " locks were acquired, current nodes are"
10209
                                 " are '%s', used to be '%s'; retry the"
10210
                                 " operation" %
10211
                                 (self.op.node_name,
10212
                                  utils.CommaJoin(need_nodes),
10213
                                  utils.CommaJoin(owned_nodes)),
10214
                                 errors.ECODE_STATE)
10215

    
10216
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10217
    if owned_groups != wanted_groups:
10218
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10219
                               " current groups are '%s', used to be '%s';"
10220
                               " retry the operation" %
10221
                               (utils.CommaJoin(wanted_groups),
10222
                                utils.CommaJoin(owned_groups)))
10223

    
10224
    # Determine affected instances
10225
    self.instances = self._DetermineInstances()
10226
    self.instance_names = [i.name for i in self.instances]
10227

    
10228
    if set(self.instance_names) != owned_instances:
10229
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10230
                               " were acquired, current instances are '%s',"
10231
                               " used to be '%s'; retry the operation" %
10232
                               (self.op.node_name,
10233
                                utils.CommaJoin(self.instance_names),
10234
                                utils.CommaJoin(owned_instances)))
10235

    
10236
    if self.instance_names:
10237
      self.LogInfo("Evacuating instances from node '%s': %s",
10238
                   self.op.node_name,
10239
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10240
    else:
10241
      self.LogInfo("No instances to evacuate from node '%s'",
10242
                   self.op.node_name)
10243

    
10244
    if self.op.remote_node is not None:
10245
      for i in self.instances:
10246
        if i.primary_node == self.op.remote_node:
10247
          raise errors.OpPrereqError("Node %s is the primary node of"
10248
                                     " instance %s, cannot use it as"
10249
                                     " secondary" %
10250
                                     (self.op.remote_node, i.name),
10251
                                     errors.ECODE_INVAL)
10252

    
10253
  def Exec(self, feedback_fn):
10254
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10255

    
10256
    if not self.instance_names:
10257
      # No instances to evacuate
10258
      jobs = []
10259

    
10260
    elif self.op.iallocator is not None:
10261
      # TODO: Implement relocation to other group
10262
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10263
                       evac_mode=self.op.mode,
10264
                       instances=list(self.instance_names))
10265

    
10266
      ial.Run(self.op.iallocator)
10267

    
10268
      if not ial.success:
10269
        raise errors.OpPrereqError("Can't compute node evacuation using"
10270
                                   " iallocator '%s': %s" %
10271
                                   (self.op.iallocator, ial.info),
10272
                                   errors.ECODE_NORES)
10273

    
10274
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10275

    
10276
    elif self.op.remote_node is not None:
10277
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10278
      jobs = [
10279
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10280
                                        remote_node=self.op.remote_node,
10281
                                        disks=[],
10282
                                        mode=constants.REPLACE_DISK_CHG,
10283
                                        early_release=self.op.early_release)]
10284
        for instance_name in self.instance_names
10285
        ]
10286

    
10287
    else:
10288
      raise errors.ProgrammerError("No iallocator or remote node")
10289

    
10290
    return ResultWithJobs(jobs)
10291

    
10292

    
10293
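# Sketch of the job layout returned by LUNodeEvacuate.Exec above when a
# remote node is given: one job per instance, each job being a list with a
# single replace-disks operation.  Plain dicts stand in for opcode objects in
# this example; the real code builds opcodes.OpInstanceReplaceDisks instances
# and the field values shown are illustrative only.
def _ExampleBuildEvacJobs(instance_names, remote_node, early_release):
  return [
    [{"OP_ID": "OP_INSTANCE_REPLACE_DISKS",  # stand-in for the opcode class
      "instance_name": instance_name,
      "remote_node": remote_node,
      "disks": [],
      "mode": "replace_new_secondary",
      "early_release": early_release}]
    for instance_name in instance_names
    ]

# _ExampleBuildEvacJobs(["inst1", "inst2"], "node3", False) yields two jobs,
# each containing exactly one operation.

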
def _SetOpEarlyRelease(early_release, op):
10294
  """Sets C{early_release} flag on opcodes if available.
10295

10296
  """
10297
  try:
10298
    op.early_release = early_release
10299
  except AttributeError:
10300
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10301

    
10302
  return op
10303

    
10304

    
10305
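# Usage sketch for _SetOpEarlyRelease above: the flag is applied via duck
# typing, so operations that do not expose "early_release" are passed back
# unchanged.  The toy opcode class below is invented for illustration only.
class _ExampleOpcode(object):
  __slots__ = ["early_release"]     # mimics the slot-based opcode objects

def _ExampleApplyEarlyRelease(ops, early_release):
  """Apply the flag to every op of a job, tolerating ops without the slot."""
  return [_SetOpEarlyRelease(early_release, op) for op in ops]

# _ExampleApplyEarlyRelease([_ExampleOpcode()], True)[0].early_release -> True

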
def _NodeEvacDest(use_nodes, group, nodes):
10306
  """Returns group or nodes depending on caller's choice.
10307

10308
  """
10309
  if use_nodes:
10310
    return utils.CommaJoin(nodes)
10311
  else:
10312
    return group
10313

    
10314

    
10315
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10316
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10317

10318
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10319
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10320

10321
  @type lu: L{LogicalUnit}
10322
  @param lu: Logical unit instance
10323
  @type alloc_result: tuple/list
10324
  @param alloc_result: Result from iallocator
10325
  @type early_release: bool
10326
  @param early_release: Whether to release locks early if possible
10327
  @type use_nodes: bool
10328
  @param use_nodes: Whether to display node names instead of groups
10329

10330
  """
10331
  (moved, failed, jobs) = alloc_result
10332

    
10333
  if failed:
10334
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10335
                                 for (name, reason) in failed)
10336
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10337
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10338

    
10339
  if moved:
10340
    lu.LogInfo("Instances to be moved: %s",
10341
               utils.CommaJoin("%s (to %s)" %
10342
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10343
                               for (name, group, nodes) in moved))
10344

    
10345
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10346
              map(opcodes.OpCode.LoadOpCode, ops))
10347
          for ops in jobs]
10348

    
10349

    
10350
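# Shape sketch for the iallocator result consumed by _LoadNodeEvacResult
# above: a (moved, failed, jobs) triple.  The sample values are hypothetical
# and only show the structure; real "jobs" entries are serialized opcodes
# that are fed to opcodes.OpCode.LoadOpCode.
_EXAMPLE_NEVAC_RESULT = (
  # moved: (instance name, target group, new nodes)
  [("inst1.example.com", "default", ["node2.example.com"])],
  # failed: (instance name, reason)
  [("inst2.example.com", "no space on candidate nodes")],
  # jobs: one inner list of serialized opcodes per job to submit
  [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS",
     "instance_name": "inst1.example.com"}]],
  )

def _ExampleSummarizeNevac(alloc_result):
  """Return (number of moved instances, failures, jobs) from the triple."""
  (moved, failed, jobs) = alloc_result
  return (len(moved), len(failed), len(jobs))

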
class LUInstanceGrowDisk(LogicalUnit):
10351
  """Grow a disk of an instance.
10352

10353
  """
10354
  HPATH = "disk-grow"
10355
  HTYPE = constants.HTYPE_INSTANCE
10356
  REQ_BGL = False
10357

    
10358
  def ExpandNames(self):
10359
    self._ExpandAndLockInstance()
10360
    self.needed_locks[locking.LEVEL_NODE] = []
10361
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10362

    
10363
  def DeclareLocks(self, level):
10364
    if level == locking.LEVEL_NODE:
10365
      self._LockInstancesNodes()
10366

    
10367
  def BuildHooksEnv(self):
10368
    """Build hooks env.
10369

10370
    This runs on the master, the primary and all the secondaries.
10371

10372
    """
10373
    env = {
10374
      "DISK": self.op.disk,
10375
      "AMOUNT": self.op.amount,
10376
      }
10377
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10378
    return env
10379

    
10380
  def BuildHooksNodes(self):
10381
    """Build hooks nodes.
10382

10383
    """
10384
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10385
    return (nl, nl)
10386

    
10387
  def CheckPrereq(self):
10388
    """Check prerequisites.
10389

10390
    This checks that the instance is in the cluster.
10391

10392
    """
10393
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10394
    assert instance is not None, \
10395
      "Cannot retrieve locked instance %s" % self.op.instance_name
10396
    nodenames = list(instance.all_nodes)
10397
    for node in nodenames:
10398
      _CheckNodeOnline(self, node)
10399

    
10400
    self.instance = instance
10401

    
10402
    if instance.disk_template not in constants.DTS_GROWABLE:
10403
      raise errors.OpPrereqError("Instance's disk layout does not support"
10404
                                 " growing", errors.ECODE_INVAL)
10405

    
10406
    self.disk = instance.FindDisk(self.op.disk)
10407

    
10408
    if instance.disk_template not in (constants.DT_FILE,
10409
                                      constants.DT_SHARED_FILE):
10410
      # TODO: check the free disk space for file, when that feature will be
10411
      # supported
10412
      _CheckNodesFreeDiskPerVG(self, nodenames,
10413
                               self.disk.ComputeGrowth(self.op.amount))
10414

    
10415
  def Exec(self, feedback_fn):
10416
    """Execute disk grow.
10417

10418
    """
10419
    instance = self.instance
10420
    disk = self.disk
10421

    
10422
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10423
    if not disks_ok:
10424
      raise errors.OpExecError("Cannot activate block device to grow")
10425

    
10426
    # First run all grow ops in dry-run mode
10427
    for node in instance.all_nodes:
10428
      self.cfg.SetDiskID(disk, node)
10429
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10430
      result.Raise("Grow request failed to node %s" % node)
10431

    
10432
    # We know that (as far as we can test) operations across different
10433
    # nodes will succeed, time to run it for real
10434
    for node in instance.all_nodes:
10435
      self.cfg.SetDiskID(disk, node)
10436
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10437
      result.Raise("Grow request failed to node %s" % node)
10438

    
10439
      # TODO: Rewrite code to work properly
10440
      # DRBD goes into sync mode for a short amount of time after executing the
10441
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10442
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10443
      # time is a work-around.
10444
      time.sleep(5)
10445

    
10446
    disk.RecordGrow(self.op.amount)
10447
    self.cfg.Update(instance, feedback_fn)
10448
    if self.op.wait_for_sync:
10449
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10450
      if disk_abort:
10451
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10452
                             " status; please check the instance")
10453
      if not instance.admin_up:
10454
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10455
    elif not instance.admin_up:
10456
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10457
                           " not supposed to be running because no wait for"
10458
                           " sync mode was requested")
10459

    
10460

    
10461
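# Sketch of the two-phase pattern used by LUInstanceGrowDisk.Exec below:
# every node first receives a dry-run grow request, and only if all of them
# succeed is the real grow executed.  "grow_fn" is an assumption of this
# example standing in for the rpc.call_blockdev_grow call and is expected to
# raise on failure.
def _ExampleGrowOnAllNodes(nodes, amount, grow_fn):
  """Run a dry-run pass across all nodes, then the real grow."""
  for node in nodes:
    grow_fn(node, amount, True)     # dryrun=True: may raise, changes nothing
  for node in nodes:
    grow_fn(node, amount, False)    # all dry-runs passed, grow for real

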
class LUInstanceQueryData(NoHooksLU):
10462
  """Query runtime instance data.
10463

10464
  """
10465
  REQ_BGL = False
10466

    
10467
  def ExpandNames(self):
10468
    self.needed_locks = {}
10469

    
10470
    # Use locking if requested or when non-static information is wanted
10471
    if not (self.op.static or self.op.use_locking):
10472
      self.LogWarning("Non-static data requested, locks need to be acquired")
10473
      self.op.use_locking = True
10474

    
10475
    if self.op.instances or not self.op.use_locking:
10476
      # Expand instance names right here
10477
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10478
    else:
10479
      # Will use acquired locks
10480
      self.wanted_names = None
10481

    
10482
    if self.op.use_locking:
10483
      self.share_locks = _ShareAll()
10484

    
10485
      if self.wanted_names is None:
10486
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10487
      else:
10488
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10489

    
10490
      self.needed_locks[locking.LEVEL_NODE] = []
10491
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10492

    
10493
  def DeclareLocks(self, level):
10494
    if self.op.use_locking and level == locking.LEVEL_NODE:
10495
      self._LockInstancesNodes()
10496

    
10497
  def CheckPrereq(self):
10498
    """Check prerequisites.
10499

10500
    This only checks the optional instance list against the existing names.
10501

10502
    """
10503
    if self.wanted_names is None:
10504
      assert self.op.use_locking, "Locking was not used"
10505
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10506

    
10507
    self.wanted_instances = \
10508
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10509

    
10510
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10511
    """Returns the status of a block device
10512

10513
    """
10514
    if self.op.static or not node:
10515
      return None
10516

    
10517
    self.cfg.SetDiskID(dev, node)
10518

    
10519
    result = self.rpc.call_blockdev_find(node, dev)
10520
    if result.offline:
10521
      return None
10522

    
10523
    result.Raise("Can't compute disk status for %s" % instance_name)
10524

    
10525
    status = result.payload
10526
    if status is None:
10527
      return None
10528

    
10529
    return (status.dev_path, status.major, status.minor,
10530
            status.sync_percent, status.estimated_time,
10531
            status.is_degraded, status.ldisk_status)
10532

    
10533
  def _ComputeDiskStatus(self, instance, snode, dev):
10534
    """Compute block device status.
10535

10536
    """
10537
    if dev.dev_type in constants.LDS_DRBD:
10538
      # we change the snode then (otherwise we use the one passed in)
10539
      if dev.logical_id[0] == instance.primary_node:
10540
        snode = dev.logical_id[1]
10541
      else:
10542
        snode = dev.logical_id[0]
10543

    
10544
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10545
                                              instance.name, dev)
10546
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10547

    
10548
    if dev.children:
10549
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10550
                                        instance, snode),
10551
                         dev.children)
10552
    else:
10553
      dev_children = []
10554

    
10555
    return {
10556
      "iv_name": dev.iv_name,
10557
      "dev_type": dev.dev_type,
10558
      "logical_id": dev.logical_id,
10559
      "physical_id": dev.physical_id,
10560
      "pstatus": dev_pstatus,
10561
      "sstatus": dev_sstatus,
10562
      "children": dev_children,
10563
      "mode": dev.mode,
10564
      "size": dev.size,
10565
      }
10566

    
10567
  def Exec(self, feedback_fn):
10568
    """Gather and return data"""
10569
    result = {}
10570

    
10571
    cluster = self.cfg.GetClusterInfo()
10572

    
10573
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10574
                                          for i in self.wanted_instances)
10575
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10576
      if self.op.static or pnode.offline:
10577
        remote_state = None
10578
        if pnode.offline:
10579
          self.LogWarning("Primary node %s is marked offline, returning static"
10580
                          " information only for instance %s" %
10581
                          (pnode.name, instance.name))
10582
      else:
10583
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10584
                                                  instance.name,
10585
                                                  instance.hypervisor)
10586
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10587
        remote_info = remote_info.payload
10588
        if remote_info and "state" in remote_info:
10589
          remote_state = "up"
10590
        else:
10591
          remote_state = "down"
10592

    
10593
      if instance.admin_up:
10594
        config_state = "up"
10595
      else:
10596
        config_state = "down"
10597

    
10598
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10599
                  instance.disks)
10600

    
10601
      result[instance.name] = {
10602
        "name": instance.name,
10603
        "config_state": config_state,
10604
        "run_state": remote_state,
10605
        "pnode": instance.primary_node,
10606
        "snodes": instance.secondary_nodes,
10607
        "os": instance.os,
10608
        # this happens to be the same format used for hooks
10609
        "nics": _NICListToTuple(self, instance.nics),
10610
        "disk_template": instance.disk_template,
10611
        "disks": disks,
10612
        "hypervisor": instance.hypervisor,
10613
        "network_port": instance.network_port,
10614
        "hv_instance": instance.hvparams,
10615
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10616
        "be_instance": instance.beparams,
10617
        "be_actual": cluster.FillBE(instance),
10618
        "os_instance": instance.osparams,
10619
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10620
        "serial_no": instance.serial_no,
10621
        "mtime": instance.mtime,
10622
        "ctime": instance.ctime,
10623
        "uuid": instance.uuid,
10624
        }
10625

    
10626
    return result
10627

    
10628

    
10629
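# Sketch of the recursive walk done by LUInstanceQueryData._ComputeDiskStatus
# above: the status of a disk includes the status of all its children (e.g.
# the LVs backing a DRBD device).  Disks are modelled as nested dicts in this
# example; the real code operates on objects.Disk and queries nodes via RPC.
def _ExampleDiskTreeStatus(disk):
  """Return a (dev_type, size, children) tuple mirroring the real layout."""
  children = [_ExampleDiskTreeStatus(child)
              for child in disk.get("children", [])]
  return (disk["dev_type"], disk["size"], children)

# _ExampleDiskTreeStatus({"dev_type": "drbd8", "size": 1024,
#                         "children": [{"dev_type": "lvm", "size": 1024},
#                                      {"dev_type": "lvm", "size": 128}]})
# -> ("drbd8", 1024, [("lvm", 1024, []), ("lvm", 128, [])])

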
class LUInstanceSetParams(LogicalUnit):
10630
  """Modifies an instances's parameters.
10631

10632
  """
10633
  HPATH = "instance-modify"
10634
  HTYPE = constants.HTYPE_INSTANCE
10635
  REQ_BGL = False
10636

    
10637
  def CheckArguments(self):
10638
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10639
            self.op.hvparams or self.op.beparams or self.op.os_name):
10640
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10641

    
10642
    if self.op.hvparams:
10643
      _CheckGlobalHvParams(self.op.hvparams)
10644

    
10645
    # Disk validation
10646
    disk_addremove = 0
10647
    for disk_op, disk_dict in self.op.disks:
10648
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10649
      if disk_op == constants.DDM_REMOVE:
10650
        disk_addremove += 1
10651
        continue
10652
      elif disk_op == constants.DDM_ADD:
10653
        disk_addremove += 1
10654
      else:
10655
        if not isinstance(disk_op, int):
10656
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10657
        if not isinstance(disk_dict, dict):
10658
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10659
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10660

    
10661
      if disk_op == constants.DDM_ADD:
10662
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10663
        if mode not in constants.DISK_ACCESS_SET:
10664
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10665
                                     errors.ECODE_INVAL)
10666
        size = disk_dict.get(constants.IDISK_SIZE, None)
10667
        if size is None:
10668
          raise errors.OpPrereqError("Required disk parameter size missing",
10669
                                     errors.ECODE_INVAL)
10670
        try:
10671
          size = int(size)
10672
        except (TypeError, ValueError), err:
10673
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10674
                                     str(err), errors.ECODE_INVAL)
10675
        disk_dict[constants.IDISK_SIZE] = size
10676
      else:
10677
        # modification of disk
10678
        if constants.IDISK_SIZE in disk_dict:
10679
          raise errors.OpPrereqError("Disk size change not possible, use"
10680
                                     " grow-disk", errors.ECODE_INVAL)
10681

    
10682
    if disk_addremove > 1:
10683
      raise errors.OpPrereqError("Only one disk add or remove operation"
10684
                                 " supported at a time", errors.ECODE_INVAL)
10685

    
10686
    if self.op.disks and self.op.disk_template is not None:
10687
      raise errors.OpPrereqError("Disk template conversion and other disk"
10688
                                 " changes not supported at the same time",
10689
                                 errors.ECODE_INVAL)
10690

    
10691
    if (self.op.disk_template and
10692
        self.op.disk_template in constants.DTS_INT_MIRROR and
10693
        self.op.remote_node is None):
10694
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10695
                                 " one requires specifying a secondary node",
10696
                                 errors.ECODE_INVAL)
10697

    
10698
    # NIC validation
10699
    nic_addremove = 0
10700
    for nic_op, nic_dict in self.op.nics:
10701
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10702
      if nic_op == constants.DDM_REMOVE:
10703
        nic_addremove += 1
10704
        continue
10705
      elif nic_op == constants.DDM_ADD:
10706
        nic_addremove += 1
10707
      else:
10708
        if not isinstance(nic_op, int):
10709
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10710
        if not isinstance(nic_dict, dict):
10711
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10712
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10713

    
10714
      # nic_dict should be a dict
10715
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10716
      if nic_ip is not None:
10717
        if nic_ip.lower() == constants.VALUE_NONE:
10718
          nic_dict[constants.INIC_IP] = None
10719
        else:
10720
          if not netutils.IPAddress.IsValid(nic_ip):
10721
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10722
                                       errors.ECODE_INVAL)
10723

    
10724
      nic_bridge = nic_dict.get("bridge", None)
10725
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10726
      if nic_bridge and nic_link:
10727
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10728
                                   " at the same time", errors.ECODE_INVAL)
10729
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10730
        nic_dict["bridge"] = None
10731
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10732
        nic_dict[constants.INIC_LINK] = None
10733

    
10734
      if nic_op == constants.DDM_ADD:
10735
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10736
        if nic_mac is None:
10737
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10738

    
10739
      if constants.INIC_MAC in nic_dict:
10740
        nic_mac = nic_dict[constants.INIC_MAC]
10741
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10742
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10743

    
10744
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10745
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10746
                                     " modifying an existing nic",
10747
                                     errors.ECODE_INVAL)
10748

    
10749
    if nic_addremove > 1:
10750
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10751
                                 " supported at a time", errors.ECODE_INVAL)
10752

    
10753
  def ExpandNames(self):
10754
    self._ExpandAndLockInstance()
10755
    self.needed_locks[locking.LEVEL_NODE] = []
10756
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10757

    
10758
  def DeclareLocks(self, level):
10759
    if level == locking.LEVEL_NODE:
10760
      self._LockInstancesNodes()
10761
      if self.op.disk_template and self.op.remote_node:
10762
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10763
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10764

    
10765
  def BuildHooksEnv(self):
10766
    """Build hooks env.
10767

10768
    This runs on the master, primary and secondaries.
10769

10770
    """
10771
    args = dict()
10772
    if constants.BE_MEMORY in self.be_new:
10773
      args["memory"] = self.be_new[constants.BE_MEMORY]
10774
    if constants.BE_VCPUS in self.be_new:
10775
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10776
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10777
    # information at all.
10778
    if self.op.nics:
10779
      args["nics"] = []
10780
      nic_override = dict(self.op.nics)
10781
      for idx, nic in enumerate(self.instance.nics):
10782
        if idx in nic_override:
10783
          this_nic_override = nic_override[idx]
10784
        else:
10785
          this_nic_override = {}
10786
        if constants.INIC_IP in this_nic_override:
10787
          ip = this_nic_override[constants.INIC_IP]
10788
        else:
10789
          ip = nic.ip
10790
        if constants.INIC_MAC in this_nic_override:
10791
          mac = this_nic_override[constants.INIC_MAC]
10792
        else:
10793
          mac = nic.mac
10794
        if idx in self.nic_pnew:
10795
          nicparams = self.nic_pnew[idx]
10796
        else:
10797
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10798
        mode = nicparams[constants.NIC_MODE]
10799
        link = nicparams[constants.NIC_LINK]
10800
        args["nics"].append((ip, mac, mode, link))
10801
      if constants.DDM_ADD in nic_override:
10802
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10803
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10804
        nicparams = self.nic_pnew[constants.DDM_ADD]
10805
        mode = nicparams[constants.NIC_MODE]
10806
        link = nicparams[constants.NIC_LINK]
10807
        args["nics"].append((ip, mac, mode, link))
10808
      elif constants.DDM_REMOVE in nic_override:
10809
        del args["nics"][-1]
10810

    
10811
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10812
    if self.op.disk_template:
10813
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10814

    
10815
    return env
10816

    
10817
  def BuildHooksNodes(self):
10818
    """Build hooks nodes.
10819

10820
    """
10821
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10822
    return (nl, nl)
10823

    
10824
  def CheckPrereq(self):
10825
    """Check prerequisites.
10826

10827
    This only checks the instance list against the existing names.
10828

10829
    """
10830
    # checking the new params on the primary/secondary nodes
10831

    
10832
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10833
    cluster = self.cluster = self.cfg.GetClusterInfo()
10834
    assert self.instance is not None, \
10835
      "Cannot retrieve locked instance %s" % self.op.instance_name
10836
    pnode = instance.primary_node
10837
    nodelist = list(instance.all_nodes)
10838

    
10839
    # OS change
10840
    if self.op.os_name and not self.op.force:
10841
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10842
                      self.op.force_variant)
10843
      instance_os = self.op.os_name
10844
    else:
10845
      instance_os = instance.os
10846

    
10847
    if self.op.disk_template:
10848
      if instance.disk_template == self.op.disk_template:
10849
        raise errors.OpPrereqError("Instance already has disk template %s" %
10850
                                   instance.disk_template, errors.ECODE_INVAL)
10851

    
10852
      if (instance.disk_template,
10853
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10854
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10855
                                   " %s to %s" % (instance.disk_template,
10856
                                                  self.op.disk_template),
10857
                                   errors.ECODE_INVAL)
10858
      _CheckInstanceDown(self, instance, "cannot change disk template")
10859
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10860
        if self.op.remote_node == pnode:
10861
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10862
                                     " as the primary node of the instance" %
10863
                                     self.op.remote_node, errors.ECODE_STATE)
10864
        _CheckNodeOnline(self, self.op.remote_node)
10865
        _CheckNodeNotDrained(self, self.op.remote_node)
10866
        # FIXME: here we assume that the old instance type is DT_PLAIN
10867
        assert instance.disk_template == constants.DT_PLAIN
10868
        disks = [{constants.IDISK_SIZE: d.size,
10869
                  constants.IDISK_VG: d.logical_id[0]}
10870
                 for d in instance.disks]
10871
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10872
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10873

    
10874
    # hvparams processing
10875
    if self.op.hvparams:
10876
      hv_type = instance.hypervisor
10877
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10878
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10879
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10880

    
10881
      # local check
10882
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10883
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10884
      self.hv_new = hv_new # the new actual values
10885
      self.hv_inst = i_hvdict # the new dict (without defaults)
10886
    else:
10887
      self.hv_new = self.hv_inst = {}
10888

    
10889
    # beparams processing
10890
    if self.op.beparams:
10891
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10892
                                   use_none=True)
10893
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10894
      be_new = cluster.SimpleFillBE(i_bedict)
10895
      self.be_new = be_new # the new actual values
10896
      self.be_inst = i_bedict # the new dict (without defaults)
10897
    else:
10898
      self.be_new = self.be_inst = {}
10899
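    # Keep the fully filled old beparams for the memory checks below.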
    be_old = cluster.FillBE(instance)
10900

    
10901
    # osparams processing
10902
    if self.op.osparams:
10903
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10904
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10905
      self.os_inst = i_osdict # the new dict (without defaults)
10906
    else:
10907
      self.os_inst = {}
10908

    
10909
    self.warn = []
10910

    
10911
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10912
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10913
      mem_check_list = [pnode]
10914
      if be_new[constants.BE_AUTO_BALANCE]:
10915
        # either we changed auto_balance to yes or it was from before
10916
        mem_check_list.extend(instance.secondary_nodes)
10917
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10918
                                                  instance.hypervisor)
10919
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10920
                                         instance.hypervisor)
10921
      pninfo = nodeinfo[pnode]
10922
      msg = pninfo.fail_msg
10923
      if msg:
10924
        # Assume the primary node is unreachable and go ahead
10925
        self.warn.append("Can't get info from primary node %s: %s" %
10926
                         (pnode, msg))
10927
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10928
        self.warn.append("Node data from primary node %s doesn't contain"
10929
                         " free memory information" % pnode)
10930
      elif instance_info.fail_msg:
10931
        self.warn.append("Can't get instance runtime information: %s" %
10932
                        instance_info.fail_msg)
10933
      else:
10934
        if instance_info.payload:
10935
          current_mem = int(instance_info.payload["memory"])
10936
        else:
10937
          # Assume instance not running
10938
          # (there is a slight race condition here, but it's not very probable,
10939
          # and we have no other way to check)
10940
          current_mem = 0
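        # The new memory size must fit into the node's free memory plus
        # whatever the instance is already using on that node.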
10941
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10942
                    pninfo.payload["memory_free"])
10943
        if miss_mem > 0:
10944
          raise errors.OpPrereqError("This change will prevent the instance"
10945
                                     " from starting, due to %d MB of memory"
10946
                                     " missing on its primary node" % miss_mem,
10947
                                     errors.ECODE_NORES)
10948

    
10949
      if be_new[constants.BE_AUTO_BALANCE]:
10950
        for node, nres in nodeinfo.items():
10951
          if node not in instance.secondary_nodes:
10952
            continue
10953
          nres.Raise("Can't get info from secondary node %s" % node,
10954
                     prereq=True, ecode=errors.ECODE_STATE)
10955
          if not isinstance(nres.payload.get("memory_free", None), int):
10956
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10957
                                       " memory information" % node,
10958
                                       errors.ECODE_STATE)
10959
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10960
            raise errors.OpPrereqError("This change will prevent the instance"
10961
                                       " from failover to its secondary node"
10962
                                       " %s, due to not enough memory" % node,
10963
                                       errors.ECODE_STATE)
10964

    
10965
    # NIC processing
10966
    self.nic_pnew = {}
10967
    self.nic_pinst = {}
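    # Each entry in self.op.nics is (nic_op, nic_dict); nic_op is either the
    # index of an existing NIC or constants.DDM_ADD / constants.DDM_REMOVE.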
10968
    for nic_op, nic_dict in self.op.nics:
10969
      if nic_op == constants.DDM_REMOVE:
10970
        if not instance.nics:
10971
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10972
                                     errors.ECODE_INVAL)
10973
        continue
10974
      if nic_op != constants.DDM_ADD:
10975
        # an existing nic
10976
        if not instance.nics:
10977
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10978
                                     " no NICs" % nic_op,
10979
                                     errors.ECODE_INVAL)
10980
        if nic_op < 0 or nic_op >= len(instance.nics):
10981
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10982
                                     " are 0 to %d" %
10983
                                     (nic_op, len(instance.nics) - 1),
10984
                                     errors.ECODE_INVAL)
10985
        old_nic_params = instance.nics[nic_op].nicparams
10986
        old_nic_ip = instance.nics[nic_op].ip
10987
      else:
10988
        old_nic_params = {}
10989
        old_nic_ip = None
10990

    
10991
      update_params_dict = dict([(key, nic_dict[key])
10992
                                 for key in constants.NICS_PARAMETERS
10993
                                 if key in nic_dict])
10994

    
10995
      if "bridge" in nic_dict:
10996
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10997

    
10998
      new_nic_params = _GetUpdatedParams(old_nic_params,
10999
                                         update_params_dict)
11000
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11001
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11002
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11003
      self.nic_pinst[nic_op] = new_nic_params
11004
      self.nic_pnew[nic_op] = new_filled_nic_params
11005
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11006

    
11007
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11008
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11009
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11010
        if msg:
11011
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11012
          if self.op.force:
11013
            self.warn.append(msg)
11014
          else:
11015
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11016
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11017
        if constants.INIC_IP in nic_dict:
11018
          nic_ip = nic_dict[constants.INIC_IP]
11019
        else:
11020
          nic_ip = old_nic_ip
11021
        if nic_ip is None:
11022
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11023
                                     " on a routed nic", errors.ECODE_INVAL)
11024
      if constants.INIC_MAC in nic_dict:
11025
        nic_mac = nic_dict[constants.INIC_MAC]
11026
        if nic_mac is None:
11027
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11028
                                     errors.ECODE_INVAL)
11029
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11030
          # otherwise generate the mac
11031
          nic_dict[constants.INIC_MAC] = \
11032
            self.cfg.GenerateMAC(self.proc.GetECId())
11033
        else:
11034
          # or validate/reserve the current one
11035
          try:
11036
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11037
          except errors.ReservationError:
11038
            raise errors.OpPrereqError("MAC address %s already in use"
11039
                                       " in cluster" % nic_mac,
11040
                                       errors.ECODE_NOTUNIQUE)
11041

    
11042
    # DISK processing
11043
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11044
      raise errors.OpPrereqError("Disk operations not supported for"
11045
                                 " diskless instances",
11046
                                 errors.ECODE_INVAL)
11047
    for disk_op, _ in self.op.disks:
11048
      if disk_op == constants.DDM_REMOVE:
11049
        if len(instance.disks) == 1:
11050
          raise errors.OpPrereqError("Cannot remove the last disk of"
11051
                                     " an instance", errors.ECODE_INVAL)
11052
        _CheckInstanceDown(self, instance, "cannot remove disks")
11053

    
11054
      if (disk_op == constants.DDM_ADD and
11055
          len(instance.disks) >= constants.MAX_DISKS):
11056
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11057
                                   " add more" % constants.MAX_DISKS,
11058
                                   errors.ECODE_STATE)
11059
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11060
        # an existing disk
11061
        if disk_op < 0 or disk_op >= len(instance.disks):
11062
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11063
                                     " are 0 to %d" %
11064
                                     (disk_op, len(instance.disks) - 1),
11065
                                     errors.ECODE_INVAL)
11066

    
11067
    return
11068

    
11069
  def _ConvertPlainToDrbd(self, feedback_fn):
11070
    """Converts an instance from plain to drbd.
11071

11072
    """
11073
    feedback_fn("Converting template to drbd")
11074
    instance = self.instance
11075
    pnode = instance.primary_node
11076
    snode = self.op.remote_node
11077

    
11078
    # create a fake disk info for _GenerateDiskTemplate
11079
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11080
                  constants.IDISK_VG: d.logical_id[0]}
11081
                 for d in instance.disks]
11082
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11083
                                      instance.name, pnode, [snode],
11084
                                      disk_info, None, None, 0, feedback_fn)
11085
    info = _GetInstanceInfoText(instance)
11086
    feedback_fn("Creating aditional volumes...")
11087
    # first, create the missing data and meta devices
11088
    for disk in new_disks:
11089
      # unfortunately this is... not too nice
11090
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11091
                            info, True)
11092
      for child in disk.children:
11093
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11094
    # at this stage, all new LVs have been created, we can rename the
11095
    # old ones
11096
    feedback_fn("Renaming original volumes...")
11097
    rename_list = [(o, n.children[0].logical_id)
11098
                   for (o, n) in zip(instance.disks, new_disks)]
11099
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11100
    result.Raise("Failed to rename original LVs")
11101

    
11102
    feedback_fn("Initializing DRBD devices...")
11103
    # all child devices are in place, we can now create the DRBD devices
11104
    for disk in new_disks:
11105
      for node in [pnode, snode]:
11106
        f_create = node == pnode
11107
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11108

    
11109
    # at this point, the instance has been modified
11110
    instance.disk_template = constants.DT_DRBD8
11111
    instance.disks = new_disks
11112
    self.cfg.Update(instance, feedback_fn)
11113

    
11114
    # disks are created, waiting for sync
11115
    disk_abort = not _WaitForSync(self, instance,
11116
                                  oneshot=not self.op.wait_for_sync)
11117
    if disk_abort:
11118
      raise errors.OpExecError("There are some degraded disks for"
11119
                               " this instance, please cleanup manually")
11120

    
11121
  def _ConvertDrbdToPlain(self, feedback_fn):
11122
    """Converts an instance from drbd to plain.
11123

11124
    """
11125
    instance = self.instance
11126
    assert len(instance.secondary_nodes) == 1
11127
    pnode = instance.primary_node
11128
    snode = instance.secondary_nodes[0]
11129
    feedback_fn("Converting template to plain")
11130

    
11131
    old_disks = instance.disks
11132
    new_disks = [d.children[0] for d in old_disks]
11133

    
11134
    # copy over size and mode
11135
    for parent, child in zip(old_disks, new_disks):
11136
      child.size = parent.size
11137
      child.mode = parent.mode
11138

    
11139
    # this is a DRBD disk, return its port to the pool
11140
    # NOTE: this must be done right before the call to cfg.Update!
11141
    for disk in old_disks:
11142
      tcp_port = disk.logical_id[2]
11143
      self.cfg.AddTcpUdpPort(tcp_port)
11144

    
11145
    # update instance structure
11146
    instance.disks = new_disks
11147
    instance.disk_template = constants.DT_PLAIN
11148
    self.cfg.Update(instance, feedback_fn)
11149

    
11150
    feedback_fn("Removing volumes on the secondary node...")
11151
    for disk in old_disks:
11152
      self.cfg.SetDiskID(disk, snode)
11153
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11154
      if msg:
11155
        self.LogWarning("Could not remove block device %s on node %s,"
11156
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11157

    
11158
    feedback_fn("Removing unneeded volumes on the primary node...")
11159
    for idx, disk in enumerate(old_disks):
11160
      meta = disk.children[1]
11161
      self.cfg.SetDiskID(meta, pnode)
11162
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11163
      if msg:
11164
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11165
                        " continuing anyway: %s", idx, pnode, msg)
11166

    
11167
  def Exec(self, feedback_fn):
11168
    """Modifies an instance.
11169

11170
    All parameters take effect only at the next restart of the instance.
11171

11172
    """
11173
    # Process here the warnings from CheckPrereq, as we don't have a
11174
    # feedback_fn there.
11175
    for warn in self.warn:
11176
      feedback_fn("WARNING: %s" % warn)
11177

    
11178
    result = []
11179
    instance = self.instance
11180
    # disk changes
11181
    for disk_op, disk_dict in self.op.disks:
11182
      if disk_op == constants.DDM_REMOVE:
11183
        # remove the last disk
11184
        device = instance.disks.pop()
11185
        device_idx = len(instance.disks)
11186
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11187
          self.cfg.SetDiskID(disk, node)
11188
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11189
          if msg:
11190
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11191
                            " continuing anyway", device_idx, node, msg)
11192
        result.append(("disk/%d" % device_idx, "remove"))
11193

    
11194
        # if this is a DRBD disk, return its port to the pool
11195
        if device.dev_type in constants.LDS_DRBD:
11196
          tcp_port = device.logical_id[2]
11197
          self.cfg.AddTcpUdpPort(tcp_port)
11198
      elif disk_op == constants.DDM_ADD:
11199
        # add a new disk
11200
        if instance.disk_template in (constants.DT_FILE,
11201
                                      constants.DT_SHARED_FILE):
11202
          file_driver, file_path = instance.disks[0].logical_id
11203
          file_path = os.path.dirname(file_path)
11204
        else:
11205
          file_driver = file_path = None
11206
        disk_idx_base = len(instance.disks)
11207
        new_disk = _GenerateDiskTemplate(self,
11208
                                         instance.disk_template,
11209
                                         instance.name, instance.primary_node,
11210
                                         instance.secondary_nodes,
11211
                                         [disk_dict],
11212
                                         file_path,
11213
                                         file_driver,
11214
                                         disk_idx_base, feedback_fn)[0]
11215
        instance.disks.append(new_disk)
11216
        info = _GetInstanceInfoText(instance)
11217

    
11218
        logging.info("Creating volume %s for instance %s",
11219
                     new_disk.iv_name, instance.name)
11220
        # Note: this needs to be kept in sync with _CreateDisks
11221
        #HARDCODE
11222
        for node in instance.all_nodes:
11223
          f_create = node == instance.primary_node
11224
          try:
11225
            _CreateBlockDev(self, node, instance, new_disk,
11226
                            f_create, info, f_create)
11227
          except errors.OpExecError, err:
11228
            self.LogWarning("Failed to create volume %s (%s) on"
11229
                            " node %s: %s",
11230
                            new_disk.iv_name, new_disk, node, err)
11231
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11232
                       (new_disk.size, new_disk.mode)))
11233
      else:
11234
        # change a given disk
11235
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11236
        result.append(("disk.mode/%d" % disk_op,
11237
                       disk_dict[constants.IDISK_MODE]))
11238

    
11239
    if self.op.disk_template:
11240
      r_shut = _ShutdownInstanceDisks(self, instance)
11241
      if not r_shut:
11242
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11243
                                 " proceed with disk template conversion")
11244
      mode = (instance.disk_template, self.op.disk_template)
11245
      try:
11246
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11247
      except:
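        # Release the DRBD minors reserved for the new disk layout before
        # re-raising, so they can be reused later.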
11248
        self.cfg.ReleaseDRBDMinors(instance.name)
11249
        raise
11250
      result.append(("disk_template", self.op.disk_template))
11251

    
11252
    # NIC changes
11253
    for nic_op, nic_dict in self.op.nics:
11254
      if nic_op == constants.DDM_REMOVE:
11255
        # remove the last nic
11256
        del instance.nics[-1]
11257
        result.append(("nic.%d" % len(instance.nics), "remove"))
11258
      elif nic_op == constants.DDM_ADD:
11259
        # mac and bridge should be set, by now
11260
        mac = nic_dict[constants.INIC_MAC]
11261
        ip = nic_dict.get(constants.INIC_IP, None)
11262
        nicparams = self.nic_pinst[constants.DDM_ADD]
11263
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11264
        instance.nics.append(new_nic)
11265
        result.append(("nic.%d" % (len(instance.nics) - 1),
11266
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11267
                       (new_nic.mac, new_nic.ip,
11268
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11269
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11270
                       )))
11271
      else:
11272
        for key in (constants.INIC_MAC, constants.INIC_IP):
11273
          if key in nic_dict:
11274
            setattr(instance.nics[nic_op], key, nic_dict[key])
11275
        if nic_op in self.nic_pinst:
11276
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11277
        for key, val in nic_dict.iteritems():
11278
          result.append(("nic.%s/%d" % (key, nic_op), val))
11279

    
11280
    # hvparams changes
11281
    if self.op.hvparams:
11282
      instance.hvparams = self.hv_inst
11283
      for key, val in self.op.hvparams.iteritems():
11284
        result.append(("hv/%s" % key, val))
11285

    
11286
    # beparams changes
11287
    if self.op.beparams:
11288
      instance.beparams = self.be_inst
11289
      for key, val in self.op.beparams.iteritems():
11290
        result.append(("be/%s" % key, val))
11291

    
11292
    # OS change
11293
    if self.op.os_name:
11294
      instance.os = self.op.os_name
11295

    
11296
    # osparams changes
11297
    if self.op.osparams:
11298
      instance.osparams = self.os_inst
11299
      for key, val in self.op.osparams.iteritems():
11300
        result.append(("os/%s" % key, val))
11301

    
11302
    self.cfg.Update(instance, feedback_fn)
11303

    
11304
    return result
11305

    
11306
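  # Supported disk template conversions: (current, new) template pairs mapped
  # to the helper method implementing that conversion.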
  _DISK_CONVERSIONS = {
11307
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11308
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11309
    }
11310

    
11311

    
11312
class LUInstanceChangeGroup(LogicalUnit):
11313
  HPATH = "instance-change-group"
11314
  HTYPE = constants.HTYPE_INSTANCE
11315
  REQ_BGL = False
11316

    
11317
  def ExpandNames(self):
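    # Shared locks are enough here; the actual moves are performed by the
    # jobs submitted from Exec().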
11318
    self.share_locks = _ShareAll()
11319
    self.needed_locks = {
11320
      locking.LEVEL_NODEGROUP: [],
11321
      locking.LEVEL_NODE: [],
11322
      }
11323

    
11324
    self._ExpandAndLockInstance()
11325

    
11326
    if self.op.target_groups:
11327
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11328
                                  self.op.target_groups)
11329
    else:
11330
      self.req_target_uuids = None
11331

    
11332
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11333

    
11334
  def DeclareLocks(self, level):
11335
    if level == locking.LEVEL_NODEGROUP:
11336
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11337

    
11338
      if self.req_target_uuids:
11339
        lock_groups = set(self.req_target_uuids)
11340

    
11341
        # Lock all groups used by instance optimistically; this requires going
11342
        # via the node before it's locked, requiring verification later on
11343
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11344
        lock_groups.update(instance_groups)
11345
      else:
11346
        # No target groups, need to lock all of them
11347
        lock_groups = locking.ALL_SET
11348

    
11349
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11350

    
11351
    elif level == locking.LEVEL_NODE:
11352
      if self.req_target_uuids:
11353
        # Lock all nodes used by instances
11354
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11355
        self._LockInstancesNodes()
11356

    
11357
        # Lock all nodes in all potential target groups
11358
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11359
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11360
        member_nodes = [node_name
11361
                        for group in lock_groups
11362
                        for node_name in self.cfg.GetNodeGroup(group).members]
11363
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11364
      else:
11365
        # Lock all nodes as all groups are potential targets
11366
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11367

    
11368
  def CheckPrereq(self):
11369
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11370
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11371
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11372

    
11373
    assert (self.req_target_uuids is None or
11374
            owned_groups.issuperset(self.req_target_uuids))
11375
    assert owned_instances == set([self.op.instance_name])
11376

    
11377
    # Get instance information
11378
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11379

    
11380
    # Check if node groups for locked instance are still correct
11381
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11382
      ("Instance %s's nodes changed while we kept the lock" %
11383
       self.op.instance_name)
11384

    
11385
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11386
                                           owned_groups)
11387

    
11388
    if self.req_target_uuids:
11389
      # User requested specific target groups
11390
      self.target_uuids = frozenset(self.req_target_uuids)
11391
    else:
11392
      # All groups except those used by the instance are potential targets
11393
      self.target_uuids = owned_groups - inst_groups
11394

    
11395
    conflicting_groups = self.target_uuids & inst_groups
11396
    if conflicting_groups:
11397
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11398
                                 " used by the instance '%s'" %
11399
                                 (utils.CommaJoin(conflicting_groups),
11400
                                  self.op.instance_name),
11401
                                 errors.ECODE_INVAL)
11402

    
11403
    if not self.target_uuids:
11404
      raise errors.OpPrereqError("There are no possible target groups",
11405
                                 errors.ECODE_INVAL)
11406

    
11407
  def BuildHooksEnv(self):
11408
    """Build hooks env.
11409

11410
    """
11411
    assert self.target_uuids
11412

    
11413
    env = {
11414
      "TARGET_GROUPS": " ".join(self.target_uuids),
11415
      }
11416

    
11417
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11418

    
11419
    return env
11420

    
11421
  def BuildHooksNodes(self):
11422
    """Build hooks nodes.
11423

11424
    """
11425
    mn = self.cfg.GetMasterNode()
11426
    return ([mn], [mn])
11427

    
11428
  def Exec(self, feedback_fn):
11429
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11430

    
11431
    assert instances == [self.op.instance_name], "Instance not locked"
11432

    
11433
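    # Ask the instance allocator for a change-group plan; its result is
    # turned into jobs below.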
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11434
                     instances=instances, target_groups=list(self.target_uuids))
11435

    
11436
    ial.Run(self.op.iallocator)
11437

    
11438
    if not ial.success:
11439
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11440
                                 " instance '%s' using iallocator '%s': %s" %
11441
                                 (self.op.instance_name, self.op.iallocator,
11442
                                  ial.info),
11443
                                 errors.ECODE_NORES)
11444

    
11445
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11446

    
11447
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11448
                 " instance '%s'", len(jobs), self.op.instance_name)
11449

    
11450
    return ResultWithJobs(jobs)
11451

    
11452

    
11453
class LUBackupQuery(NoHooksLU):
11454
  """Query the exports list
11455

11456
  """
11457
  REQ_BGL = False
11458

    
11459
  def ExpandNames(self):
11460
    self.needed_locks = {}
11461
    self.share_locks[locking.LEVEL_NODE] = 1
11462
    if not self.op.nodes:
11463
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11464
    else:
11465
      self.needed_locks[locking.LEVEL_NODE] = \
11466
        _GetWantedNodes(self, self.op.nodes)
11467

    
11468
  def Exec(self, feedback_fn):
11469
    """Compute the list of all the exported system images.
11470

11471
    @rtype: dict
11472
    @return: a dictionary with the structure node->(export-list)
11473
        where export-list is a list of the instances exported on
11474
        that node.
11475

11476
    """
11477
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11478
    rpcresult = self.rpc.call_export_list(self.nodes)
11479
    result = {}
11480
    for node in rpcresult:
11481
      if rpcresult[node].fail_msg:
11482
        result[node] = False
11483
      else:
11484
        result[node] = rpcresult[node].payload
11485

    
11486
    return result
11487

    
11488

    
11489
class LUBackupPrepare(NoHooksLU):
11490
  """Prepares an instance for an export and returns useful information.
11491

11492
  """
11493
  REQ_BGL = False
11494

    
11495
  def ExpandNames(self):
11496
    self._ExpandAndLockInstance()
11497

    
11498
  def CheckPrereq(self):
11499
    """Check prerequisites.
11500

11501
    """
11502
    instance_name = self.op.instance_name
11503

    
11504
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11505
    assert self.instance is not None, \
11506
          "Cannot retrieve locked instance %s" % self.op.instance_name
11507
    _CheckNodeOnline(self, self.instance.primary_node)
11508

    
11509
    self._cds = _GetClusterDomainSecret()
11510

    
11511
  def Exec(self, feedback_fn):
11512
    """Prepares an instance for an export.
11513

11514
    """
11515
    instance = self.instance
11516

    
11517
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11518
      salt = utils.GenerateSecret(8)
11519

    
11520
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11521
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11522
                                              constants.RIE_CERT_VALIDITY)
11523
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11524

    
11525
      (name, cert_pem) = result.payload
11526

    
11527
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11528
                                             cert_pem)
11529

    
11530
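      # Everything the remote side needs to verify the import: the handshake,
      # the HMAC-signed key name and the signed X509 CA certificate.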
      return {
11531
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11532
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11533
                          salt),
11534
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11535
        }
11536

    
11537
    return None
11538

    
11539

    
11540
class LUBackupExport(LogicalUnit):
11541
  """Export an instance to an image in the cluster.
11542

11543
  """
11544
  HPATH = "instance-export"
11545
  HTYPE = constants.HTYPE_INSTANCE
11546
  REQ_BGL = False
11547

    
11548
  def CheckArguments(self):
11549
    """Check the arguments.
11550

11551
    """
11552
    self.x509_key_name = self.op.x509_key_name
11553
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11554

    
11555
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11556
      if not self.x509_key_name:
11557
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11558
                                   errors.ECODE_INVAL)
11559

    
11560
      if not self.dest_x509_ca_pem:
11561
        raise errors.OpPrereqError("Missing destination X509 CA",
11562
                                   errors.ECODE_INVAL)
11563

    
11564
  def ExpandNames(self):
11565
    self._ExpandAndLockInstance()
11566

    
11567
    # Lock all nodes for local exports
11568
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11569
      # FIXME: lock only instance primary and destination node
11570
      #
11571
      # Sad but true, for now we have to lock all nodes, as we don't know where
11572
      # the previous export might be, and in this LU we search for it and
11573
      # remove it from its current node. In the future we could fix this by:
11574
      #  - making a tasklet to search (share-lock all), then create the
11575
      #    new one, then one to remove, after
11576
      #  - removing the removal operation altogether
11577
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11578

    
11579
  def DeclareLocks(self, level):
11580
    """Last minute lock declaration."""
11581
    # All nodes are locked anyway, so nothing to do here.
11582

    
11583
  def BuildHooksEnv(self):
11584
    """Build hooks env.
11585

11586
    This will run on the master, primary node and target node.
11587

11588
    """
11589
    env = {
11590
      "EXPORT_MODE": self.op.mode,
11591
      "EXPORT_NODE": self.op.target_node,
11592
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11593
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11594
      # TODO: Generic function for boolean env variables
11595
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11596
      }
11597

    
11598
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11599

    
11600
    return env
11601

    
11602
  def BuildHooksNodes(self):
11603
    """Build hooks nodes.
11604

11605
    """
11606
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11607

    
11608
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11609
      nl.append(self.op.target_node)
11610

    
11611
    return (nl, nl)
11612

    
11613
  def CheckPrereq(self):
11614
    """Check prerequisites.
11615

11616
    This checks that the instance and node names are valid.
11617

11618
    """
11619
    instance_name = self.op.instance_name
11620

    
11621
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11622
    assert self.instance is not None, \
11623
          "Cannot retrieve locked instance %s" % self.op.instance_name
11624
    _CheckNodeOnline(self, self.instance.primary_node)
11625

    
11626
    if (self.op.remove_instance and self.instance.admin_up and
11627
        not self.op.shutdown):
11628
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11629
                                 " down before")
11630

    
11631
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11632
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11633
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11634
      assert self.dst_node is not None
11635

    
11636
      _CheckNodeOnline(self, self.dst_node.name)
11637
      _CheckNodeNotDrained(self, self.dst_node.name)
11638

    
11639
      self._cds = None
11640
      self.dest_disk_info = None
11641
      self.dest_x509_ca = None
11642

    
11643
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11644
      self.dst_node = None
11645

    
11646
      if len(self.op.target_node) != len(self.instance.disks):
11647
        raise errors.OpPrereqError(("Received destination information for %s"
11648
                                    " disks, but instance %s has %s disks") %
11649
                                   (len(self.op.target_node), instance_name,
11650
                                    len(self.instance.disks)),
11651
                                   errors.ECODE_INVAL)
11652

    
11653
      cds = _GetClusterDomainSecret()
11654

    
11655
      # Check X509 key name
11656
      try:
11657
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11658
      except (TypeError, ValueError), err:
11659
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11660

    
11661
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11662
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11663
                                   errors.ECODE_INVAL)
11664

    
11665
      # Load and verify CA
11666
      try:
11667
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11668
      except OpenSSL.crypto.Error, err:
11669
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11670
                                   (err, ), errors.ECODE_INVAL)
11671

    
11672
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11673
      if errcode is not None:
11674
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11675
                                   (msg, ), errors.ECODE_INVAL)
11676

    
11677
      self.dest_x509_ca = cert
11678

    
11679
      # Verify target information
11680
      disk_info = []
11681
      for idx, disk_data in enumerate(self.op.target_node):
11682
        try:
11683
          (host, port, magic) = \
11684
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11685
        except errors.GenericError, err:
11686
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11687
                                     (idx, err), errors.ECODE_INVAL)
11688

    
11689
        disk_info.append((host, port, magic))
11690

    
11691
      assert len(disk_info) == len(self.op.target_node)
11692
      self.dest_disk_info = disk_info
11693

    
11694
    else:
11695
      raise errors.ProgrammerError("Unhandled export mode %r" %
11696
                                   self.op.mode)
11697

    
11698
    # instance disk type verification
11699
    # TODO: Implement export support for file-based disks
11700
    for disk in self.instance.disks:
11701
      if disk.dev_type == constants.LD_FILE:
11702
        raise errors.OpPrereqError("Export not supported for instances with"
11703
                                   " file-based disks", errors.ECODE_INVAL)
11704

    
11705
  def _CleanupExports(self, feedback_fn):
11706
    """Removes exports of current instance from all other nodes.
11707

11708
    If an instance in a cluster with nodes A..D was exported to node C, its
11709
    exports will be removed from the nodes A, B and D.
11710

11711
    """
11712
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11713

    
11714
    nodelist = self.cfg.GetNodeList()
11715
    nodelist.remove(self.dst_node.name)
11716

    
11717
    # on one-node clusters nodelist will be empty after the removal; if we
11718
    # proceeded, the backup would be removed because OpBackupQuery
11719
    # substitutes an empty list with the full cluster node list.
11720
    iname = self.instance.name
11721
    if nodelist:
11722
      feedback_fn("Removing old exports for instance %s" % iname)
11723
      exportlist = self.rpc.call_export_list(nodelist)
11724
      for node in exportlist:
11725
        if exportlist[node].fail_msg:
11726
          continue
11727
        if iname in exportlist[node].payload:
11728
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11729
          if msg:
11730
            self.LogWarning("Could not remove older export for instance %s"
11731
                            " on node %s: %s", iname, node, msg)
11732

    
11733
  def Exec(self, feedback_fn):
11734
    """Export an instance to an image in the cluster.
11735

11736
    """
11737
    assert self.op.mode in constants.EXPORT_MODES
11738

    
11739
    instance = self.instance
11740
    src_node = instance.primary_node
11741

    
11742
    if self.op.shutdown:
11743
      # shutdown the instance, but not the disks
11744
      feedback_fn("Shutting down instance %s" % instance.name)
11745
      result = self.rpc.call_instance_shutdown(src_node, instance,
11746
                                               self.op.shutdown_timeout)
11747
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11748
      result.Raise("Could not shutdown instance %s on"
11749
                   " node %s" % (instance.name, src_node))
11750

    
11751
    # set the disk IDs correctly since call_instance_start needs the
11752
    # correct drbd minor to create the symlinks
11753
    for disk in instance.disks:
11754
      self.cfg.SetDiskID(disk, src_node)
11755

    
11756
    activate_disks = (not instance.admin_up)
11757

    
11758
    if activate_disks:
11759
      # Activate the instance disks if we're exporting a stopped instance
11760
      feedback_fn("Activating disks for %s" % instance.name)
11761
      _StartInstanceDisks(self, instance, None)
11762

    
11763
    try:
11764
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11765
                                                     instance)
11766

    
11767
      helper.CreateSnapshots()
11768
      try:
11769
        if (self.op.shutdown and instance.admin_up and
11770
            not self.op.remove_instance):
11771
          assert not activate_disks
11772
          feedback_fn("Starting instance %s" % instance.name)
11773
          result = self.rpc.call_instance_start(src_node, instance,
11774
                                                None, None, False)
11775
          msg = result.fail_msg
11776
          if msg:
11777
            feedback_fn("Failed to start instance: %s" % msg)
11778
            _ShutdownInstanceDisks(self, instance)
11779
            raise errors.OpExecError("Could not start instance: %s" % msg)
11780

    
11781
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11782
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11783
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11784
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11785
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11786

    
11787
          (key_name, _, _) = self.x509_key_name
11788

    
11789
          dest_ca_pem = \
11790
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11791
                                            self.dest_x509_ca)
11792

    
11793
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11794
                                                     key_name, dest_ca_pem,
11795
                                                     timeouts)
11796
      finally:
11797
        helper.Cleanup()
11798

    
11799
      # Check for backwards compatibility
11800
      assert len(dresults) == len(instance.disks)
11801
      assert compat.all(isinstance(i, bool) for i in dresults), \
11802
             "Not all results are boolean: %r" % dresults
11803

    
11804
    finally:
11805
      if activate_disks:
11806
        feedback_fn("Deactivating disks for %s" % instance.name)
11807
        _ShutdownInstanceDisks(self, instance)
11808

    
11809
    if not (compat.all(dresults) and fin_resu):
11810
      failures = []
11811
      if not fin_resu:
11812
        failures.append("export finalization")
11813
      if not compat.all(dresults):
11814
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11815
                               if not dsk)
11816
        failures.append("disk export: disk(s) %s" % fdsk)
11817

    
11818
      raise errors.OpExecError("Export failed, errors in %s" %
11819
                               utils.CommaJoin(failures))
11820

    
11821
    # At this point, the export was successful, we can cleanup/finish
11822

    
11823
    # Remove instance if requested
11824
    if self.op.remove_instance:
11825
      feedback_fn("Removing instance %s" % instance.name)
11826
      _RemoveInstance(self, feedback_fn, instance,
11827
                      self.op.ignore_remove_failures)
11828

    
11829
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11830
      self._CleanupExports(feedback_fn)
11831

    
11832
    return fin_resu, dresults
11833

    
11834

    
11835
class LUBackupRemove(NoHooksLU):
11836
  """Remove exports related to the named instance.
11837

11838
  """
11839
  REQ_BGL = False
11840

    
11841
  def ExpandNames(self):
11842
    self.needed_locks = {}
11843
    # We need all nodes to be locked in order for RemoveExport to work, but we
11844
    # don't need to lock the instance itself, as nothing will happen to it (and
11845
    # we can remove exports also for a removed instance)
11846
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11847

    
11848
  def Exec(self, feedback_fn):
11849
    """Remove any export.
11850

11851
    """
11852
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11853
    # If the instance was not found we'll try with the name that was passed in.
11854
    # This will only work if it was an FQDN, though.
11855
    fqdn_warn = False
11856
    if not instance_name:
11857
      fqdn_warn = True
11858
      instance_name = self.op.instance_name
11859

    
11860
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11861
    exportlist = self.rpc.call_export_list(locked_nodes)
11862
    found = False
11863
    for node in exportlist:
11864
      msg = exportlist[node].fail_msg
11865
      if msg:
11866
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11867
        continue
11868
      if instance_name in exportlist[node].payload:
11869
        found = True
11870
        result = self.rpc.call_export_remove(node, instance_name)
11871
        msg = result.fail_msg
11872
        if msg:
11873
          logging.error("Could not remove export for instance %s"
11874
                        " on node %s: %s", instance_name, node, msg)
11875

    
11876
    if fqdn_warn and not found:
11877
      feedback_fn("Export not found. If trying to remove an export belonging"
11878
                  " to a deleted instance please use its Fully Qualified"
11879
                  " Domain Name.")
11880

    
11881

    
11882
class LUGroupAdd(LogicalUnit):
11883
  """Logical unit for creating node groups.
11884

11885
  """
11886
  HPATH = "group-add"
11887
  HTYPE = constants.HTYPE_GROUP
11888
  REQ_BGL = False
11889

    
11890
  def ExpandNames(self):
11891
    # We need the new group's UUID here so that we can create and acquire the
11892
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11893
    # that it should not check whether the UUID exists in the configuration.
11894
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11895
    self.needed_locks = {}
11896
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11897

    
11898
  def CheckPrereq(self):
11899
    """Check prerequisites.
11900

11901
    This checks that the given group name is not an existing node group
11902
    already.
11903

11904
    """
11905
    try:
11906
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11907
    except errors.OpPrereqError:
11908
      pass
11909
    else:
11910
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11911
                                 " node group (UUID: %s)" %
11912
                                 (self.op.group_name, existing_uuid),
11913
                                 errors.ECODE_EXISTS)
11914

    
11915
    if self.op.ndparams:
11916
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11917

    
11918
  def BuildHooksEnv(self):
11919
    """Build hooks env.
11920

11921
    """
11922
    return {
11923
      "GROUP_NAME": self.op.group_name,
11924
      }
11925

    
11926
  def BuildHooksNodes(self):
11927
    """Build hooks nodes.
11928

11929
    """
11930
    mn = self.cfg.GetMasterNode()
11931
    return ([mn], [mn])
11932

    
11933
  def Exec(self, feedback_fn):
11934
    """Add the node group to the cluster.
11935

11936
    """
11937
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11938
                                  uuid=self.group_uuid,
11939
                                  alloc_policy=self.op.alloc_policy,
11940
                                  ndparams=self.op.ndparams)
11941

    
11942
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11943
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11944

    
11945

    
11946
class LUGroupAssignNodes(NoHooksLU):
11947
  """Logical unit for assigning nodes to groups.
11948

11949
  """
11950
  REQ_BGL = False
11951

    
11952
  def ExpandNames(self):
11953
    # These raise errors.OpPrereqError on their own:
11954
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11955
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11956

    
11957
    # We want to lock all the affected nodes and groups. We have readily
11958
    # available the list of nodes, and the *destination* group. To gather the
11959
    # list of "source" groups, we need to fetch node information later on.
11960
    self.needed_locks = {
11961
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11962
      locking.LEVEL_NODE: self.op.nodes,
11963
      }
11964

    
11965
  def DeclareLocks(self, level):
11966
    if level == locking.LEVEL_NODEGROUP:
11967
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11968

    
11969
      # Try to get all affected nodes' groups without having the group or node
11970
      # lock yet. Needs verification later in the code flow.
11971
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11972

    
11973
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11974

    
11975
  def CheckPrereq(self):
11976
    """Check prerequisites.
11977

11978
    """
11979
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11980
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11981
            frozenset(self.op.nodes))
11982

    
11983
    expected_locks = (set([self.group_uuid]) |
11984
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11985
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11986
    if actual_locks != expected_locks:
11987
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11988
                               " current groups are '%s', used to be '%s'" %
11989
                               (utils.CommaJoin(expected_locks),
11990
                                utils.CommaJoin(actual_locks)))
11991

    
11992
    self.node_data = self.cfg.GetAllNodesInfo()
11993
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11994
    instance_data = self.cfg.GetAllInstancesInfo()
11995

    
11996
    if self.group is None:
11997
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11998
                               (self.op.group_name, self.group_uuid))
11999

    
12000
    (new_splits, previous_splits) = \
12001
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12002
                                             for node in self.op.nodes],
12003
                                            self.node_data, instance_data)
12004

    
12005
    if new_splits:
12006
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12007

    
12008
      if not self.op.force:
12009
        raise errors.OpExecError("The following instances get split by this"
12010
                                 " change and --force was not given: %s" %
12011
                                 fmt_new_splits)
12012
      else:
12013
        self.LogWarning("This operation will split the following instances: %s",
12014
                        fmt_new_splits)
12015

    
12016
        if previous_splits:
12017
          self.LogWarning("In addition, these already-split instances continue"
12018
                          " to be split across groups: %s",
12019
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12020

    
12021
  def Exec(self, feedback_fn):
12022
    """Assign nodes to a new group.
12023

12024
    """
12025
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12026

    
12027
    self.cfg.AssignGroupNodes(mods)
12028

    
12029
  @staticmethod
12030
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12031
    """Check for split instances after a node assignment.
12032

12033
    This method considers a series of node assignments as an atomic operation,
12034
    and returns information about split instances after applying the set of
12035
    changes.
12036

12037
    In particular, it returns information about newly split instances, and
12038
    instances that were already split, and remain so after the change.
12039

12040
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12041
    considered.
12042

12043
    @type changes: list of (node_name, new_group_uuid) pairs.
12044
    @param changes: list of node assignments to consider.
12045
    @param node_data: a dict with data for all nodes
12046
    @param instance_data: a dict with all instances to consider
12047
    @rtype: a two-tuple
12048
    @return: a two-tuple of lists: the instances that were previously healthy
12049
      and become split as a consequence of this change, and the instances
12050
      that were already split before and that this change does not fix.
12051

12052
    """
12053
    changed_nodes = dict((node, group) for node, group in changes
12054
                         if node_data[node].group != group)
12055

    
12056
    all_split_instances = set()
12057
    previously_split_instances = set()
12058

    
12059
    def InstanceNodes(instance):
12060
      return [instance.primary_node] + list(instance.secondary_nodes)
12061

    
12062
    for inst in instance_data.values():
12063
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12064
        continue
12065

    
12066
      instance_nodes = InstanceNodes(inst)
12067

    
12068
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12069
        previously_split_instances.add(inst.name)
12070

    
12071
      if len(set(changed_nodes.get(node, node_data[node].group)
12072
                 for node in instance_nodes)) > 1:
12073
        all_split_instances.add(inst.name)
12074

    
12075
    return (list(all_split_instances - previously_split_instances),
12076
            list(previously_split_instances & all_split_instances))
12077

    
12078

    
12079
class _GroupQuery(_QueryBase):
12080
  FIELDS = query.GROUP_FIELDS
12081

    
12082
  def ExpandNames(self, lu):
12083
    lu.needed_locks = {}
12084

    
12085
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12086
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12087

    
12088
    if not self.names:
12089
      self.wanted = [name_to_uuid[name]
12090
                     for name in utils.NiceSort(name_to_uuid.keys())]
12091
    else:
12092
      # Accept names to be either names or UUIDs.
12093
      missing = []
12094
      self.wanted = []
12095
      all_uuid = frozenset(self._all_groups.keys())
12096

    
12097
      for name in self.names:
12098
        if name in all_uuid:
12099
          self.wanted.append(name)
12100
        elif name in name_to_uuid:
12101
          self.wanted.append(name_to_uuid[name])
12102
        else:
12103
          missing.append(name)
12104

    
12105
      if missing:
12106
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12107
                                   utils.CommaJoin(missing),
12108
                                   errors.ECODE_NOENT)
12109

    
12110
  def DeclareLocks(self, lu, level):
12111
    pass
12112

    
12113
  def _GetQueryData(self, lu):
12114
    """Computes the list of node groups and their attributes.
12115

12116
    """
12117
    do_nodes = query.GQ_NODE in self.requested_data
12118
    do_instances = query.GQ_INST in self.requested_data
12119

    
12120
    group_to_nodes = None
12121
    group_to_instances = None
12122

    
12123
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
12128
    if do_nodes or do_instances:
12129
      all_nodes = lu.cfg.GetAllNodesInfo()
12130
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12131
      node_to_group = {}
12132

    
12133
      for node in all_nodes.values():
12134
        if node.group in group_to_nodes:
12135
          group_to_nodes[node.group].append(node.name)
12136
          node_to_group[node.name] = node.group
12137

    
12138
      if do_instances:
12139
        all_instances = lu.cfg.GetAllInstancesInfo()
12140
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12141

    
12142
        for instance in all_instances.values():
12143
          node = instance.primary_node
12144
          if node in node_to_group:
12145
            group_to_instances[node_to_group[node]].append(instance.name)
12146

    
12147
        if not do_nodes:
12148
          # Do not pass on node information if it was not requested.
12149
          group_to_nodes = None
12150

    
12151
    return query.GroupQueryData([self._all_groups[uuid]
12152
                                 for uuid in self.wanted],
12153
                                group_to_nodes, group_to_instances)
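

# Illustrative sketch only (not used by this module): the group->nodes and
# group->instances maps built by _GroupQuery._GetQueryData above, on plain
# dictionaries. Only the primary node is used to place an instance, which is
# why the node->group map is needed even when just instance data is wanted.
def _DemoGroupMaps():
  node_to_group = {"node1": "uuid-a", "node2": "uuid-b"}
  instance_primary = {"inst1": "node1", "inst2": "node2", "inst3": "node1"}

  group_to_nodes = {}
  for node, group in node_to_group.items():
    group_to_nodes.setdefault(group, []).append(node)

  group_to_instances = {}
  for inst, pnode in instance_primary.items():
    group_to_instances.setdefault(node_to_group[pnode], []).append(inst)

  # e.g. group_to_instances == {"uuid-a": ["inst1", "inst3"],
  #                             "uuid-b": ["inst2"]} (ordering may vary)
  return (group_to_nodes, group_to_instances)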
12154

    
12155

    
12156
class LUGroupQuery(NoHooksLU):
12157
  """Logical unit for querying node groups.
12158

12159
  """
12160
  REQ_BGL = False
12161

    
12162
  def CheckArguments(self):
12163
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12164
                          self.op.output_fields, False)
12165

    
12166
  def ExpandNames(self):
12167
    self.gq.ExpandNames(self)
12168

    
12169
  def DeclareLocks(self, level):
12170
    self.gq.DeclareLocks(self, level)
12171

    
12172
  def Exec(self, feedback_fn):
12173
    return self.gq.OldStyleQuery(self)
12174

    
12175

    
12176
class LUGroupSetParams(LogicalUnit):
12177
  """Modifies the parameters of a node group.
12178

12179
  """
12180
  HPATH = "group-modify"
12181
  HTYPE = constants.HTYPE_GROUP
12182
  REQ_BGL = False
12183

    
12184
  def CheckArguments(self):
12185
    all_changes = [
12186
      self.op.ndparams,
12187
      self.op.alloc_policy,
12188
      ]
12189

    
12190
    if all_changes.count(None) == len(all_changes):
12191
      raise errors.OpPrereqError("Please pass at least one modification",
12192
                                 errors.ECODE_INVAL)
12193

    
12194
  def ExpandNames(self):
12195
    # This raises errors.OpPrereqError on its own:
12196
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12197

    
12198
    self.needed_locks = {
12199
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12200
      }
12201

    
12202
  def CheckPrereq(self):
12203
    """Check prerequisites.
12204

12205
    """
12206
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12207

    
12208
    if self.group is None:
12209
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12210
                               (self.op.group_name, self.group_uuid))
12211

    
12212
    if self.op.ndparams:
12213
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12214
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12215
      self.new_ndparams = new_ndparams
12216

    
12217
  def BuildHooksEnv(self):
12218
    """Build hooks env.
12219

12220
    """
12221
    return {
12222
      "GROUP_NAME": self.op.group_name,
12223
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12224
      }
12225

    
12226
  def BuildHooksNodes(self):
12227
    """Build hooks nodes.
12228

12229
    """
12230
    mn = self.cfg.GetMasterNode()
12231
    return ([mn], [mn])
12232

    
12233
  def Exec(self, feedback_fn):
12234
    """Modifies the node group.
12235

12236
    """
12237
    result = []
12238

    
12239
    if self.op.ndparams:
12240
      self.group.ndparams = self.new_ndparams
12241
      result.append(("ndparams", str(self.group.ndparams)))
12242

    
12243
    if self.op.alloc_policy:
12244
      self.group.alloc_policy = self.op.alloc_policy
12245

    
12246
    self.cfg.Update(self.group, feedback_fn)
12247
    return result
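

# Illustrative sketch only (not used by this module): the ndparams update in
# LUGroupSetParams above amounts to overlaying the requested changes on the
# group's current parameters; the real _GetUpdatedParams helper additionally
# knows how to reset individual keys to their defaults. Sample values are
# made up.
def _DemoUpdateGroupParams():
  current = {"oob_program": "/bin/true"}
  requested = {"oob_program": "/usr/local/bin/oob-helper"}

  updated = dict(current)  # work on a copy, as CheckPrereq does
  updated.update(requested)
  return updated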
12248

    
12249

    
12250
class LUGroupRemove(LogicalUnit):
12251
  HPATH = "group-remove"
12252
  HTYPE = constants.HTYPE_GROUP
12253
  REQ_BGL = False
12254

    
12255
  def ExpandNames(self):
12256
    # This raises errors.OpPrereqError on its own:
12257
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12258
    self.needed_locks = {
12259
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12260
      }
12261

    
12262
  def CheckPrereq(self):
12263
    """Check prerequisites.
12264

12265
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12268

12269
    """
12270
    # Verify that the group is empty.
12271
    group_nodes = [node.name
12272
                   for node in self.cfg.GetAllNodesInfo().values()
12273
                   if node.group == self.group_uuid]
12274

    
12275
    if group_nodes:
12276
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12277
                                 " nodes: %s" %
12278
                                 (self.op.group_name,
12279
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12280
                                 errors.ECODE_STATE)
12281

    
12282
    # Verify the cluster would not be left group-less.
12283
    if len(self.cfg.GetNodeGroupList()) == 1:
12284
      raise errors.OpPrereqError("Group '%s' is the only group,"
12285
                                 " cannot be removed" %
12286
                                 self.op.group_name,
12287
                                 errors.ECODE_STATE)
12288

    
12289
  def BuildHooksEnv(self):
12290
    """Build hooks env.
12291

12292
    """
12293
    return {
12294
      "GROUP_NAME": self.op.group_name,
12295
      }
12296

    
12297
  def BuildHooksNodes(self):
12298
    """Build hooks nodes.
12299

12300
    """
12301
    mn = self.cfg.GetMasterNode()
12302
    return ([mn], [mn])
12303

    
12304
  def Exec(self, feedback_fn):
12305
    """Remove the node group.
12306

12307
    """
12308
    try:
12309
      self.cfg.RemoveNodeGroup(self.group_uuid)
12310
    except errors.ConfigurationError:
12311
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12312
                               (self.op.group_name, self.group_uuid))
12313

    
12314
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12315

    
12316

    
12317
class LUGroupRename(LogicalUnit):
12318
  HPATH = "group-rename"
12319
  HTYPE = constants.HTYPE_GROUP
12320
  REQ_BGL = False
12321

    
12322
  def ExpandNames(self):
12323
    # This raises errors.OpPrereqError on its own:
12324
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12325

    
12326
    self.needed_locks = {
12327
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12328
      }
12329

    
12330
  def CheckPrereq(self):
12331
    """Check prerequisites.
12332

12333
    Ensures requested new name is not yet used.
12334

12335
    """
12336
    try:
12337
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12338
    except errors.OpPrereqError:
12339
      pass
12340
    else:
12341
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12342
                                 " node group (UUID: %s)" %
12343
                                 (self.op.new_name, new_name_uuid),
12344
                                 errors.ECODE_EXISTS)
12345

    
12346
  def BuildHooksEnv(self):
12347
    """Build hooks env.
12348

12349
    """
12350
    return {
12351
      "OLD_NAME": self.op.group_name,
12352
      "NEW_NAME": self.op.new_name,
12353
      }
12354

    
12355
  def BuildHooksNodes(self):
12356
    """Build hooks nodes.
12357

12358
    """
12359
    mn = self.cfg.GetMasterNode()
12360

    
12361
    all_nodes = self.cfg.GetAllNodesInfo()
12362
    all_nodes.pop(mn, None)
12363

    
12364
    run_nodes = [mn]
12365
    run_nodes.extend(node.name for node in all_nodes.values()
12366
                     if node.group == self.group_uuid)
12367

    
12368
    return (run_nodes, run_nodes)
12369

    
12370
  def Exec(self, feedback_fn):
12371
    """Rename the node group.
12372

12373
    """
12374
    group = self.cfg.GetNodeGroup(self.group_uuid)
12375

    
12376
    if group is None:
12377
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12378
                               (self.op.group_name, self.group_uuid))
12379

    
12380
    group.name = self.op.new_name
12381
    self.cfg.Update(group, feedback_fn)
12382

    
12383
    return self.op.new_name
12384

    
12385

    
12386
class LUGroupEvacuate(LogicalUnit):
12387
  HPATH = "group-evacuate"
12388
  HTYPE = constants.HTYPE_GROUP
12389
  REQ_BGL = False
12390

    
12391
  def ExpandNames(self):
12392
    # This raises errors.OpPrereqError on its own:
12393
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12394

    
12395
    if self.op.target_groups:
12396
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12397
                                  self.op.target_groups)
12398
    else:
12399
      self.req_target_uuids = []
12400

    
12401
    if self.group_uuid in self.req_target_uuids:
12402
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12403
                                 " as a target group (targets are %s)" %
12404
                                 (self.group_uuid,
12405
                                  utils.CommaJoin(self.req_target_uuids)),
12406
                                 errors.ECODE_INVAL)
12407

    
12408
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12409

    
12410
    self.share_locks = _ShareAll()
12411
    self.needed_locks = {
12412
      locking.LEVEL_INSTANCE: [],
12413
      locking.LEVEL_NODEGROUP: [],
12414
      locking.LEVEL_NODE: [],
12415
      }
12416

    
12417
  def DeclareLocks(self, level):
12418
    if level == locking.LEVEL_INSTANCE:
12419
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12420

    
12421
      # Lock instances optimistically, needs verification once node and group
12422
      # locks have been acquired
12423
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12424
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12425

    
12426
    elif level == locking.LEVEL_NODEGROUP:
12427
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12428

    
12429
      if self.req_target_uuids:
12430
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12431

    
12432
        # Lock all groups used by instances optimistically; this requires going
12433
        # via the node before it's locked, requiring verification later on
12434
        lock_groups.update(group_uuid
12435
                           for instance_name in
12436
                             self.owned_locks(locking.LEVEL_INSTANCE)
12437
                           for group_uuid in
12438
                             self.cfg.GetInstanceNodeGroups(instance_name))
12439
      else:
12440
        # No target groups, need to lock all of them
12441
        lock_groups = locking.ALL_SET
12442

    
12443
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12444

    
12445
    elif level == locking.LEVEL_NODE:
12446
      # This will only lock the nodes in the group to be evacuated which
12447
      # contain actual instances
12448
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12449
      self._LockInstancesNodes()
12450

    
12451
      # Lock all nodes in group to be evacuated and target groups
12452
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12453
      assert self.group_uuid in owned_groups
12454
      member_nodes = [node_name
12455
                      for group in owned_groups
12456
                      for node_name in self.cfg.GetNodeGroup(group).members]
12457
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12458

    
12459
  def CheckPrereq(self):
12460
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12461
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12462
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12463

    
12464
    assert owned_groups.issuperset(self.req_target_uuids)
12465
    assert self.group_uuid in owned_groups
12466

    
12467
    # Check if locked instances are still correct
12468
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12469

    
12470
    # Get instance information
12471
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12472

    
12473
    # Check if node groups for locked instances are still correct
12474
    for instance_name in owned_instances:
12475
      inst = self.instances[instance_name]
12476
      assert owned_nodes.issuperset(inst.all_nodes), \
12477
        "Instance %s's nodes changed while we kept the lock" % instance_name
12478

    
12479
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12480
                                             owned_groups)
12481

    
12482
      assert self.group_uuid in inst_groups, \
12483
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12484

    
12485
    if self.req_target_uuids:
12486
      # User requested specific target groups
12487
      self.target_uuids = self.req_target_uuids
12488
    else:
12489
      # All groups except the one to be evacuated are potential targets
12490
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12491
                           if group_uuid != self.group_uuid]
12492

    
12493
      if not self.target_uuids:
12494
        raise errors.OpPrereqError("There are no possible target groups",
12495
                                   errors.ECODE_INVAL)
12496

    
12497
  def BuildHooksEnv(self):
12498
    """Build hooks env.
12499

12500
    """
12501
    return {
12502
      "GROUP_NAME": self.op.group_name,
12503
      "TARGET_GROUPS": " ".join(self.target_uuids),
12504
      }
12505

    
12506
  def BuildHooksNodes(self):
12507
    """Build hooks nodes.
12508

12509
    """
12510
    mn = self.cfg.GetMasterNode()
12511

    
12512
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12513

    
12514
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12515

    
12516
    return (run_nodes, run_nodes)
12517

    
12518
  def Exec(self, feedback_fn):
12519
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12520

    
12521
    assert self.group_uuid not in self.target_uuids
12522

    
12523
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12524
                     instances=instances, target_groups=self.target_uuids)
12525

    
12526
    ial.Run(self.op.iallocator)
12527

    
12528
    if not ial.success:
12529
      raise errors.OpPrereqError("Can't compute group evacuation using"
12530
                                 " iallocator '%s': %s" %
12531
                                 (self.op.iallocator, ial.info),
12532
                                 errors.ECODE_NORES)
12533

    
12534
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12535

    
12536
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12537
                 len(jobs), self.op.group_name)
12538

    
12539
    return ResultWithJobs(jobs)
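

# Illustrative sketch only: the shape of the change-group request that
# LUGroupEvacuate ultimately feeds to the iallocator (it is built by
# IAllocator._AddChangeGroup and _BuildInputData further down). The instance
# names and group UUID below are made up.
def _DemoChangeGroupRequest():
  return {
    "type": constants.IALLOCATOR_MODE_CHG_GROUP,
    "instances": ["inst1.example.com", "inst2.example.com"],
    "target_groups": ["uuid-of-target-group"],
    }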
12540

    
12541

    
12542
class TagsLU(NoHooksLU): # pylint: disable=W0223
12543
  """Generic tags LU.
12544

12545
  This is an abstract class which is the parent of all the other tags LUs.
12546

12547
  """
12548
  def ExpandNames(self):
12549
    self.group_uuid = None
12550
    self.needed_locks = {}
12551
    if self.op.kind == constants.TAG_NODE:
12552
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12553
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12554
    elif self.op.kind == constants.TAG_INSTANCE:
12555
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12556
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12557
    elif self.op.kind == constants.TAG_NODEGROUP:
12558
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12559

    
12560
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12561
    # not possible to acquire the BGL based on opcode parameters)
12562

    
12563
  def CheckPrereq(self):
12564
    """Check prerequisites.
12565

12566
    """
12567
    if self.op.kind == constants.TAG_CLUSTER:
12568
      self.target = self.cfg.GetClusterInfo()
12569
    elif self.op.kind == constants.TAG_NODE:
12570
      self.target = self.cfg.GetNodeInfo(self.op.name)
12571
    elif self.op.kind == constants.TAG_INSTANCE:
12572
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12573
    elif self.op.kind == constants.TAG_NODEGROUP:
12574
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12575
    else:
12576
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12577
                                 str(self.op.kind), errors.ECODE_INVAL)
12578

    
12579

    
12580
class LUTagsGet(TagsLU):
12581
  """Returns the tags of a given object.
12582

12583
  """
12584
  REQ_BGL = False
12585

    
12586
  def ExpandNames(self):
12587
    TagsLU.ExpandNames(self)
12588

    
12589
    # Share locks as this is only a read operation
12590
    self.share_locks = _ShareAll()
12591

    
12592
  def Exec(self, feedback_fn):
12593
    """Returns the tag list.
12594

12595
    """
12596
    return list(self.target.GetTags())
12597

    
12598

    
12599
class LUTagsSearch(NoHooksLU):
12600
  """Searches the tags for a given pattern.
12601

12602
  """
12603
  REQ_BGL = False
12604

    
12605
  def ExpandNames(self):
12606
    self.needed_locks = {}
12607

    
12608
  def CheckPrereq(self):
12609
    """Check prerequisites.
12610

12611
    This checks the pattern passed for validity by compiling it.
12612

12613
    """
12614
    try:
12615
      self.re = re.compile(self.op.pattern)
12616
    except re.error, err:
12617
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12618
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12619

    
12620
  def Exec(self, feedback_fn):
12621
    """Returns the tag list.
12622

12623
    """
12624
    cfg = self.cfg
12625
    tgts = [("/cluster", cfg.GetClusterInfo())]
12626
    ilist = cfg.GetAllInstancesInfo().values()
12627
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12628
    nlist = cfg.GetAllNodesInfo().values()
12629
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12630
    tgts.extend(("/nodegroup/%s" % n.name, n)
12631
                for n in cfg.GetAllNodeGroupsInfo().values())
12632
    results = []
12633
    for path, target in tgts:
12634
      for tag in target.GetTags():
12635
        if self.re.search(tag):
12636
          results.append((path, tag))
12637
    return results
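

# Illustrative sketch only (not used by this module): the tag search above is
# a "compile once, scan every (path, tags) pair" loop; here it is on plain
# data, reusing the module-level "re" import. Paths and tags are made up.
def _DemoTagSearch(pattern):
  tagged = {
    "/cluster": ["prod"],
    "/instances/web1.example.com": ["owner:alice", "prod"],
    }
  rx = re.compile(pattern)
  return [(path, tag)
          for path, tags in tagged.items()
          for tag in tags
          if rx.search(tag)]
# e.g. _DemoTagSearch("^owner:") == [("/instances/web1.example.com",
#                                     "owner:alice")]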
12638

    
12639

    
12640
class LUTagsSet(TagsLU):
12641
  """Sets a tag on a given object.
12642

12643
  """
12644
  REQ_BGL = False
12645

    
12646
  def CheckPrereq(self):
12647
    """Check prerequisites.
12648

12649
    This checks the type and length of the tag name and value.
12650

12651
    """
12652
    TagsLU.CheckPrereq(self)
12653
    for tag in self.op.tags:
12654
      objects.TaggableObject.ValidateTag(tag)
12655

    
12656
  def Exec(self, feedback_fn):
12657
    """Sets the tag.
12658

12659
    """
12660
    try:
12661
      for tag in self.op.tags:
12662
        self.target.AddTag(tag)
12663
    except errors.TagError, err:
12664
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12665
    self.cfg.Update(self.target, feedback_fn)
12666

    
12667

    
12668
class LUTagsDel(TagsLU):
12669
  """Delete a list of tags from a given object.
12670

12671
  """
12672
  REQ_BGL = False
12673

    
12674
  def CheckPrereq(self):
12675
    """Check prerequisites.
12676

12677
    This checks that we have the given tag.
12678

12679
    """
12680
    TagsLU.CheckPrereq(self)
12681
    for tag in self.op.tags:
12682
      objects.TaggableObject.ValidateTag(tag)
12683
    del_tags = frozenset(self.op.tags)
12684
    cur_tags = self.target.GetTags()
12685

    
12686
    diff_tags = del_tags - cur_tags
12687
    if diff_tags:
12688
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12689
      raise errors.OpPrereqError("Tag(s) %s not found" %
12690
                                 (utils.CommaJoin(diff_names), ),
12691
                                 errors.ECODE_NOENT)
12692

    
12693
  def Exec(self, feedback_fn):
12694
    """Remove the tag from the object.
12695

12696
    """
12697
    for tag in self.op.tags:
12698
      self.target.RemoveTag(tag)
12699
    self.cfg.Update(self.target, feedback_fn)
12700

    
12701

    
12702
class LUTestDelay(NoHooksLU):
12703
  """Sleep for a specified amount of time.
12704

12705
  This LU sleeps on the master and/or nodes for a specified amount of
12706
  time.
12707

12708
  """
12709
  REQ_BGL = False
12710

    
12711
  def ExpandNames(self):
12712
    """Expand names and set required locks.
12713

12714
    This expands the node list, if any.
12715

12716
    """
12717
    self.needed_locks = {}
12718
    if self.op.on_nodes:
12719
      # _GetWantedNodes can be used here, but is not always appropriate to use
12720
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12721
      # more information.
12722
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12723
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12724

    
12725
  def _TestDelay(self):
12726
    """Do the actual sleep.
12727

12728
    """
12729
    if self.op.on_master:
12730
      if not utils.TestDelay(self.op.duration):
12731
        raise errors.OpExecError("Error during master delay test")
12732
    if self.op.on_nodes:
12733
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12734
      for node, node_result in result.items():
12735
        node_result.Raise("Failure during rpc call to node %s" % node)
12736

    
12737
  def Exec(self, feedback_fn):
12738
    """Execute the test delay opcode, with the wanted repetitions.
12739

12740
    """
12741
    if self.op.repeat == 0:
12742
      self._TestDelay()
12743
    else:
12744
      top_value = self.op.repeat - 1
12745
      for i in range(self.op.repeat):
12746
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12747
        self._TestDelay()
12748

    
12749

    
12750
class LUTestJqueue(NoHooksLU):
12751
  """Utility LU to test some aspects of the job queue.
12752

12753
  """
12754
  REQ_BGL = False
12755

    
12756
  # Must be lower than default timeout for WaitForJobChange to see whether it
12757
  # notices changed jobs
12758
  _CLIENT_CONNECT_TIMEOUT = 20.0
12759
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12760

    
12761
  @classmethod
12762
  def _NotifyUsingSocket(cls, cb, errcls):
12763
    """Opens a Unix socket and waits for another program to connect.
12764

12765
    @type cb: callable
12766
    @param cb: Callback to send socket name to client
12767
    @type errcls: class
12768
    @param errcls: Exception class to use for errors
12769

12770
    """
12771
    # Using a temporary directory as there's no easy way to create temporary
12772
    # sockets without writing a custom loop around tempfile.mktemp and
12773
    # socket.bind
12774
    tmpdir = tempfile.mkdtemp()
12775
    try:
12776
      tmpsock = utils.PathJoin(tmpdir, "sock")
12777

    
12778
      logging.debug("Creating temporary socket at %s", tmpsock)
12779
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12780
      try:
12781
        sock.bind(tmpsock)
12782
        sock.listen(1)
12783

    
12784
        # Send details to client
12785
        cb(tmpsock)
12786

    
12787
        # Wait for client to connect before continuing
12788
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12789
        try:
12790
          (conn, _) = sock.accept()
12791
        except socket.error, err:
12792
          raise errcls("Client didn't connect in time (%s)" % err)
12793
      finally:
12794
        sock.close()
12795
    finally:
12796
      # Remove as soon as client is connected
12797
      shutil.rmtree(tmpdir)
12798

    
12799
    # Wait for client to close
12800
    try:
12801
      try:
12802
        # pylint: disable=E1101
12803
        # Instance of '_socketobject' has no ... member
12804
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12805
        conn.recv(1)
12806
      except socket.error, err:
12807
        raise errcls("Client failed to confirm notification (%s)" % err)
12808
    finally:
12809
      conn.close()
12810

    
12811
  def _SendNotification(self, test, arg, sockname):
12812
    """Sends a notification to the client.
12813

12814
    @type test: string
12815
    @param test: Test name
12816
    @param arg: Test argument (depends on test)
12817
    @type sockname: string
12818
    @param sockname: Socket path
12819

12820
    """
12821
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12822

    
12823
  def _Notify(self, prereq, test, arg):
12824
    """Notifies the client of a test.
12825

12826
    @type prereq: bool
12827
    @param prereq: Whether this is a prereq-phase test
12828
    @type test: string
12829
    @param test: Test name
12830
    @param arg: Test argument (depends on test)
12831

12832
    """
12833
    if prereq:
12834
      errcls = errors.OpPrereqError
12835
    else:
12836
      errcls = errors.OpExecError
12837

    
12838
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12839
                                                  test, arg),
12840
                                   errcls)
12841

    
12842
  def CheckArguments(self):
12843
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12844
    self.expandnames_calls = 0
12845

    
12846
  def ExpandNames(self):
12847
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12848
    if checkargs_calls < 1:
12849
      raise errors.ProgrammerError("CheckArguments was not called")
12850

    
12851
    self.expandnames_calls += 1
12852

    
12853
    if self.op.notify_waitlock:
12854
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12855

    
12856
    self.LogInfo("Expanding names")
12857

    
12858
    # Get lock on master node (just to get a lock, not for a particular reason)
12859
    self.needed_locks = {
12860
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12861
      }
12862

    
12863
  def Exec(self, feedback_fn):
12864
    if self.expandnames_calls < 1:
12865
      raise errors.ProgrammerError("ExpandNames was not called")
12866

    
12867
    if self.op.notify_exec:
12868
      self._Notify(False, constants.JQT_EXEC, None)
12869

    
12870
    self.LogInfo("Executing")
12871

    
12872
    if self.op.log_messages:
12873
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12874
      for idx, msg in enumerate(self.op.log_messages):
12875
        self.LogInfo("Sending log message %s", idx + 1)
12876
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12877
        # Report how many test messages have been sent
12878
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12879

    
12880
    if self.op.fail:
12881
      raise errors.OpExecError("Opcode failure was requested")
12882

    
12883
    return True
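

# Illustrative sketch only (not part of this module): what the test client on
# the other side of LUTestJqueue's notification socket has to do -- connect
# to the socket path it was handed, then send one byte to confirm. The
# function name and its use here are assumptions for illustration.
def _DemoJqueueTestClient(sockname):
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    client.connect(sockname)  # unblocks the LU's accept()
    client.send("x")          # any single byte confirms the notification
  finally:
    client.close()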
12884

    
12885

    
12886
class IAllocator(object):
12887
  """IAllocator framework.
12888

12889
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
12897

12898
  """
12899
  # pylint: disable=R0902
12900
  # lots of instance attributes
12901

    
12902
  def __init__(self, cfg, rpc, mode, **kwargs):
12903
    self.cfg = cfg
12904
    self.rpc = rpc
12905
    # init buffer variables
12906
    self.in_text = self.out_text = self.in_data = self.out_data = None
12907
    # init all input fields so that pylint is happy
12908
    self.mode = mode
12909
    self.memory = self.disks = self.disk_template = None
12910
    self.os = self.tags = self.nics = self.vcpus = None
12911
    self.hypervisor = None
12912
    self.relocate_from = None
12913
    self.name = None
12914
    self.instances = None
12915
    self.evac_mode = None
12916
    self.target_groups = []
12917
    # computed fields
12918
    self.required_nodes = None
12919
    # init result fields
12920
    self.success = self.info = self.result = None
12921

    
12922
    try:
12923
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12924
    except KeyError:
12925
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12926
                                   " IAllocator" % self.mode)
12927

    
12928
    keyset = [n for (n, _) in keydata]
12929

    
12930
    for key in kwargs:
12931
      if key not in keyset:
12932
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12933
                                     " IAllocator" % key)
12934
      setattr(self, key, kwargs[key])
12935

    
12936
    for key in keyset:
12937
      if key not in kwargs:
12938
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12939
                                     " IAllocator" % key)
12940
    self._BuildInputData(compat.partial(fn, self), keydata)
12941

    
12942
  def _ComputeClusterData(self):
12943
    """Compute the generic allocator input data.
12944

12945
    This is the data that is independent of the actual operation.
12946

12947
    """
12948
    cfg = self.cfg
12949
    cluster_info = cfg.GetClusterInfo()
12950
    # cluster data
12951
    data = {
12952
      "version": constants.IALLOCATOR_VERSION,
12953
      "cluster_name": cfg.GetClusterName(),
12954
      "cluster_tags": list(cluster_info.GetTags()),
12955
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12956
      # we don't have job IDs
12957
      }
12958
    ninfo = cfg.GetAllNodesInfo()
12959
    iinfo = cfg.GetAllInstancesInfo().values()
12960
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12961

    
12962
    # node data
12963
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12964

    
12965
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12966
      hypervisor_name = self.hypervisor
12967
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12968
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12969
    else:
12970
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12971

    
12972
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12973
                                        hypervisor_name)
12974
    node_iinfo = \
12975
      self.rpc.call_all_instances_info(node_list,
12976
                                       cluster_info.enabled_hypervisors)
12977

    
12978
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12979

    
12980
    config_ndata = self._ComputeBasicNodeData(ninfo)
12981
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12982
                                                 i_list, config_ndata)
12983
    assert len(data["nodes"]) == len(ninfo), \
12984
        "Incomplete node data computed"
12985

    
12986
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12987

    
12988
    self.in_data = data
12989

    
12990
  @staticmethod
12991
  def _ComputeNodeGroupData(cfg):
12992
    """Compute node groups data.
12993

12994
    """
12995
    ng = dict((guuid, {
12996
      "name": gdata.name,
12997
      "alloc_policy": gdata.alloc_policy,
12998
      })
12999
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13000

    
13001
    return ng
13002

    
13003
  @staticmethod
13004
  def _ComputeBasicNodeData(node_cfg):
13005
    """Compute global node data.
13006

13007
    @rtype: dict
    @returns: a dict mapping node names to dicts of their static
      (config-based) attributes
13009

13010
    """
13011
    # fill in static (config-based) values
13012
    node_results = dict((ninfo.name, {
13013
      "tags": list(ninfo.GetTags()),
13014
      "primary_ip": ninfo.primary_ip,
13015
      "secondary_ip": ninfo.secondary_ip,
13016
      "offline": ninfo.offline,
13017
      "drained": ninfo.drained,
13018
      "master_candidate": ninfo.master_candidate,
13019
      "group": ninfo.group,
13020
      "master_capable": ninfo.master_capable,
13021
      "vm_capable": ninfo.vm_capable,
13022
      })
13023
      for ninfo in node_cfg.values())
13024

    
13025
    return node_results
13026

    
13027
  @staticmethod
13028
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13029
                              node_results):
13030
    """Compute global node data.
13031

13032
    @param node_results: the basic node structures as filled from the config
13033

13034
    """
13035
    # make a copy of the current dict
13036
    node_results = dict(node_results)
13037
    for nname, nresult in node_data.items():
13038
      assert nname in node_results, "Missing basic data for node %s" % nname
13039
      ninfo = node_cfg[nname]
13040

    
13041
      if not (ninfo.offline or ninfo.drained):
13042
        nresult.Raise("Can't get data for node %s" % nname)
13043
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13044
                                nname)
13045
        remote_info = nresult.payload
13046

    
13047
        for attr in ["memory_total", "memory_free", "memory_dom0",
13048
                     "vg_size", "vg_free", "cpu_total"]:
13049
          if attr not in remote_info:
13050
            raise errors.OpExecError("Node '%s' didn't return attribute"
13051
                                     " '%s'" % (nname, attr))
13052
          if not isinstance(remote_info[attr], int):
13053
            raise errors.OpExecError("Node '%s' returned invalid value"
13054
                                     " for '%s': %s" %
13055
                                     (nname, attr, remote_info[attr]))
13056
        # compute memory used by primary instances
13057
        i_p_mem = i_p_up_mem = 0
13058
        for iinfo, beinfo in i_list:
13059
          if iinfo.primary_node == nname:
13060
            i_p_mem += beinfo[constants.BE_MEMORY]
13061
            if iinfo.name not in node_iinfo[nname].payload:
13062
              i_used_mem = 0
13063
            else:
13064
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13065
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13066
            remote_info["memory_free"] -= max(0, i_mem_diff)
13067

    
13068
            if iinfo.admin_up:
13069
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13070

    
13071
        # compute memory used by instances
13072
        pnr_dyn = {
13073
          "total_memory": remote_info["memory_total"],
13074
          "reserved_memory": remote_info["memory_dom0"],
13075
          "free_memory": remote_info["memory_free"],
13076
          "total_disk": remote_info["vg_size"],
13077
          "free_disk": remote_info["vg_free"],
13078
          "total_cpus": remote_info["cpu_total"],
13079
          "i_pri_memory": i_p_mem,
13080
          "i_pri_up_memory": i_p_up_mem,
13081
          }
13082
        pnr_dyn.update(node_results[nname])
13083
        node_results[nname] = pnr_dyn
13084

    
13085
    return node_results
13086

    
13087
  @staticmethod
13088
  def _ComputeInstanceData(cluster_info, i_list):
13089
    """Compute global instance data.
13090

13091
    """
13092
    instance_data = {}
13093
    for iinfo, beinfo in i_list:
13094
      nic_data = []
13095
      for nic in iinfo.nics:
13096
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13097
        nic_dict = {
13098
          "mac": nic.mac,
13099
          "ip": nic.ip,
13100
          "mode": filled_params[constants.NIC_MODE],
13101
          "link": filled_params[constants.NIC_LINK],
13102
          }
13103
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13104
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13105
        nic_data.append(nic_dict)
13106
      pir = {
13107
        "tags": list(iinfo.GetTags()),
13108
        "admin_up": iinfo.admin_up,
13109
        "vcpus": beinfo[constants.BE_VCPUS],
13110
        "memory": beinfo[constants.BE_MEMORY],
13111
        "os": iinfo.os,
13112
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13113
        "nics": nic_data,
13114
        "disks": [{constants.IDISK_SIZE: dsk.size,
13115
                   constants.IDISK_MODE: dsk.mode}
13116
                  for dsk in iinfo.disks],
13117
        "disk_template": iinfo.disk_template,
13118
        "hypervisor": iinfo.hypervisor,
13119
        }
13120
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13121
                                                 pir["disks"])
13122
      instance_data[iinfo.name] = pir
13123

    
13124
    return instance_data
13125

    
13126
  def _AddNewInstance(self):
13127
    """Add new instance data to allocator structure.
13128

13129
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
13131

13132
    The checks for the completeness of the opcode must have already been
13133
    done.
13134

13135
    """
13136
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13137

    
13138
    if self.disk_template in constants.DTS_INT_MIRROR:
13139
      self.required_nodes = 2
13140
    else:
13141
      self.required_nodes = 1
13142

    
13143
    request = {
13144
      "name": self.name,
13145
      "disk_template": self.disk_template,
13146
      "tags": self.tags,
13147
      "os": self.os,
13148
      "vcpus": self.vcpus,
13149
      "memory": self.memory,
13150
      "disks": self.disks,
13151
      "disk_space_total": disk_space,
13152
      "nics": self.nics,
13153
      "required_nodes": self.required_nodes,
13154
      "hypervisor": self.hypervisor,
13155
      }
13156

    
13157
    return request
13158

    
13159
  def _AddRelocateInstance(self):
13160
    """Add relocate instance data to allocator structure.
13161

13162
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
13164

13165
    The checks for the completeness of the opcode must have already been
13166
    done.
13167

13168
    """
13169
    instance = self.cfg.GetInstanceInfo(self.name)
13170
    if instance is None:
13171
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13172
                                   " IAllocator" % self.name)
13173

    
13174
    if instance.disk_template not in constants.DTS_MIRRORED:
13175
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13176
                                 errors.ECODE_INVAL)
13177

    
13178
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13179
        len(instance.secondary_nodes) != 1:
13180
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13181
                                 errors.ECODE_STATE)
13182

    
13183
    self.required_nodes = 1
13184
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13185
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13186

    
13187
    request = {
13188
      "name": self.name,
13189
      "disk_space_total": disk_space,
13190
      "required_nodes": self.required_nodes,
13191
      "relocate_from": self.relocate_from,
13192
      }
13193
    return request
13194

    
13195
  def _AddNodeEvacuate(self):
13196
    """Get data for node-evacuate requests.
13197

13198
    """
13199
    return {
13200
      "instances": self.instances,
13201
      "evac_mode": self.evac_mode,
13202
      }
13203

    
13204
  def _AddChangeGroup(self):
13205
    """Get data for node-evacuate requests.
13206

13207
    """
13208
    return {
13209
      "instances": self.instances,
13210
      "target_groups": self.target_groups,
13211
      }
13212

    
13213
  def _BuildInputData(self, fn, keydata):
13214
    """Build input data structures.
13215

13216
    """
13217
    self._ComputeClusterData()
13218

    
13219
    request = fn()
13220
    request["type"] = self.mode
13221
    for keyname, keytype in keydata:
13222
      if keyname not in request:
13223
        raise errors.ProgrammerError("Request parameter %s is missing" %
13224
                                     keyname)
13225
      val = request[keyname]
13226
      if not keytype(val):
13227
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13228
                                     " validation, value %s, expected"
13229
                                     " type %s" % (keyname, val, keytype))
13230
    self.in_data["request"] = request
13231

    
13232
    self.in_text = serializer.Dump(self.in_data)
13233

    
13234
  _STRING_LIST = ht.TListOf(ht.TString)
13235
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13236
     # pylint: disable=E1101
13237
     # Class '...' has no 'OP_ID' member
13238
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13239
                          opcodes.OpInstanceMigrate.OP_ID,
13240
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13241
     })))
13242

    
13243
  _NEVAC_MOVED = \
13244
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13245
                       ht.TItems([ht.TNonEmptyString,
13246
                                  ht.TNonEmptyString,
13247
                                  ht.TListOf(ht.TNonEmptyString),
13248
                                 ])))
13249
  _NEVAC_FAILED = \
13250
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13251
                       ht.TItems([ht.TNonEmptyString,
13252
                                  ht.TMaybeString,
13253
                                 ])))
13254
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13255
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13256

    
13257
  _MODE_DATA = {
13258
    constants.IALLOCATOR_MODE_ALLOC:
13259
      (_AddNewInstance,
13260
       [
13261
        ("name", ht.TString),
13262
        ("memory", ht.TInt),
13263
        ("disks", ht.TListOf(ht.TDict)),
13264
        ("disk_template", ht.TString),
13265
        ("os", ht.TString),
13266
        ("tags", _STRING_LIST),
13267
        ("nics", ht.TListOf(ht.TDict)),
13268
        ("vcpus", ht.TInt),
13269
        ("hypervisor", ht.TString),
13270
        ], ht.TList),
13271
    constants.IALLOCATOR_MODE_RELOC:
13272
      (_AddRelocateInstance,
13273
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13274
       ht.TList),
13275
     constants.IALLOCATOR_MODE_NODE_EVAC:
13276
      (_AddNodeEvacuate, [
13277
        ("instances", _STRING_LIST),
13278
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13279
        ], _NEVAC_RESULT),
13280
     constants.IALLOCATOR_MODE_CHG_GROUP:
13281
      (_AddChangeGroup, [
13282
        ("instances", _STRING_LIST),
13283
        ("target_groups", _STRING_LIST),
13284
        ], _NEVAC_RESULT),
13285
    }
13286

    
13287
  def Run(self, name, validate=True, call_fn=None):
13288
    """Run an instance allocator and return the results.
13289

13290
    """
13291
    if call_fn is None:
13292
      call_fn = self.rpc.call_iallocator_runner
13293

    
13294
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13295
    result.Raise("Failure while running the iallocator script")
13296

    
13297
    self.out_text = result.payload
13298
    if validate:
13299
      self._ValidateResult()
13300

    
13301
  def _ValidateResult(self):
13302
    """Process the allocator results.
13303

13304
    This will process and, if successful, save the result in
    self.out_data and the other parameters.
13306

13307
    """
13308
    try:
13309
      rdict = serializer.Load(self.out_text)
13310
    except Exception, err:
13311
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13312

    
13313
    if not isinstance(rdict, dict):
13314
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13315

    
13316
    # TODO: remove backwards compatibility in later versions
13317
    if "nodes" in rdict and "result" not in rdict:
13318
      rdict["result"] = rdict["nodes"]
13319
      del rdict["nodes"]
13320

    
13321
    for key in "success", "info", "result":
13322
      if key not in rdict:
13323
        raise errors.OpExecError("Can't parse iallocator results:"
13324
                                 " missing key '%s'" % key)
13325
      setattr(self, key, rdict[key])
13326

    
13327
    if not self._result_check(self.result):
13328
      raise errors.OpExecError("Iallocator returned invalid result,"
13329
                               " expected %s, got %s" %
13330
                               (self._result_check, self.result),
13331
                               errors.ECODE_INVAL)
13332

    
13333
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13334
      assert self.relocate_from is not None
13335
      assert self.required_nodes == 1
13336

    
13337
      node2group = dict((name, ndata["group"])
13338
                        for (name, ndata) in self.in_data["nodes"].items())
13339

    
13340
      fn = compat.partial(self._NodesToGroups, node2group,
13341
                          self.in_data["nodegroups"])
13342

    
13343
      instance = self.cfg.GetInstanceInfo(self.name)
13344
      request_groups = fn(self.relocate_from + [instance.primary_node])
13345
      result_groups = fn(rdict["result"] + [instance.primary_node])
13346

    
13347
      if self.success and not set(result_groups).issubset(request_groups):
13348
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13349
                                 " differ from original groups (%s)" %
13350
                                 (utils.CommaJoin(result_groups),
13351
                                  utils.CommaJoin(request_groups)))
13352

    
13353
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13354
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13355

    
13356
    self.out_data = rdict
13357

    
13358
  @staticmethod
13359
  def _NodesToGroups(node2group, groups, nodes):
13360
    """Returns a list of unique group names for a list of nodes.
13361

13362
    @type node2group: dict
13363
    @param node2group: Map from node name to group UUID
13364
    @type groups: dict
13365
    @param groups: Group information
13366
    @type nodes: list
13367
    @param nodes: Node names
13368

13369
    """
13370
    result = set()
13371

    
13372
    for node in nodes:
13373
      try:
13374
        group_uuid = node2group[node]
13375
      except KeyError:
13376
        # Ignore unknown node
13377
        pass
13378
      else:
13379
        try:
13380
          group = groups[group_uuid]
13381
        except KeyError:
13382
          # Can't find group, let's use UUID
13383
          group_name = group_uuid
13384
        else:
13385
          group_name = group["name"]
13386

    
13387
        result.add(group_name)
13388

    
13389
    return sorted(result)
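

# Illustrative sketch only (not used by this module): the request validation
# performed by _BuildInputData above is simply "for every (key, check) pair,
# the key must be present and the check callable must accept its value". The
# key names below are examples, reusing the ht checks already imported here.
def _DemoValidateRequest(request):
  keydata = [
    ("name", ht.TString),
    ("required_nodes", ht.TInt),
    ]
  for keyname, keytype in keydata:
    if keyname not in request:
      raise ValueError("Request parameter %s is missing" % keyname)
    if not keytype(request[keyname]):
      raise ValueError("Request parameter %s fails validation" % keyname)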
13390

    
13391

    
13392
class LUTestAllocator(NoHooksLU):
13393
  """Run allocator tests.
13394

13395
  This LU runs the allocator tests
13396

13397
  """
13398
  def CheckPrereq(self):
13399
    """Check prerequisites.
13400

13401
    This checks the opcode parameters depending on the direction and mode of
    the test.
13402

13403
    """
13404
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13405
      for attr in ["memory", "disks", "disk_template",
13406
                   "os", "tags", "nics", "vcpus"]:
13407
        if not hasattr(self.op, attr):
13408
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13409
                                     attr, errors.ECODE_INVAL)
13410
      iname = self.cfg.ExpandInstanceName(self.op.name)
13411
      if iname is not None:
13412
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13413
                                   iname, errors.ECODE_EXISTS)
13414
      if not isinstance(self.op.nics, list):
13415
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13416
                                   errors.ECODE_INVAL)
13417
      if not isinstance(self.op.disks, list):
13418
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13419
                                   errors.ECODE_INVAL)
13420
      for row in self.op.disks:
13421
        if (not isinstance(row, dict) or
13422
            constants.IDISK_SIZE not in row or
13423
            not isinstance(row[constants.IDISK_SIZE], int) or
13424
            constants.IDISK_MODE not in row or
13425
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13426
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13427
                                     " parameter", errors.ECODE_INVAL)
13428
      if self.op.hypervisor is None:
13429
        self.op.hypervisor = self.cfg.GetHypervisorType()
13430
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13431
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13432
      self.op.name = fname
13433
      self.relocate_from = \
13434
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13435
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13436
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13437
      if not self.op.instances:
13438
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13439
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13440
    else:
13441
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13442
                                 self.op.mode, errors.ECODE_INVAL)
13443

    
13444
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13445
      if self.op.allocator is None:
13446
        raise errors.OpPrereqError("Missing allocator name",
13447
                                   errors.ECODE_INVAL)
13448
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13449
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13450
                                 self.op.direction, errors.ECODE_INVAL)
13451

    
13452
  def Exec(self, feedback_fn):
13453
    """Run the allocator test.
13454

13455
    """
13456
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13457
      ial = IAllocator(self.cfg, self.rpc,
13458
                       mode=self.op.mode,
13459
                       name=self.op.name,
13460
                       memory=self.op.memory,
13461
                       disks=self.op.disks,
13462
                       disk_template=self.op.disk_template,
13463
                       os=self.op.os,
13464
                       tags=self.op.tags,
13465
                       nics=self.op.nics,
13466
                       vcpus=self.op.vcpus,
13467
                       hypervisor=self.op.hypervisor,
13468
                       )
13469
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13470
      ial = IAllocator(self.cfg, self.rpc,
13471
                       mode=self.op.mode,
13472
                       name=self.op.name,
13473
                       relocate_from=list(self.relocate_from),
13474
                       )
13475
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13476
      ial = IAllocator(self.cfg, self.rpc,
13477
                       mode=self.op.mode,
13478
                       instances=self.op.instances,
13479
                       target_groups=self.op.target_groups)
13480
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13481
      ial = IAllocator(self.cfg, self.rpc,
13482
                       mode=self.op.mode,
13483
                       instances=self.op.instances,
13484
                       evac_mode=self.op.evac_mode)
13485
    else:
13486
      raise errors.ProgrammerError("Uncatched mode %s in"
13487
                                   " LUTestAllocator.Exec", self.op.mode)
13488

    
13489
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13490
      result = ial.in_text
13491
    else:
13492
      ial.Run(self.op.allocator, validate=False)
13493
      result = ial.out_text
13494
    return result
13495

    
13496

    
13497
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
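

# Illustrative usage sketch (an assumption, not actual Ganeti code): how the
# dispatch table above is typically consumed -- look up the query class for a
# resource type and instantiate it with the same kind of arguments
# LUGroupQuery.CheckArguments uses.
def _DemoGroupQueryLookup():
  impl_cls = _GetQueryImplementation(constants.QR_GROUP)
  return impl_cls(qlang.MakeSimpleFilter("name", []), ["name"], False)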