#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have way too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
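# Editor's note: an illustrative sketch (not part of the original module) of
# how an LU hands jobs back to the master daemon. An Exec method may return,
# for example:
#
#   return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
#                         custom_field="some value")
#
# (the keyword name "custom_field" is just an example). The processor then
# submits the job(s) in C{jobs} and merges the resulting job IDs plus the
# extra keyword values into the opcode result.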


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The API must be kept, thus we ignore the "unused argument" and
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
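  # Editor's note (illustrative sketch, not part of the original module): a
  # concrete tasklet only has to provide CheckPrereq and Exec, e.g.:
  #
  #   class _HypotheticalNoopTasklet(Tasklet):
  #     def CheckPrereq(self):
  #       pass  # nothing to verify
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("noop tasklet executed")
  #
  # The owning LU would put instances of such a class into self.tasklets in
  # ExpandNames; the default LogicalUnit.CheckPrereq/Exec then run them in
  # order.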


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
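# Editor's note: a small worked example (not part of the original module) of
# the semantics above; the parameter names are made up. With the defaults
# (use_default=True, use_none=False):
#
#   _GetUpdatedParams({"mem": 128, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#   => {"vcpus": 4}
#
# "mem" falls back to its default by being removed from the result, while
# "vcpus" is simply overridden; the inputs are untouched because the function
# works on a deep copy.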


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
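# Editor's note: an illustrative call pattern (not lifted from this file) for
# the helper above. An LU that is done with most node locks could keep only
# the locks of the nodes it still cares about:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)
#
# or drop a specific set by passing names=[...] instead; names and keep are
# mutually exclusive, and omitting both releases every lock at that level.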


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
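# Editor's note: a sketch of the resulting structure (example data, not taken
# from the original source). For an instance "inst1.example.com" with a
# logical volume "xenvg/disk0" present on "node1" and "node2", this returns:
#
#   {("node1", "xenvg/disk0"): "inst1.example.com",
#    ("node2", "xenvg/disk0"): "inst1.example.com"}
#
# i.e. one entry per (node, volume) pair across all passed instances.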


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
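# Editor's note: an illustrative sample of the dictionary built above (made-up
# values, not from the original source). A single-NIC, single-disk instance
# would yield keys such as:
#
#   {"OP_TARGET": "inst1.example.com", "INSTANCE_PRIMARY": "node1",
#    "INSTANCE_STATUS": "up", "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MAC": "aa:00:00:35:4e:01", "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 1024, "INSTANCE_DISK0_MODE": "rw", ...}
#
# The hooks runner later prefixes each key with "GANETI_" before exporting it
# to the hook scripts' environment.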


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
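  # Editor's note: an illustrative use of this mix-in (hypothetical values,
  # not copied from this file). A verify LU typically writes checks as:
  #
  #   self._ErrorIf(test_failed, self.ENODEHV, node_name,
  #                 "hypervisor verify failure: %s", str(hv_result))
  #
  # which reports through feedback_fn and, unless the ETYPE_FIELD keyword is
  # set to ETYPE_WARNING, also sets self.bad so the whole operation is
  # reported as failed.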


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
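
  # Rough sketch (group names are made up) of the job list built by Exec
  # above for a two-group cluster when no group_name was given:
  #   [[OpClusterVerifyConfig()],
  #    [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
  #    [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]
  # i.e. every per-group job carries a relative dependency on the global
  # configuration check submitted first.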


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    # dangling_nodes holds node names (strings), so format them directly
    pretty_dangling = [
        "%s (%s)" %
        (node,
         utils.CommaJoin(dangling_instances.get(node,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
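
  # Illustrative output (made-up names): if node "node9" points to a group
  # that no longer exists and is primary for instance "inst1", the check
  # above would report roughly
  #   "ERROR: cluster: the following nodes (and their instances) belong
  #    to a non existing group: node9 (inst1)"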


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
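
  # Illustrative sketch (made-up names): for a node that is secondary for
  # two DRBD instances whose primary is "node1", the image built later in
  # Exec would roughly contain
  #   sinst = ["inst1", "inst2"], sbp = {"node1": ["inst1", "inst2"]}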

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
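
  # Worked example (hypothetical numbers, assuming a 150s NODE_MAX_CLOCK_SKEW):
  # for an RPC window of [1000.0, 1002.0] and a node reporting 1200.0, the
  # node lies past the upper bound 1002.0 + 150, so the check above reports
  # a divergence of abs(1200.0 - 1002.0) = "198.0s".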

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
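
  # Worked example (made-up numbers): if this node is secondary for two
  # instances whose primary is "node1", needing 2048 and 4096 MiB with
  # auto_balance enabled, needed_mem is 6144 MiB; with mfree of 4096 MiB
  # the check above flags the node as not N+1 compliant for "node1".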

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_all_opt, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        # Non-optional files
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                cls.ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
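
  # Illustrative sketch of the intermediate structures above (made-up
  # values): nodefiles maps each filename to the nodes expected to hold it,
  # e.g. {"/var/lib/ganeti/known_hosts": frozenset(["node1", "node2"])},
  # while fileinfo maps filename -> checksum -> reporting nodes, e.g.
  # {"/var/lib/ganeti/known_hosts": {"3f786850e3...": set(["node1"])}}.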

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
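
  # Illustrative sketch of the return value (made-up names): for instance
  # "inst1" with one DRBD disk on nodes "node1"/"node2", instdisk would
  # roughly look like
  #   {"inst1": {"node1": [(True, <status payload>)],
  #              "node2": [(True, <status payload>)]}}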

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
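
  # Illustrative sketch (made-up names): for group nodes ["n1", "n2"] and a
  # single other group containing ["m1", "m2"], the result is roughly
  #   (["n1", "n2"], {"n1": ["m1"], "n2": ["m2"]})
  # i.e. each online node is also told to contact one node of every other
  # group, cycling through that group's members.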

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure is
    logged in the verify output and causes the verification to fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
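
    # The dictionary above is the work order sent to every node through the
    # node_verify RPC: each NV_* key names one check and its value carries
    # the parameters, e.g. NV_MASTERIP is the (name, ip) pair every node
    # should be able to reach.  Optional keys (LVM, DRBD, bridges, OOB) are
    # only added below when the corresponding feature is configured.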

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2805

    
2806
    feedback_fn("* Verifying node status")
2807

    
2808
    refos_img = None
2809

    
2810
    for node_i in node_data_list:
2811
      node = node_i.name
2812
      nimg = node_image[node]
2813

    
2814
      if node_i.offline:
2815
        if verbose:
2816
          feedback_fn("* Skipping offline node %s" % (node,))
2817
        n_offline += 1
2818
        continue
2819

    
2820
      if node == master_node:
2821
        ntype = "master"
2822
      elif node_i.master_candidate:
2823
        ntype = "master candidate"
2824
      elif node_i.drained:
2825
        ntype = "drained"
2826
        n_drained += 1
2827
      else:
2828
        ntype = "regular"
2829
      if verbose:
2830
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2831

    
2832
      msg = all_nvinfo[node].fail_msg
2833
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2834
      if msg:
2835
        nimg.rpc_fail = True
2836
        continue
2837

    
2838
      nresult = all_nvinfo[node].payload
2839

    
2840
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2841
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842
      self._VerifyNodeNetwork(node_i, nresult)
2843
      self._VerifyOob(node_i, nresult)
2844

    
2845
      if nimg.vm_capable:
2846
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2847
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2848
                             all_drbd_map)
2849

    
2850
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851
        self._UpdateNodeInstances(node_i, nresult, nimg)
2852
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853
        self._UpdateNodeOS(node_i, nresult, nimg)
2854

    
2855
        if not nimg.os_fail:
2856
          if refos_img is None:
2857
            refos_img = nimg
2858
          self._VerifyNodeOS(node_i, nimg, refos_img)
2859
        self._VerifyNodeBridges(node_i, nresult, bridges)
2860

    
2861
        # Check whether all running instancies are primary for the node. (This
2862
        # can no longer be done from _VerifyInstance below, since some of the
2863
        # wrong instances could be from other node groups.)
2864
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2865

    
2866
        for inst in non_primary_inst:
2867
          test = inst in self.all_inst_info
2868
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869
                   "instance should not run on node %s", node_i.name)
2870
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871
                   "node is running unknown instance %s", inst)
2872

    
2873
    for node, result in extra_lv_nvinfo.items():
2874
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875
                              node_image[node], vg_name)
2876

    
2877
    feedback_fn("* Verifying instance status")
2878
    for instance in self.my_inst_names:
2879
      if verbose:
2880
        feedback_fn("* Verifying instance %s" % instance)
2881
      inst_config = self.my_inst_info[instance]
2882
      self._VerifyInstance(instance, inst_config, node_image,
2883
                           instdisk[instance])
2884
      inst_nodes_offline = []
2885

    
2886
      pnode = inst_config.primary_node
2887
      pnode_img = node_image[pnode]
2888
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889
               self.ENODERPC, pnode, "instance %s, connection to"
2890
               " primary node failed", instance)
2891

    
2892
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893
               self.EINSTANCEBADNODE, instance,
2894
               "instance is marked as running and lives on offline node %s",
2895
               inst_config.primary_node)
2896

    
2897
      # If the instance is non-redundant we cannot survive losing its primary
2898
      # node, so we are not N+1 compliant. On the other hand we have no disk
2899
      # templates with more than one secondary so that situation is not well
2900
      # supported either.
2901
      # FIXME: does not support file-backed instances
2902
      if not inst_config.secondary_nodes:
2903
        i_non_redundant.append(instance)
2904

    
2905
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906
               instance, "instance has multiple secondary nodes: %s",
2907
               utils.CommaJoin(inst_config.secondary_nodes),
2908
               code=self.ETYPE_WARNING)
2909

    
2910
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911
        pnode = inst_config.primary_node
2912
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913
        instance_groups = {}
2914

    
2915
        for node in instance_nodes:
2916
          instance_groups.setdefault(self.all_node_info[node].group,
2917
                                     []).append(node)
2918

    
2919
        pretty_list = [
2920
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921
          # Sort so that we always list the primary node first.
2922
          for group, nodes in sorted(instance_groups.items(),
2923
                                     key=lambda (_, nodes): pnode in nodes,
2924
                                     reverse=True)]
2925

    
2926
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927
                      instance, "instance has primary and secondary nodes in"
2928
                      " different groups: %s", utils.CommaJoin(pretty_list),
2929
                      code=self.ETYPE_WARNING)
2930

    
2931
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932
        i_non_a_balanced.append(instance)
2933

    
2934
      for snode in inst_config.secondary_nodes:
2935
        s_img = node_image[snode]
2936
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937
                 "instance %s, connection to secondary node failed", instance)
2938

    
2939
        if s_img.offline:
2940
          inst_nodes_offline.append(snode)
2941

    
2942
      # warn that the instance lives on offline nodes
2943
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944
               "instance has offline secondary node(s) %s",
2945
               utils.CommaJoin(inst_nodes_offline))
2946
      # ... or ghost/non-vm_capable nodes
2947
      for node in inst_config.all_nodes:
2948
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949
                 "instance lives on ghost node %s", node)
2950
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951
                 instance, "instance lives on non-vm_capable node %s", node)
2952

    
2953
    feedback_fn("* Verifying orphan volumes")
2954
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2955

    
2956
    # We will get spurious "unknown volume" warnings if any node of this group
2957
    # is secondary for an instance whose primary is in another group. To avoid
2958
    # them, we find these instances and add their volumes to node_vol_should.
2959
    for inst in self.all_inst_info.values():
2960
      for secondary in inst.secondary_nodes:
2961
        if (secondary in self.my_node_info
2962
            and inst.name not in self.my_inst_info):
2963
          inst.MapLVsByNode(node_vol_should)
2964
          break
2965

    
2966
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2967

    
2968
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969
      feedback_fn("* Verifying N+1 Memory redundancy")
2970
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2971

    
2972
    feedback_fn("* Other Notes")
2973
    if i_non_redundant:
2974
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2975
                  % len(i_non_redundant))
2976

    
2977
    if i_non_a_balanced:
2978
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2979
                  % len(i_non_a_balanced))
2980

    
2981
    if n_offline:
2982
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2983

    
2984
    if n_drained:
2985
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2986

    
2987
    return not self.bad
2988

    
2989
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
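    # Illustrative example (hypothetical group names, not actual cluster
    # data): with node groups "default" and "storage" the list built above
    # would be
    #   [[opcodes.OpGroupVerifyDisks(group_name="default")],
    #    [opcodes.OpGroupVerifyDisks(group_name="storage")]]
    # i.e. one single-opcode job per node group.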
3054

    
3055

    
3056
class LUGroupVerifyDisks(NoHooksLU):
3057
  """Verifies the status of all disks in a node group.
3058

3059
  """
3060
  REQ_BGL = False
3061

    
3062
  def ExpandNames(self):
3063
    # Raises errors.OpPrereqError on its own if group can't be found
3064
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3065

    
3066
    self.share_locks = _ShareAll()
3067
    self.needed_locks = {
3068
      locking.LEVEL_INSTANCE: [],
3069
      locking.LEVEL_NODEGROUP: [],
3070
      locking.LEVEL_NODE: [],
3071
      }
3072

    
3073
  def DeclareLocks(self, level):
3074
    if level == locking.LEVEL_INSTANCE:
3075
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3076

    
3077
      # Lock instances optimistically, needs verification once node and group
3078
      # locks have been acquired
3079
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3080
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3081

    
3082
    elif level == locking.LEVEL_NODEGROUP:
3083
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3084

    
3085
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086
        set([self.group_uuid] +
3087
            # Lock all groups used by instances optimistically; this requires
3088
            # going via the node before it's locked, requiring verification
3089
            # later on
3090
            [group_uuid
3091
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3093

    
3094
    elif level == locking.LEVEL_NODE:
3095
      # This will only lock the nodes in the group to be verified which contain
3096
      # actual instances
3097
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098
      self._LockInstancesNodes()
3099

    
3100
      # Lock all nodes in group to be verified
3101
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
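      # Note: the instance and group locks requested at the earlier levels are
      # only optimistic; CheckPrereq below re-validates them once these node
      # locks are actually held.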
3104

    
3105
  def CheckPrereq(self):
3106
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3109

    
3110
    assert self.group_uuid in owned_groups
3111

    
3112
    # Check if locked instances are still correct
3113
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3114

    
3115
    # Get instance information
3116
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3117

    
3118
    # Check if node groups for locked instances are still correct
3119
    for (instance_name, inst) in self.instances.items():
3120
      assert owned_nodes.issuperset(inst.all_nodes), \
3121
        "Instance %s's nodes changed while we kept the lock" % instance_name
3122

    
3123
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3124
                                             owned_groups)
3125

    
3126
      assert self.group_uuid in inst_groups, \
3127
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3128

    
3129
  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    
3142
    nv_dict = _MapInstanceDisksToNodes([inst
3143
                                        for inst in self.instances.values()
3144
                                        if inst.admin_up])
3145

    
3146
    if nv_dict:
3147
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148
                             set(self.cfg.GetVmCapableNodeList()))
3149

    
3150
      node_lvs = self.rpc.call_lv_list(nodes, [])
3151

    
3152
      for (node, node_res) in node_lvs.items():
3153
        if node_res.offline:
3154
          continue
3155

    
3156
        msg = node_res.fail_msg
3157
        if msg:
3158
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159
          res_nodes[node] = msg
3160
          continue
3161

    
3162
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3163
          inst = nv_dict.pop((node, lv_name), None)
3164
          if not (lv_online or inst is None):
3165
            res_instances.add(inst)
3166

    
3167
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3168
      # better
3169
      for key, inst in nv_dict.iteritems():
3170
        res_missing.setdefault(inst, []).append(key)
3171

    
3172
    return (res_nodes, list(res_instances), res_missing)
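    # Illustrative shape of the return value (hypothetical names):
    #   ({"node2": "<rpc error message>"},        # nodes that failed the LV query
    #    ["instance1.example.com"],               # instances needing activate-disks
    #    {"instance2.example.com": [("node3", "xenvg/disk0")]})  # missing LVs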
3173

    
3174

    
3175
class LUClusterRepairDiskSizes(NoHooksLU):
3176
  """Verifies the cluster disks sizes.
3177

3178
  """
3179
  REQ_BGL = False
3180

    
3181
  def ExpandNames(self):
3182
    if self.op.instances:
3183
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184
      self.needed_locks = {
3185
        locking.LEVEL_NODE: [],
3186
        locking.LEVEL_INSTANCE: self.wanted_names,
3187
        }
3188
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3189
    else:
3190
      self.wanted_names = None
3191
      self.needed_locks = {
3192
        locking.LEVEL_NODE: locking.ALL_SET,
3193
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3194
        }
3195
    self.share_locks = _ShareAll()
3196

    
3197
  def DeclareLocks(self, level):
3198
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3199
      self._LockInstancesNodes(primary_only=True)
3200

    
3201
  def CheckPrereq(self):
3202
    """Check prerequisites.
3203

3204
    This only checks the optional instance list against the existing names.
3205

3206
    """
3207
    if self.wanted_names is None:
3208
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3209

    
3210
    self.wanted_instances = \
3211
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3212

    
3213
  def _EnsureChildSizes(self, disk):
3214
    """Ensure children of the disk have the needed disk size.
3215

3216
    This is valid mainly for DRBD8 and fixes an issue where the
3217
    children have smaller disk size.
3218

3219
    @param disk: an L{ganeti.objects.Disk} object
3220

3221
    """
3222
    if disk.dev_type == constants.LD_DRBD8:
3223
      assert disk.children, "Empty children for DRBD8?"
3224
      fchild = disk.children[0]
3225
      mismatch = fchild.size < disk.size
3226
      if mismatch:
3227
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3228
                     fchild.size, disk.size)
3229
        fchild.size = disk.size
3230

    
3231
      # and we recurse on this child only, not on the metadev
3232
      return self._EnsureChildSizes(fchild) or mismatch
3233
    else:
3234
      return False
3235

    
3236
  def Exec(self, feedback_fn):
3237
    """Verify the size of cluster disks.
3238

3239
    """
3240
    # TODO: check child disks too
3241
    # TODO: check differences in size between primary/secondary nodes
3242
    per_node_disks = {}
3243
    for instance in self.wanted_instances:
3244
      pnode = instance.primary_node
3245
      if pnode not in per_node_disks:
3246
        per_node_disks[pnode] = []
3247
      for idx, disk in enumerate(instance.disks):
3248
        per_node_disks[pnode].append((instance, idx, disk))
3249

    
3250
    changed = []
3251
    for node, dskl in per_node_disks.items():
3252
      newl = [v[2].Copy() for v in dskl]
3253
      for dsk in newl:
3254
        self.cfg.SetDiskID(dsk, node)
3255
      result = self.rpc.call_blockdev_getsize(node, newl)
3256
      if result.fail_msg:
3257
        self.LogWarning("Failure in blockdev_getsize call to node"
3258
                        " %s, ignoring", node)
3259
        continue
3260
      if len(result.payload) != len(dskl):
3261
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3262
                        " result.payload=%s", node, len(dskl), result.payload)
3263
        self.LogWarning("Invalid result from node %s, ignoring node results",
3264
                        node)
3265
        continue
3266
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3267
        if size is None:
3268
          self.LogWarning("Disk %d of instance %s did not return size"
3269
                          " information, ignoring", idx, instance.name)
3270
          continue
3271
        if not isinstance(size, (int, long)):
3272
          self.LogWarning("Disk %d of instance %s did not return valid"
3273
                          " size information, ignoring", idx, instance.name)
3274
          continue
3275
        size = size >> 20
3276
        if size != disk.size:
3277
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3278
                       " correcting: recorded %d, actual %d", idx,
3279
                       instance.name, disk.size, size)
3280
          disk.size = size
3281
          self.cfg.Update(instance, feedback_fn)
3282
          changed.append((instance.name, idx, size))
3283
        if self._EnsureChildSizes(disk):
3284
          self.cfg.Update(instance, feedback_fn)
3285
          changed.append((instance.name, idx, disk.size))
3286
    return changed
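    # Illustrative return value (hypothetical data): a list of
    # (instance_name, disk_index, new_size_in_MiB) tuples, e.g.
    #   [("instance1.example.com", 0, 10240)]
    # one entry per disk whose recorded size was corrected.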
3287

    
3288

    
3289
class LUClusterRename(LogicalUnit):
3290
  """Rename the cluster.
3291

3292
  """
3293
  HPATH = "cluster-rename"
3294
  HTYPE = constants.HTYPE_CLUSTER
3295

    
3296
  def BuildHooksEnv(self):
3297
    """Build hooks env.
3298

3299
    """
3300
    return {
3301
      "OP_TARGET": self.cfg.GetClusterName(),
3302
      "NEW_NAME": self.op.name,
3303
      }
3304

    
3305
  def BuildHooksNodes(self):
3306
    """Build hooks nodes.
3307

3308
    """
3309
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3310

    
3311
  def CheckPrereq(self):
3312
    """Verify that the passed name is a valid one.
3313

3314
    """
3315
    hostname = netutils.GetHostname(name=self.op.name,
3316
                                    family=self.cfg.GetPrimaryIPFamily())
3317

    
3318
    new_name = hostname.name
3319
    self.ip = new_ip = hostname.ip
3320
    old_name = self.cfg.GetClusterName()
3321
    old_ip = self.cfg.GetMasterIP()
3322
    if new_name == old_name and new_ip == old_ip:
3323
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3324
                                 " cluster has changed",
3325
                                 errors.ECODE_INVAL)
3326
    if new_ip != old_ip:
3327
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3328
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3329
                                   " reachable on the network" %
3330
                                   new_ip, errors.ECODE_NOTUNIQUE)
3331

    
3332
    self.op.name = new_name
3333

    
3334
  def Exec(self, feedback_fn):
3335
    """Rename the cluster.
3336

3337
    """
3338
    clustername = self.op.name
3339
    ip = self.ip
3340

    
3341
    # shutdown the master IP
3342
    master = self.cfg.GetMasterNode()
3343
    result = self.rpc.call_node_deactivate_master_ip(master)
3344
    result.Raise("Could not disable the master role")
3345

    
3346
    try:
3347
      cluster = self.cfg.GetClusterInfo()
3348
      cluster.cluster_name = clustername
3349
      cluster.master_ip = ip
3350
      self.cfg.Update(cluster, feedback_fn)
3351

    
3352
      # update the known hosts file
3353
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3354
      node_list = self.cfg.GetOnlineNodeList()
3355
      try:
3356
        node_list.remove(master)
3357
      except ValueError:
3358
        pass
3359
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3360
    finally:
3361
      result = self.rpc.call_node_activate_master_ip(master)
3362
      msg = result.fail_msg
3363
      if msg:
3364
        self.LogWarning("Could not re-enable the master role on"
3365
                        " the master, please restart manually: %s", msg)
3366

    
3367
    return clustername
3368

    
3369

    
3370
class LUClusterSetParams(LogicalUnit):
3371
  """Change the parameters of the cluster.
3372

3373
  """
3374
  HPATH = "cluster-modify"
3375
  HTYPE = constants.HTYPE_CLUSTER
3376
  REQ_BGL = False
3377

    
3378
  def CheckArguments(self):
3379
    """Check parameters
3380

3381
    """
3382
    if self.op.uid_pool:
3383
      uidpool.CheckUidPool(self.op.uid_pool)
3384

    
3385
    if self.op.add_uids:
3386
      uidpool.CheckUidPool(self.op.add_uids)
3387

    
3388
    if self.op.remove_uids:
3389
      uidpool.CheckUidPool(self.op.remove_uids)
3390

    
3391
  def ExpandNames(self):
3392
    # FIXME: in the future maybe other cluster params won't require checking on
3393
    # all nodes to be modified.
3394
    self.needed_locks = {
3395
      locking.LEVEL_NODE: locking.ALL_SET,
3396
    }
3397
    self.share_locks[locking.LEVEL_NODE] = 1
3398

    
3399
  def BuildHooksEnv(self):
3400
    """Build hooks env.
3401

3402
    """
3403
    return {
3404
      "OP_TARGET": self.cfg.GetClusterName(),
3405
      "NEW_VG_NAME": self.op.vg_name,
3406
      }
3407

    
3408
  def BuildHooksNodes(self):
3409
    """Build hooks nodes.
3410

3411
    """
3412
    mn = self.cfg.GetMasterNode()
3413
    return ([mn], [mn])
3414

    
3415
  def CheckPrereq(self):
3416
    """Check prerequisites.
3417

3418
    This checks whether the given params don't conflict and
3419
    if the given volume group is valid.
3420

3421
    """
3422
    if self.op.vg_name is not None and not self.op.vg_name:
3423
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3424
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3425
                                   " instances exist", errors.ECODE_INVAL)
3426

    
3427
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3428
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3429
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3430
                                   " drbd-based instances exist",
3431
                                   errors.ECODE_INVAL)
3432

    
3433
    node_list = self.owned_locks(locking.LEVEL_NODE)
3434

    
3435
    # if vg_name not None, checks given volume group on all nodes
3436
    if self.op.vg_name:
3437
      vglist = self.rpc.call_vg_list(node_list)
3438
      for node in node_list:
3439
        msg = vglist[node].fail_msg
3440
        if msg:
3441
          # ignoring down node
3442
          self.LogWarning("Error while gathering data on node %s"
3443
                          " (ignoring node): %s", node, msg)
3444
          continue
3445
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3446
                                              self.op.vg_name,
3447
                                              constants.MIN_VG_SIZE)
3448
        if vgstatus:
3449
          raise errors.OpPrereqError("Error on node '%s': %s" %
3450
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3451

    
3452
    if self.op.drbd_helper:
3453
      # checks given drbd helper on all nodes
3454
      helpers = self.rpc.call_drbd_helper(node_list)
3455
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3456
        if ninfo.offline:
3457
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3458
          continue
3459
        msg = helpers[node].fail_msg
3460
        if msg:
3461
          raise errors.OpPrereqError("Error checking drbd helper on node"
3462
                                     " '%s': %s" % (node, msg),
3463
                                     errors.ECODE_ENVIRON)
3464
        node_helper = helpers[node].payload
3465
        if node_helper != self.op.drbd_helper:
3466
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3467
                                     (node, node_helper), errors.ECODE_ENVIRON)
3468

    
3469
    self.cluster = cluster = self.cfg.GetClusterInfo()
3470
    # validate params changes
3471
    if self.op.beparams:
3472
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3473
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3474

    
3475
    if self.op.ndparams:
3476
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3477
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3478

    
3479
      # TODO: we need a more general way to handle resetting
3480
      # cluster-level parameters to default values
3481
      if self.new_ndparams["oob_program"] == "":
3482
        self.new_ndparams["oob_program"] = \
3483
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3484

    
3485
    if self.op.nicparams:
3486
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3487
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3488
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3489
      nic_errors = []
3490

    
3491
      # check all instances for consistency
3492
      for instance in self.cfg.GetAllInstancesInfo().values():
3493
        for nic_idx, nic in enumerate(instance.nics):
3494
          params_copy = copy.deepcopy(nic.nicparams)
3495
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3496

    
3497
          # check parameter syntax
3498
          try:
3499
            objects.NIC.CheckParameterSyntax(params_filled)
3500
          except errors.ConfigurationError, err:
3501
            nic_errors.append("Instance %s, nic/%d: %s" %
3502
                              (instance.name, nic_idx, err))
3503

    
3504
          # if we're moving instances to routed, check that they have an ip
3505
          target_mode = params_filled[constants.NIC_MODE]
3506
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3507
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3508
                              " address" % (instance.name, nic_idx))
3509
      if nic_errors:
3510
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3511
                                   "\n".join(nic_errors))
3512

    
3513
    # hypervisor list/parameters
3514
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3515
    if self.op.hvparams:
3516
      for hv_name, hv_dict in self.op.hvparams.items():
3517
        if hv_name not in self.new_hvparams:
3518
          self.new_hvparams[hv_name] = hv_dict
3519
        else:
3520
          self.new_hvparams[hv_name].update(hv_dict)
3521

    
3522
    # os hypervisor parameters
3523
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3524
    if self.op.os_hvp:
3525
      for os_name, hvs in self.op.os_hvp.items():
3526
        if os_name not in self.new_os_hvp:
3527
          self.new_os_hvp[os_name] = hvs
3528
        else:
3529
          for hv_name, hv_dict in hvs.items():
3530
            if hv_name not in self.new_os_hvp[os_name]:
3531
              self.new_os_hvp[os_name][hv_name] = hv_dict
3532
            else:
3533
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3534

    
3535
    # os parameters
3536
    self.new_osp = objects.FillDict(cluster.osparams, {})
3537
    if self.op.osparams:
3538
      for os_name, osp in self.op.osparams.items():
3539
        if os_name not in self.new_osp:
3540
          self.new_osp[os_name] = {}
3541

    
3542
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3543
                                                  use_none=True)
3544

    
3545
        if not self.new_osp[os_name]:
3546
          # we removed all parameters
3547
          del self.new_osp[os_name]
3548
        else:
3549
          # check the parameter validity (remote check)
3550
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3551
                         os_name, self.new_osp[os_name])
3552

    
3553
    # changes to the hypervisor list
3554
    if self.op.enabled_hypervisors is not None:
3555
      self.hv_list = self.op.enabled_hypervisors
3556
      for hv in self.hv_list:
3557
        # if the hypervisor doesn't already exist in the cluster
3558
        # hvparams, we initialize it to empty, and then (in both
3559
        # cases) we make sure to fill the defaults, as we might not
3560
        # have a complete defaults list if the hypervisor wasn't
3561
        # enabled before
3562
        if hv not in new_hvp:
3563
          new_hvp[hv] = {}
3564
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3565
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3566
    else:
3567
      self.hv_list = cluster.enabled_hypervisors
3568

    
3569
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3570
      # either the enabled list has changed, or the parameters have, validate
3571
      for hv_name, hv_params in self.new_hvparams.items():
3572
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3573
            (self.op.enabled_hypervisors and
3574
             hv_name in self.op.enabled_hypervisors)):
3575
          # either this is a new hypervisor, or its parameters have changed
3576
          hv_class = hypervisor.GetHypervisor(hv_name)
3577
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3578
          hv_class.CheckParameterSyntax(hv_params)
3579
          _CheckHVParams(self, node_list, hv_name, hv_params)
3580

    
3581
    if self.op.os_hvp:
3582
      # no need to check any newly-enabled hypervisors, since the
3583
      # defaults have already been checked in the above code-block
3584
      for os_name, os_hvp in self.new_os_hvp.items():
3585
        for hv_name, hv_params in os_hvp.items():
3586
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3587
          # we need to fill in the new os_hvp on top of the actual hv_p
3588
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3589
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3590
          hv_class = hypervisor.GetHypervisor(hv_name)
3591
          hv_class.CheckParameterSyntax(new_osp)
3592
          _CheckHVParams(self, node_list, hv_name, new_osp)
3593

    
3594
    if self.op.default_iallocator:
3595
      alloc_script = utils.FindFile(self.op.default_iallocator,
3596
                                    constants.IALLOCATOR_SEARCH_PATH,
3597
                                    os.path.isfile)
3598
      if alloc_script is None:
3599
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3600
                                   " specified" % self.op.default_iallocator,
3601
                                   errors.ECODE_INVAL)
3602

    
3603
  def Exec(self, feedback_fn):
3604
    """Change the parameters of the cluster.
3605

3606
    """
3607
    if self.op.vg_name is not None:
3608
      new_volume = self.op.vg_name
3609
      if not new_volume:
3610
        new_volume = None
3611
      if new_volume != self.cfg.GetVGName():
3612
        self.cfg.SetVGName(new_volume)
3613
      else:
3614
        feedback_fn("Cluster LVM configuration already in desired"
3615
                    " state, not changing")
3616
    if self.op.drbd_helper is not None:
3617
      new_helper = self.op.drbd_helper
3618
      if not new_helper:
3619
        new_helper = None
3620
      if new_helper != self.cfg.GetDRBDHelper():
3621
        self.cfg.SetDRBDHelper(new_helper)
3622
      else:
3623
        feedback_fn("Cluster DRBD helper already in desired state,"
3624
                    " not changing")
3625
    if self.op.hvparams:
3626
      self.cluster.hvparams = self.new_hvparams
3627
    if self.op.os_hvp:
3628
      self.cluster.os_hvp = self.new_os_hvp
3629
    if self.op.enabled_hypervisors is not None:
3630
      self.cluster.hvparams = self.new_hvparams
3631
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3632
    if self.op.beparams:
3633
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3634
    if self.op.nicparams:
3635
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3636
    if self.op.osparams:
3637
      self.cluster.osparams = self.new_osp
3638
    if self.op.ndparams:
3639
      self.cluster.ndparams = self.new_ndparams
3640

    
3641
    if self.op.candidate_pool_size is not None:
3642
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3643
      # we need to update the pool size here, otherwise the save will fail
3644
      _AdjustCandidatePool(self, [])
3645

    
3646
    if self.op.maintain_node_health is not None:
3647
      self.cluster.maintain_node_health = self.op.maintain_node_health
3648

    
3649
    if self.op.prealloc_wipe_disks is not None:
3650
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3651

    
3652
    if self.op.add_uids is not None:
3653
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3654

    
3655
    if self.op.remove_uids is not None:
3656
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3657

    
3658
    if self.op.uid_pool is not None:
3659
      self.cluster.uid_pool = self.op.uid_pool
3660

    
3661
    if self.op.default_iallocator is not None:
3662
      self.cluster.default_iallocator = self.op.default_iallocator
3663

    
3664
    if self.op.reserved_lvs is not None:
3665
      self.cluster.reserved_lvs = self.op.reserved_lvs
3666

    
3667
    def helper_os(aname, mods, desc):
3668
      desc += " OS list"
3669
      lst = getattr(self.cluster, aname)
3670
      for key, val in mods:
3671
        if key == constants.DDM_ADD:
3672
          if val in lst:
3673
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3674
          else:
3675
            lst.append(val)
3676
        elif key == constants.DDM_REMOVE:
3677
          if val in lst:
3678
            lst.remove(val)
3679
          else:
3680
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3681
        else:
3682
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3683

    
3684
    if self.op.hidden_os:
3685
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3686

    
3687
    if self.op.blacklisted_os:
3688
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3689

    
3690
    if self.op.master_netdev:
3691
      master = self.cfg.GetMasterNode()
3692
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3693
                  self.cluster.master_netdev)
3694
      result = self.rpc.call_node_deactivate_master_ip(master)
3695
      result.Raise("Could not disable the master ip")
3696
      feedback_fn("Changing master_netdev from %s to %s" %
3697
                  (self.cluster.master_netdev, self.op.master_netdev))
3698
      self.cluster.master_netdev = self.op.master_netdev
3699

    
3700
    self.cfg.Update(self.cluster, feedback_fn)
3701

    
3702
    if self.op.master_netdev:
3703
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3704
                  self.op.master_netdev)
3705
      result = self.rpc.call_node_activate_master_ip(master)
3706
      if result.fail_msg:
3707
        self.LogWarning("Could not re-enable the master ip on"
3708
                        " the master, please restart manually: %s",
3709
                        result.fail_msg)
3710

    
3711

    
3712
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)
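  # Summary of the tuple returned above: files_all must be present on every
  # node, files_all_opt must exist either on all nodes or on none, files_mc is
  # restricted to master candidates (the cluster config, only when not
  # redistributing) and files_vm only matters on VM-capable nodes.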
3771

    
3772

    
3773
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3774
  """Distribute additional files which are part of the cluster configuration.
3775

3776
  ConfigWriter takes care of distributing the config and ssconf files, but
3777
  there are more files which should be distributed to all nodes. This function
3778
  makes sure those are copied.
3779

3780
  @param lu: calling logical unit
3781
  @param additional_nodes: list of nodes not in the config to distribute to
3782
  @type additional_vm: boolean
3783
  @param additional_vm: whether the additional nodes are vm-capable or not
3784

3785
  """
3786
  # Gather target nodes
3787
  cluster = lu.cfg.GetClusterInfo()
3788
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3789

    
3790
  online_nodes = lu.cfg.GetOnlineNodeList()
3791
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3792

    
3793
  if additional_nodes is not None:
3794
    online_nodes.extend(additional_nodes)
3795
    if additional_vm:
3796
      vm_nodes.extend(additional_nodes)
3797

    
3798
  # Never distribute to master node
3799
  for nodelist in [online_nodes, vm_nodes]:
3800
    if master_info.name in nodelist:
3801
      nodelist.remove(master_info.name)
3802

    
3803
  # Gather file lists
3804
  (files_all, files_all_opt, files_mc, files_vm) = \
3805
    _ComputeAncillaryFiles(cluster, True)
3806

    
3807
  # Never re-distribute configuration file from here
3808
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3809
              constants.CLUSTER_CONF_FILE in files_vm)
3810
  assert not files_mc, "Master candidates not handled in this function"
3811

    
3812
  filemap = [
3813
    (online_nodes, files_all),
3814
    (online_nodes, files_all_opt),
3815
    (vm_nodes, files_vm),
3816
    ]
3817

    
3818
  # Upload the files
3819
  for (node_list, files) in filemap:
3820
    for fname in files:
3821
      _UploadHelper(lu, node_list, fname)
3822

    
3823

    
3824
class LUClusterRedistConf(NoHooksLU):
3825
  """Force the redistribution of cluster configuration.
3826

3827
  This is a very simple LU.
3828

3829
  """
3830
  REQ_BGL = False
3831

    
3832
  def ExpandNames(self):
3833
    self.needed_locks = {
3834
      locking.LEVEL_NODE: locking.ALL_SET,
3835
    }
3836
    self.share_locks[locking.LEVEL_NODE] = 1
3837

    
3838
  def Exec(self, feedback_fn):
3839
    """Redistribute the configuration.
3840

3841
    """
3842
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3843
    _RedistributeAncillaryFiles(self)
3844

    
3845

    
3846
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_activate_master_ip(master)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_deactivate_master_ip(master)

    
3869

    
3870
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3871
  """Sleep and poll for an instance's disk to sync.
3872

3873
  """
3874
  if not instance.disks or disks is not None and not disks:
3875
    return True
3876

    
3877
  disks = _ExpandCheckDisks(instance, disks)
3878

    
3879
  if not oneshot:
3880
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3881

    
3882
  node = instance.primary_node
3883

    
3884
  for dev in disks:
3885
    lu.cfg.SetDiskID(dev, node)
3886

    
3887
  # TODO: Convert to utils.Retry
3888

    
3889
  retries = 0
3890
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3891
  while True:
3892
    max_time = 0
3893
    done = True
3894
    cumul_degraded = False
3895
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3896
    msg = rstats.fail_msg
3897
    if msg:
3898
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3899
      retries += 1
3900
      if retries >= 10:
3901
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3902
                                 " aborting." % node)
3903
      time.sleep(6)
3904
      continue
3905
    rstats = rstats.payload
3906
    retries = 0
3907
    for i, mstat in enumerate(rstats):
3908
      if mstat is None:
3909
        lu.LogWarning("Can't compute data for node %s/%s",
3910
                           node, disks[i].iv_name)
3911
        continue
3912

    
3913
      cumul_degraded = (cumul_degraded or
3914
                        (mstat.is_degraded and mstat.sync_percent is None))
3915
      if mstat.sync_percent is not None:
3916
        done = False
3917
        if mstat.estimated_time is not None:
3918
          rem_time = ("%s remaining (estimated)" %
3919
                      utils.FormatSeconds(mstat.estimated_time))
3920
          max_time = mstat.estimated_time
3921
        else:
3922
          rem_time = "no time estimate"
3923
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3924
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3925

    
3926
    # if we're done but degraded, let's do a few small retries, to
3927
    # make sure we see a stable and not transient situation; therefore
3928
    # we force restart of the loop
3929
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3930
      logging.info("Degraded disks found, %d retries left", degr_retries)
3931
      degr_retries -= 1
3932
      time.sleep(1)
3933
      continue
3934

    
3935
    if done or oneshot:
3936
      break
3937

    
3938
    time.sleep(min(60, max_time))
3939

    
3940
  if done:
3941
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3942
  return not cumul_degraded
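  # Usage sketch (illustrative, hypothetical caller): _WaitForSync(self,
  # instance) blocks until no disk of the instance reports a sync in progress
  # and returns True only if none of the mirrors is left degraded; passing
  # oneshot=True performs a single status poll instead of waiting.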
3943

    
3944

    
3945
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3946
  """Check that mirrors are not degraded.
3947

3948
  The ldisk parameter, if True, will change the test from the
3949
  is_degraded attribute (which represents overall non-ok status for
3950
  the device(s)) to the ldisk (representing the local storage status).
3951

3952
  """
3953
  lu.cfg.SetDiskID(dev, node)
3954

    
3955
  result = True
3956

    
3957
  if on_primary or dev.AssembleOnSecondary():
3958
    rstats = lu.rpc.call_blockdev_find(node, dev)
3959
    msg = rstats.fail_msg
3960
    if msg:
3961
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3962
      result = False
3963
    elif not rstats.payload:
3964
      lu.LogWarning("Can't find disk on node %s", node)
3965
      result = False
3966
    else:
3967
      if ldisk:
3968
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3969
      else:
3970
        result = result and not rstats.payload.is_degraded
3971

    
3972
  if dev.children:
3973
    for child in dev.children:
3974
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3975

    
3976
  return result
3977

    
3978

    
3979
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3985

    
3986
  def ExpandNames(self):
3987
    """Gather locks we need.
3988

3989
    """
3990
    if self.op.node_names:
3991
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3992
      lock_names = self.op.node_names
3993
    else:
3994
      lock_names = locking.ALL_SET
3995

    
3996
    self.needed_locks = {
3997
      locking.LEVEL_NODE: lock_names,
3998
      }
3999

    
4000
  def CheckPrereq(self):
4001
    """Check prerequisites.
4002

4003
    This checks:
4004
     - the node exists in the configuration
4005
     - OOB is supported
4006

4007
    Any errors are signaled by raising errors.OpPrereqError.
4008

4009
    """
4010
    self.nodes = []
4011
    self.master_node = self.cfg.GetMasterNode()
4012

    
4013
    assert self.op.power_delay >= 0.0
4014

    
4015
    if self.op.node_names:
4016
      if (self.op.command in self._SKIP_MASTER and
4017
          self.master_node in self.op.node_names):
4018
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4019
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4020

    
4021
        if master_oob_handler:
4022
          additional_text = ("run '%s %s %s' if you want to operate on the"
4023
                             " master regardless") % (master_oob_handler,
4024
                                                      self.op.command,
4025
                                                      self.master_node)
4026
        else:
4027
          additional_text = "it does not support out-of-band operations"
4028

    
4029
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4030
                                    " allowed for %s; %s") %
4031
                                   (self.master_node, self.op.command,
4032
                                    additional_text), errors.ECODE_INVAL)
4033
    else:
4034
      self.op.node_names = self.cfg.GetNodeList()
4035
      if self.op.command in self._SKIP_MASTER:
4036
        self.op.node_names.remove(self.master_node)
4037

    
4038
    if self.op.command in self._SKIP_MASTER:
4039
      assert self.master_node not in self.op.node_names
4040

    
4041
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4042
      if node is None:
4043
        raise errors.OpPrereqError("Node %s not found" % node_name,
4044
                                   errors.ECODE_NOENT)
4045
      else:
4046
        self.nodes.append(node)
4047

    
4048
      if (not self.op.ignore_status and
4049
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4050
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4051
                                    " not marked offline") % node_name,
4052
                                   errors.ECODE_STATE)
4053

    
4054
  def Exec(self, feedback_fn):
4055
    """Execute OOB and return result if we expect any.
4056

4057
    """
4058
    master_node = self.master_node
4059
    ret = []
4060

    
4061
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4062
                                              key=lambda node: node.name)):
4063
      node_entry = [(constants.RS_NORMAL, node.name)]
4064
      ret.append(node_entry)
4065

    
4066
      oob_program = _SupportsOob(self.cfg, node)
4067

    
4068
      if not oob_program:
4069
        node_entry.append((constants.RS_UNAVAIL, None))
4070
        continue
4071

    
4072
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4073
                   self.op.command, oob_program, node.name)
4074
      result = self.rpc.call_run_oob(master_node, oob_program,
4075
                                     self.op.command, node.name,
4076
                                     self.op.timeout)
4077

    
4078
      if result.fail_msg:
4079
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4080
                        node.name, result.fail_msg)
4081
        node_entry.append((constants.RS_NODATA, None))
4082
      else:
4083
        try:
4084
          self._CheckPayload(result)
4085
        except errors.OpExecError, err:
4086
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4087
                          node.name, err)
4088
          node_entry.append((constants.RS_NODATA, None))
4089
        else:
4090
          if self.op.command == constants.OOB_HEALTH:
4091
            # For health we should log important events
4092
            for item, status in result.payload:
4093
              if status in [constants.OOB_STATUS_WARNING,
4094
                            constants.OOB_STATUS_CRITICAL]:
4095
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4096
                                item, node.name, status)
4097

    
4098
          if self.op.command == constants.OOB_POWER_ON:
4099
            node.powered = True
4100
          elif self.op.command == constants.OOB_POWER_OFF:
4101
            node.powered = False
4102
          elif self.op.command == constants.OOB_POWER_STATUS:
4103
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4104
            if powered != node.powered:
4105
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4106
                               " match actual power state (%s)"), node.powered,
4107
                              node.name, powered)
4108

    
4109
          # For configuration changing commands we should update the node
4110
          if self.op.command in (constants.OOB_POWER_ON,
4111
                                 constants.OOB_POWER_OFF):
4112
            self.cfg.Update(node, feedback_fn)
4113

    
4114
          node_entry.append((constants.RS_NORMAL, result.payload))
4115

    
4116
          if (self.op.command == constants.OOB_POWER_ON and
4117
              idx < len(self.nodes) - 1):
4118
            time.sleep(self.op.power_delay)
4119

    
4120
    return ret
4121

    
4122
  def _CheckPayload(self, result):
4123
    """Checks if the payload is valid.
4124

4125
    @param result: RPC result
4126
    @raises errors.OpExecError: If payload is not valid
4127

4128
    """
4129
    errs = []
4130
    if self.op.command == constants.OOB_HEALTH:
4131
      if not isinstance(result.payload, list):
4132
        errs.append("command 'health' is expected to return a list but got %s" %
4133
                    type(result.payload))
4134
      else:
4135
        for item, status in result.payload:
4136
          if status not in constants.OOB_STATUSES:
4137
            errs.append("health item '%s' has invalid status '%s'" %
4138
                        (item, status))
4139

    
4140
    if self.op.command == constants.OOB_POWER_STATUS:
4141
      if not isinstance(result.payload, dict):
4142
        errs.append("power-status is expected to return a dict but got %s" %
4143
                    type(result.payload))
4144

    
4145
    if self.op.command in [
4146
        constants.OOB_POWER_ON,
4147
        constants.OOB_POWER_OFF,
4148
        constants.OOB_POWER_CYCLE,
4149
        ]:
4150
      if result.payload is not None:
4151
        errs.append("%s is expected to not return payload but got '%s'" %
4152
                    (self.op.command, result.payload))
4153

    
4154
    if errs:
4155
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4156
                               utils.CommaJoin(errs))
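    # Illustrative "health" payload that passes this check (hypothetical item
    # names): [("psu0", constants.OOB_STATUS_WARNING),
    #          ("fan1", constants.OOB_STATUS_CRITICAL)]
    # i.e. a list of (item, status) pairs with statuses drawn from
    # constants.OOB_STATUSES.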
4157

    
4158

    
4159
class _OsQuery(_QueryBase):
4160
  FIELDS = query.OS_FIELDS
4161

    
4162
  def ExpandNames(self, lu):
4163
    # Lock all nodes in shared mode
4164
    # Temporary removal of locks, should be reverted later
4165
    # TODO: reintroduce locks when they are lighter-weight
4166
    lu.needed_locks = {}
4167
    #self.share_locks[locking.LEVEL_NODE] = 1
4168
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4169

    
4170
    # The following variables interact with _QueryBase._GetNames
4171
    if self.names:
4172
      self.wanted = self.names
4173
    else:
4174
      self.wanted = locking.ALL_SET
4175

    
4176
    self.do_locking = self.use_locking
4177

    
4178
  def DeclareLocks(self, lu, level):
4179
    pass
4180

    
4181
  @staticmethod
4182
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values
4186

4187
    @rtype: dict
4188
    @return: a dictionary with osnames as keys and as value another
4189
        map, with nodes as keys and tuples of (path, status, diagnose,
4190
        variants, parameters, api_versions) as values, eg::
4191

4192
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4193
                                     (/srv/..., False, "invalid api")],
4194
                           "node2": [(/srv/..., True, "", [], [])]}
4195
          }
4196

4197
    """
4198
    all_os = {}
4199
    # we build here the list of nodes that didn't fail the RPC (at RPC
4200
    # level), so that nodes with a non-responding node daemon don't
4201
    # make all OSes invalid
4202
    good_nodes = [node_name for node_name in rlist
4203
                  if not rlist[node_name].fail_msg]
4204
    for node_name, nr in rlist.items():
4205
      if nr.fail_msg or not nr.payload:
4206
        continue
4207
      for (name, path, status, diagnose, variants,
4208
           params, api_versions) in nr.payload:
4209
        if name not in all_os:
4210
          # build a list of nodes for this os containing empty lists
4211
          # for each node in node_list
4212
          all_os[name] = {}
4213
          for nname in good_nodes:
4214
            all_os[name][nname] = []
4215
        # convert params from [name, help] to (name, help)
4216
        params = [tuple(v) for v in params]
4217
        all_os[name][node_name].append((path, status, diagnose,
4218
                                        variants, params, api_versions))
4219
    return all_os
4220

    
4221
  def _GetQueryData(self, lu):
4222
    """Computes the list of nodes and their attributes.
4223

4224
    """
4225
    # Locking is not used
4226
    assert not (compat.any(lu.glm.is_owned(level)
4227
                           for level in locking.LEVELS
4228
                           if level != locking.LEVEL_CLUSTER) or
4229
                self.do_locking or self.use_locking)
4230

    
4231
    valid_nodes = [node.name
4232
                   for node in lu.cfg.GetAllNodesInfo().values()
4233
                   if not node.offline and node.vm_capable]
4234
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4235
    cluster = lu.cfg.GetClusterInfo()
4236

    
4237
    data = {}
4238

    
4239
    for (os_name, os_data) in pol.items():
4240
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4241
                          hidden=(os_name in cluster.hidden_os),
4242
                          blacklisted=(os_name in cluster.blacklisted_os))
4243

    
4244
      variants = set()
4245
      parameters = set()
4246
      api_versions = set()
4247

    
4248
      for idx, osl in enumerate(os_data.values()):
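        # an OS remains valid only if every node reports a valid first entry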
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
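      # keep only nodes that answered the RPC and returned a payload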
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
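      # the node locks are recomputed from the instance locks in DeclareLocks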
4676
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4677

    
4678
    self.do_grouplocks = (self.do_locking and
4679
                          query.IQ_NODES in self.requested_data)
4680

    
4681
  def DeclareLocks(self, lu, level):
4682
    if self.do_locking:
4683
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4684
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4685

    
4686
        # Lock all groups used by instances optimistically; this requires going
4687
        # via the node before it's locked, requiring verification later on
4688
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4689
          set(group_uuid
4690
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4691
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4692
      elif level == locking.LEVEL_NODE:
4693
        lu._LockInstancesNodes() # pylint: disable=W0212
4694

    
4695
  @staticmethod
4696
  def _CheckGroupLocks(lu):
4697
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4698
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4699

    
4700
    # Check if node groups for locked instances are still correct
4701
    for instance_name in owned_instances:
4702
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4703

    
4704
  def _GetQueryData(self, lu):
4705
    """Computes the list of instances and their attributes.
4706

4707
    """
4708
    if self.do_grouplocks:
4709
      self._CheckGroupLocks(lu)
4710

    
4711
    cluster = lu.cfg.GetClusterInfo()
4712
    all_info = lu.cfg.GetAllInstancesInfo()
4713

    
4714
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4715

    
4716
    instance_list = [all_info[name] for name in instance_names]
4717
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4718
                                        for inst in instance_list)))
4719
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4720
    bad_nodes = []
4721
    offline_nodes = []
4722
    wrongnode_inst = set()
4723

    
4724
    # Gather data as requested
4725
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4726
      live_data = {}
4727
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4728
      for name in nodes:
4729
        result = node_data[name]
4730
        if result.offline:
4731
          # offline nodes will be in both lists
4732
          assert result.fail_msg
4733
          offline_nodes.append(name)
4734
        if result.fail_msg:
4735
          bad_nodes.append(name)
4736
        elif result.payload:
4737
          for inst in result.payload:
4738
            if inst in all_info:
4739
              if all_info[inst].primary_node == name:
4740
                live_data.update(result.payload)
4741
              else:
4742
                wrongnode_inst.add(inst)
4743
            else:
4744
              # orphan instance; we don't list it here as we don't
4745
              # handle this case yet in the output of instance listing
4746
              logging.warning("Orphan instance '%s' found on node %s",
4747
                              inst, name)
4748
        # else no instance is alive
4749
    else:
4750
      live_data = {}
4751

    
4752
    if query.IQ_DISKUSAGE in self.requested_data:
4753
      disk_usage = dict((inst.name,
4754
                         _ComputeDiskSize(inst.disk_template,
4755
                                          [{constants.IDISK_SIZE: disk.size}
4756
                                           for disk in inst.disks]))
4757
                        for inst in instance_list)
4758
    else:
4759
      disk_usage = None
4760

    
4761
    if query.IQ_CONSOLE in self.requested_data:
4762
      consinfo = {}
4763
      for inst in instance_list:
4764
        if inst.name in live_data:
4765
          # Instance is running
4766
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4767
        else:
4768
          consinfo[inst.name] = None
4769
      assert set(consinfo.keys()) == set(instance_names)
4770
    else:
4771
      consinfo = None
4772

    
4773
    if query.IQ_NODES in self.requested_data:
4774
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4775
                                            instance_list)))
4776
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4777
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4778
                    for uuid in set(map(operator.attrgetter("group"),
4779
                                        nodes.values())))
4780
    else:
4781
      nodes = None
4782
      groups = None
4783

    
4784
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4785
                                   disk_usage, offline_nodes, bad_nodes,
4786
                                   live_data, wrongnode_inst, consinfo,
4787
                                   nodes, groups)
4788

    
4789

    
4790
class LUQuery(NoHooksLU):
4791
  """Query for resources/items of a certain kind.
4792

4793
  """
4794
  # pylint: disable=W0142
4795
  REQ_BGL = False
4796

    
4797
  def CheckArguments(self):
4798
    qcls = _GetQueryImplementation(self.op.what)
4799

    
4800
    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4801

    
4802
  def ExpandNames(self):
4803
    self.impl.ExpandNames(self)
4804

    
4805
  def DeclareLocks(self, level):
4806
    self.impl.DeclareLocks(self, level)
4807

    
4808
  def Exec(self, feedback_fn):
4809
    return self.impl.NewStyleQuery(self)
4810

    
4811

    
4812
class LUQueryFields(NoHooksLU):
4813
  """Query for resources/items of a certain kind.
4814

4815
  """
4816
  # pylint: disable=W0142
4817
  REQ_BGL = False
4818

    
4819
  def CheckArguments(self):
4820
    self.qcls = _GetQueryImplementation(self.op.what)
4821

    
4822
  def ExpandNames(self):
4823
    self.needed_locks = {}
4824

    
4825
  def Exec(self, feedback_fn):
4826
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4827

    
4828

    
4829
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


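# Shape of the return value (example values are illustrative only):
# device_info is a list of (node_name, iv_name, device_path) tuples, e.g.
# ("node1.example.com", "disk/0", "/dev/drbd0"); _StartInstanceDisks below is
# a typical caller that only inspects the disks_ok flag.
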
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that an instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


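# Usage sketch (illustrative): _ExpandCheckDisks(instance, None) selects all
# of the instance's disks, while a subset such as [instance.disks[0]] limits
# the assembly/shutdown helpers to that disk; disks not belonging to the
# instance raise ProgrammerError.
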
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if ignore_primary is
  true.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


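# Example call (values are illustrative only): _CheckNodeFreeMemory(self,
# "node1.example.com", "starting instance inst1", 512, "kvm") raises
# OpPrereqError unless the node reports at least 512 MiB of free memory for
# the kvm hypervisor.
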
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


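# Example call (values are illustrative only): _CheckNodesFreeDiskPerVG(self,
# ["node1", "node2"], {"xenvg": 10240}) checks that both nodes report at
# least 10 GiB of free space in volume group "xenvg", fanning out to
# _CheckNodesFreeDiskOnVG for every entry of the dict.
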
class LUInstanceStartup(LogicalUnit):
5912
  """Starts an instance.
5913

5914
  """
5915
  HPATH = "instance-start"
5916
  HTYPE = constants.HTYPE_INSTANCE
5917
  REQ_BGL = False
5918

    
5919
  def CheckArguments(self):
5920
    # extra beparams
5921
    if self.op.beparams:
5922
      # fill the beparams dict
5923
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5924

    
5925
  def ExpandNames(self):
5926
    self._ExpandAndLockInstance()
5927

    
5928
  def BuildHooksEnv(self):
5929
    """Build hooks env.
5930

5931
    This runs on master, primary and secondary nodes of the instance.
5932

5933
    """
5934
    env = {
5935
      "FORCE": self.op.force,
5936
      }
5937

    
5938
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5939

    
5940
    return env
5941

    
5942
  def BuildHooksNodes(self):
5943
    """Build hooks nodes.
5944

5945
    """
5946
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5947
    return (nl, nl)
5948

    
5949
  def CheckPrereq(self):
5950
    """Check prerequisites.
5951

5952
    This checks that the instance is in the cluster.
5953

5954
    """
5955
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5956
    assert self.instance is not None, \
5957
      "Cannot retrieve locked instance %s" % self.op.instance_name
5958

    
5959
    # extra hvparams
5960
    if self.op.hvparams:
5961
      # check hypervisor parameter syntax (locally)
5962
      cluster = self.cfg.GetClusterInfo()
5963
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5964
      filled_hvp = cluster.FillHV(instance)
5965
      filled_hvp.update(self.op.hvparams)
5966
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5967
      hv_type.CheckParameterSyntax(filled_hvp)
5968
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5969

    
5970
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5971

    
5972
    if self.primary_offline and self.op.ignore_offline_nodes:
5973
      self.proc.LogWarning("Ignoring offline primary node")
5974

    
5975
      if self.op.hvparams or self.op.beparams:
5976
        self.proc.LogWarning("Overridden parameters are ignored")
5977
    else:
5978
      _CheckNodeOnline(self, instance.primary_node)
5979

    
5980
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5981

    
5982
      # check bridges existence
5983
      _CheckInstanceBridgesExist(self, instance)
5984

    
5985
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5986
                                                instance.name,
5987
                                                instance.hypervisor)
5988
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5989
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5990
      if not remote_info.payload: # not running already
5991
        _CheckNodeFreeMemory(self, instance.primary_node,
5992
                             "starting instance %s" % instance.name,
5993
                             bep[constants.BE_MEMORY], instance.hypervisor)
5994

    
5995
  def Exec(self, feedback_fn):
5996
    """Start the instance.
5997

5998
    """
5999
    instance = self.instance
6000
    force = self.op.force
6001

    
6002
    if not self.op.no_remember:
6003
      self.cfg.MarkInstanceUp(instance.name)
6004

    
6005
    if self.primary_offline:
6006
      assert self.op.ignore_offline_nodes
6007
      self.proc.LogInfo("Primary node offline, marked instance as started")
6008
    else:
6009
      node_current = instance.primary_node
6010

    
6011
      _StartInstanceDisks(self, instance, force)
6012

    
6013
      result = self.rpc.call_instance_start(node_current, instance,
6014
                                            self.op.hvparams, self.op.beparams,
6015
                                            self.op.startup_paused)
6016
      msg = result.fail_msg
6017
      if msg:
6018
        _ShutdownInstanceDisks(self, instance)
6019
        raise errors.OpExecError("Could not start instance: %s" % msg)
6020

    
6021

    
6022
class LUInstanceReboot(LogicalUnit):
6023
  """Reboot an instance.
6024

6025
  """
6026
  HPATH = "instance-reboot"
6027
  HTYPE = constants.HTYPE_INSTANCE
6028
  REQ_BGL = False
6029

    
6030
  def ExpandNames(self):
6031
    self._ExpandAndLockInstance()
6032

    
6033
  def BuildHooksEnv(self):
6034
    """Build hooks env.
6035

6036
    This runs on master, primary and secondary nodes of the instance.
6037

6038
    """
6039
    env = {
6040
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6041
      "REBOOT_TYPE": self.op.reboot_type,
6042
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6043
      }
6044

    
6045
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6046

    
6047
    return env
6048

    
6049
  def BuildHooksNodes(self):
6050
    """Build hooks nodes.
6051

6052
    """
6053
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6054
    return (nl, nl)
6055

    
6056
  def CheckPrereq(self):
6057
    """Check prerequisites.
6058

6059
    This checks that the instance is in the cluster.
6060

6061
    """
6062
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6063
    assert self.instance is not None, \
6064
      "Cannot retrieve locked instance %s" % self.op.instance_name
6065

    
6066
    _CheckNodeOnline(self, instance.primary_node)
6067

    
6068
    # check bridges existence
6069
    _CheckInstanceBridgesExist(self, instance)
6070

    
6071
  def Exec(self, feedback_fn):
6072
    """Reboot the instance.
6073

6074
    """
6075
    instance = self.instance
6076
    ignore_secondaries = self.op.ignore_secondaries
6077
    reboot_type = self.op.reboot_type
6078

    
6079
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6080
                                              instance.name,
6081
                                              instance.hypervisor)
6082
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6083
    instance_running = bool(remote_info.payload)
6084

    
6085
    node_current = instance.primary_node
6086

    
6087
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6088
                                            constants.INSTANCE_REBOOT_HARD]:
6089
      for disk in instance.disks:
6090
        self.cfg.SetDiskID(disk, node_current)
6091
      result = self.rpc.call_instance_reboot(node_current, instance,
6092
                                             reboot_type,
6093
                                             self.op.shutdown_timeout)
6094
      result.Raise("Could not reboot instance")
6095
    else:
6096
      if instance_running:
6097
        result = self.rpc.call_instance_shutdown(node_current, instance,
6098
                                                 self.op.shutdown_timeout)
6099
        result.Raise("Could not shutdown instance for full reboot")
6100
        _ShutdownInstanceDisks(self, instance)
6101
      else:
6102
        self.LogInfo("Instance %s was already stopped, starting now",
6103
                     instance.name)
6104
      _StartInstanceDisks(self, instance, ignore_secondaries)
6105
      result = self.rpc.call_instance_start(node_current, instance,
6106
                                            None, None, False)
6107
      msg = result.fail_msg
6108
      if msg:
6109
        _ShutdownInstanceDisks(self, instance)
6110
        raise errors.OpExecError("Could not start instance for"
6111
                                 " full reboot: %s" % msg)
6112

    
6113
    self.cfg.MarkInstanceUp(instance.name)
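    # Note: the soft/hard branch above delegates the reboot to the hypervisor
    # on the primary node, while the full-reboot branch emulates it by
    # shutting the instance down, restarting its disks and starting it again;
    # in both cases the instance is then marked as up in the configuration.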
6114

    
6115

    
6116
class LUInstanceShutdown(LogicalUnit):
6117
  """Shutdown an instance.
6118

6119
  """
6120
  HPATH = "instance-stop"
6121
  HTYPE = constants.HTYPE_INSTANCE
6122
  REQ_BGL = False
6123

    
6124
  def ExpandNames(self):
6125
    self._ExpandAndLockInstance()
6126

    
6127
  def BuildHooksEnv(self):
6128
    """Build hooks env.
6129

6130
    This runs on master, primary and secondary nodes of the instance.
6131

6132
    """
6133
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6134
    env["TIMEOUT"] = self.op.timeout
6135
    return env
6136

    
6137
  def BuildHooksNodes(self):
6138
    """Build hooks nodes.
6139

6140
    """
6141
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6142
    return (nl, nl)
6143

    
6144
  def CheckPrereq(self):
6145
    """Check prerequisites.
6146

6147
    This checks that the instance is in the cluster.
6148

6149
    """
6150
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6151
    assert self.instance is not None, \
6152
      "Cannot retrieve locked instance %s" % self.op.instance_name
6153

    
6154
    self.primary_offline = \
6155
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6156

    
6157
    if self.primary_offline and self.op.ignore_offline_nodes:
6158
      self.proc.LogWarning("Ignoring offline primary node")
6159
    else:
6160
      _CheckNodeOnline(self, self.instance.primary_node)
6161

    
6162
  def Exec(self, feedback_fn):
6163
    """Shutdown the instance.
6164

6165
    """
6166
    instance = self.instance
6167
    node_current = instance.primary_node
6168
    timeout = self.op.timeout
6169

    
6170
    if not self.op.no_remember:
6171
      self.cfg.MarkInstanceDown(instance.name)
6172

    
6173
    if self.primary_offline:
6174
      assert self.op.ignore_offline_nodes
6175
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6176
    else:
6177
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6178
      msg = result.fail_msg
6179
      if msg:
6180
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6181

    
6182
      _ShutdownInstanceDisks(self, instance)
6183

    
6184

    
6185
class LUInstanceReinstall(LogicalUnit):
6186
  """Reinstall an instance.
6187

6188
  """
6189
  HPATH = "instance-reinstall"
6190
  HTYPE = constants.HTYPE_INSTANCE
6191
  REQ_BGL = False
6192

    
6193
  def ExpandNames(self):
6194
    self._ExpandAndLockInstance()
6195

    
6196
  def BuildHooksEnv(self):
6197
    """Build hooks env.
6198

6199
    This runs on master, primary and secondary nodes of the instance.
6200

6201
    """
6202
    return _BuildInstanceHookEnvByObject(self, self.instance)
6203

    
6204
  def BuildHooksNodes(self):
6205
    """Build hooks nodes.
6206

6207
    """
6208
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6209
    return (nl, nl)
6210

    
6211
  def CheckPrereq(self):
6212
    """Check prerequisites.
6213

6214
    This checks that the instance is in the cluster and is not running.
6215

6216
    """
6217
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6218
    assert instance is not None, \
6219
      "Cannot retrieve locked instance %s" % self.op.instance_name
6220
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6221
                     " offline, cannot reinstall")
6222
    for node in instance.secondary_nodes:
6223
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6224
                       " cannot reinstall")
6225

    
6226
    if instance.disk_template == constants.DT_DISKLESS:
6227
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6228
                                 self.op.instance_name,
6229
                                 errors.ECODE_INVAL)
6230
    _CheckInstanceDown(self, instance, "cannot reinstall")
6231

    
6232
    if self.op.os_type is not None:
6233
      # OS verification
6234
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6235
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6236
      instance_os = self.op.os_type
6237
    else:
6238
      instance_os = instance.os
6239

    
6240
    nodelist = list(instance.all_nodes)
6241

    
6242
    if self.op.osparams:
6243
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6244
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6245
      self.os_inst = i_osdict # the new dict (without defaults)
6246
    else:
6247
      self.os_inst = None
6248

    
6249
    self.instance = instance
6250

    
6251
  def Exec(self, feedback_fn):
6252
    """Reinstall the instance.
6253

6254
    """
6255
    inst = self.instance
6256

    
6257
    if self.op.os_type is not None:
6258
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6259
      inst.os = self.op.os_type
6260
      # Write to configuration
6261
      self.cfg.Update(inst, feedback_fn)
6262

    
6263
    _StartInstanceDisks(self, inst, None)
6264
    try:
6265
      feedback_fn("Running the instance OS create scripts...")
6266
      # FIXME: pass debug option from opcode to backend
6267
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6268
                                             self.op.debug_level,
6269
                                             osparams=self.os_inst)
6270
      result.Raise("Could not install OS for instance %s on node %s" %
6271
                   (inst.name, inst.primary_node))
6272
    finally:
6273
      _ShutdownInstanceDisks(self, inst)
6274

    
6275

    
6276
class LUInstanceRecreateDisks(LogicalUnit):
6277
  """Recreate an instance's missing disks.
6278

6279
  """
6280
  HPATH = "instance-recreate-disks"
6281
  HTYPE = constants.HTYPE_INSTANCE
6282
  REQ_BGL = False
6283

    
6284
  def CheckArguments(self):
6285
    # normalise the disk list
6286
    self.op.disks = sorted(frozenset(self.op.disks))
6287

    
6288
  def ExpandNames(self):
6289
    self._ExpandAndLockInstance()
6290
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6291
    if self.op.nodes:
6292
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6293
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6294
    else:
6295
      self.needed_locks[locking.LEVEL_NODE] = []
6296

    
6297
  def DeclareLocks(self, level):
6298
    if level == locking.LEVEL_NODE:
6299
      # if we replace the nodes, we only need to lock the old primary,
6300
      # otherwise we need to lock all nodes for disk re-creation
6301
      primary_only = bool(self.op.nodes)
6302
      self._LockInstancesNodes(primary_only=primary_only)
6303

    
6304
  def BuildHooksEnv(self):
6305
    """Build hooks env.
6306

6307
    This runs on master, primary and secondary nodes of the instance.
6308

6309
    """
6310
    return _BuildInstanceHookEnvByObject(self, self.instance)
6311

    
6312
  def BuildHooksNodes(self):
6313
    """Build hooks nodes.
6314

6315
    """
6316
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6317
    return (nl, nl)
6318

    
6319
  def CheckPrereq(self):
6320
    """Check prerequisites.
6321

6322
    This checks that the instance is in the cluster and is not running.
6323

6324
    """
6325
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6326
    assert instance is not None, \
6327
      "Cannot retrieve locked instance %s" % self.op.instance_name
6328
    if self.op.nodes:
6329
      if len(self.op.nodes) != len(instance.all_nodes):
6330
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6331
                                   " %d replacement nodes were specified" %
6332
                                   (instance.name, len(instance.all_nodes),
6333
                                    len(self.op.nodes)),
6334
                                   errors.ECODE_INVAL)
6335
      assert instance.disk_template != constants.DT_DRBD8 or \
6336
          len(self.op.nodes) == 2
6337
      assert instance.disk_template != constants.DT_PLAIN or \
6338
          len(self.op.nodes) == 1
6339
      primary_node = self.op.nodes[0]
6340
    else:
6341
      primary_node = instance.primary_node
6342
    _CheckNodeOnline(self, primary_node)
6343

    
6344
    if instance.disk_template == constants.DT_DISKLESS:
6345
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6346
                                 self.op.instance_name, errors.ECODE_INVAL)
6347
    # if we replace nodes *and* the old primary is offline, we don't
6348
    # check
6349
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6350
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6351
    if not (self.op.nodes and old_pnode.offline):
6352
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6353

    
6354
    if not self.op.disks:
6355
      self.op.disks = range(len(instance.disks))
6356
    else:
6357
      for idx in self.op.disks:
6358
        if idx >= len(instance.disks):
6359
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6360
                                     errors.ECODE_INVAL)
6361
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6362
      raise errors.OpPrereqError("Can't recreate disks partially and"
6363
                                 " change the nodes at the same time",
6364
                                 errors.ECODE_INVAL)
6365
    self.instance = instance
6366

    
6367
  def Exec(self, feedback_fn):
6368
    """Recreate the disks.
6369

6370
    """
6371
    instance = self.instance
6372

    
6373
    to_skip = []
6374
    mods = [] # keeps track of needed logical_id changes
6375

    
6376
    for idx, disk in enumerate(instance.disks):
6377
      if idx not in self.op.disks: # disk idx has not been passed in
6378
        to_skip.append(idx)
6379
        continue
6380
      # update secondaries for disks, if needed
6381
      if self.op.nodes:
6382
        if disk.dev_type == constants.LD_DRBD8:
6383
          # need to update the nodes and minors
6384
          assert len(self.op.nodes) == 2
6385
          assert len(disk.logical_id) == 6 # otherwise disk internals
6386
                                           # have changed
6387
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6388
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6389
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6390
                    new_minors[0], new_minors[1], old_secret)
6391
          assert len(disk.logical_id) == len(new_id)
6392
          mods.append((idx, new_id))
6393

    
6394
    # now that we have passed all asserts above, we can apply the mods
6395
    # in a single run (to avoid partial changes)
6396
    for idx, new_id in mods:
6397
      instance.disks[idx].logical_id = new_id
6398

    
6399
    # change primary node, if needed
6400
    if self.op.nodes:
6401
      instance.primary_node = self.op.nodes[0]
6402
      self.LogWarning("Changing the instance's nodes, you will have to"
6403
                      " remove any disks left on the older nodes manually")
6404

    
6405
    if self.op.nodes:
6406
      self.cfg.Update(instance, feedback_fn)
6407

    
6408
    _CreateDisks(self, instance, to_skip=to_skip)
6409

    
6410

    
6411
class LUInstanceRename(LogicalUnit):
6412
  """Rename an instance.
6413

6414
  """
6415
  HPATH = "instance-rename"
6416
  HTYPE = constants.HTYPE_INSTANCE
6417

    
6418
  def CheckArguments(self):
6419
    """Check arguments.
6420

6421
    """
6422
    if self.op.ip_check and not self.op.name_check:
6423
      # TODO: make the ip check more flexible and not depend on the name check
6424
      raise errors.OpPrereqError("IP address check requires a name check",
6425
                                 errors.ECODE_INVAL)
6426

    
6427
  def BuildHooksEnv(self):
6428
    """Build hooks env.
6429

6430
    This runs on master, primary and secondary nodes of the instance.
6431

6432
    """
6433
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6434
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6435
    return env
6436

    
6437
  def BuildHooksNodes(self):
6438
    """Build hooks nodes.
6439

6440
    """
6441
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6442
    return (nl, nl)
6443

    
6444
  def CheckPrereq(self):
6445
    """Check prerequisites.
6446

6447
    This checks that the instance is in the cluster and is not running.
6448

6449
    """
6450
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6451
                                                self.op.instance_name)
6452
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6453
    assert instance is not None
6454
    _CheckNodeOnline(self, instance.primary_node)
6455
    _CheckInstanceDown(self, instance, "cannot rename")
6456
    self.instance = instance
6457

    
6458
    new_name = self.op.new_name
6459
    if self.op.name_check:
6460
      hostname = netutils.GetHostname(name=new_name)
6461
      if hostname != new_name:
6462
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6463
                     hostname.name)
6464
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6465
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6466
                                    " same as given hostname '%s'") %
6467
                                    (hostname.name, self.op.new_name),
6468
                                    errors.ECODE_INVAL)
6469
      new_name = self.op.new_name = hostname.name
6470
      if (self.op.ip_check and
6471
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6472
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6473
                                   (hostname.ip, new_name),
6474
                                   errors.ECODE_NOTUNIQUE)
6475

    
6476
    instance_list = self.cfg.GetInstanceList()
6477
    if new_name in instance_list and new_name != instance.name:
6478
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6479
                                 new_name, errors.ECODE_EXISTS)
6480

    
6481
  def Exec(self, feedback_fn):
6482
    """Rename the instance.
6483

6484
    """
6485
    inst = self.instance
6486
    old_name = inst.name
6487

    
6488
    rename_file_storage = False
6489
    if (inst.disk_template in constants.DTS_FILEBASED and
6490
        self.op.new_name != inst.name):
6491
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6492
      rename_file_storage = True
6493

    
6494
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6495
    # Change the instance lock. This is definitely safe while we hold the BGL.
6496
    # Otherwise the new lock would have to be added in acquired mode.
6497
    assert self.REQ_BGL
6498
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6499
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6500

    
6501
    # re-read the instance from the configuration after rename
6502
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6503

    
6504
    if rename_file_storage:
6505
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6506
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6507
                                                     old_file_storage_dir,
6508
                                                     new_file_storage_dir)
6509
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6510
                   " (but the instance has been renamed in Ganeti)" %
6511
                   (inst.primary_node, old_file_storage_dir,
6512
                    new_file_storage_dir))
6513

    
6514
    _StartInstanceDisks(self, inst, None)
6515
    try:
6516
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6517
                                                 old_name, self.op.debug_level)
6518
      msg = result.fail_msg
6519
      if msg:
6520
        msg = ("Could not run OS rename script for instance %s on node %s"
6521
               " (but the instance has been renamed in Ganeti): %s" %
6522
               (inst.name, inst.primary_node, msg))
6523
        self.proc.LogWarning(msg)
6524
    finally:
6525
      _ShutdownInstanceDisks(self, inst)
6526

    
6527
    return inst.name
6528

    
6529

    
6530
class LUInstanceRemove(LogicalUnit):
6531
  """Remove an instance.
6532

6533
  """
6534
  HPATH = "instance-remove"
6535
  HTYPE = constants.HTYPE_INSTANCE
6536
  REQ_BGL = False
6537

    
6538
  def ExpandNames(self):
6539
    self._ExpandAndLockInstance()
6540
    self.needed_locks[locking.LEVEL_NODE] = []
6541
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6542

    
6543
  def DeclareLocks(self, level):
6544
    if level == locking.LEVEL_NODE:
6545
      self._LockInstancesNodes()
6546

    
6547
  def BuildHooksEnv(self):
6548
    """Build hooks env.
6549

6550
    This runs on master, primary and secondary nodes of the instance.
6551

6552
    """
6553
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6554
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6555
    return env
6556

    
6557
  def BuildHooksNodes(self):
6558
    """Build hooks nodes.
6559

6560
    """
6561
    nl = [self.cfg.GetMasterNode()]
6562
    nl_post = list(self.instance.all_nodes) + nl
6563
    return (nl, nl_post)
6564

    
6565
  def CheckPrereq(self):
6566
    """Check prerequisites.
6567

6568
    This checks that the instance is in the cluster.
6569

6570
    """
6571
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6572
    assert self.instance is not None, \
6573
      "Cannot retrieve locked instance %s" % self.op.instance_name
6574

    
6575
  def Exec(self, feedback_fn):
6576
    """Remove the instance.
6577

6578
    """
6579
    instance = self.instance
6580
    logging.info("Shutting down instance %s on node %s",
6581
                 instance.name, instance.primary_node)
6582

    
6583
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6584
                                             self.op.shutdown_timeout)
6585
    msg = result.fail_msg
6586
    if msg:
6587
      if self.op.ignore_failures:
6588
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6589
      else:
6590
        raise errors.OpExecError("Could not shutdown instance %s on"
6591
                                 " node %s: %s" %
6592
                                 (instance.name, instance.primary_node, msg))
6593

    
6594
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6595

    
6596

    
6597
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6598
  """Utility function to remove an instance.
6599

6600
  """
6601
  logging.info("Removing block devices for instance %s", instance.name)
6602

    
6603
  if not _RemoveDisks(lu, instance):
6604
    if not ignore_failures:
6605
      raise errors.OpExecError("Can't remove instance's disks")
6606
    feedback_fn("Warning: can't remove instance's disks")
6607

    
6608
  logging.info("Removing instance %s out of cluster config", instance.name)
6609

    
6610
  lu.cfg.RemoveInstance(instance.name)
6611

    
6612
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6613
    "Instance lock removal conflict"
6614

    
6615
  # Remove lock for the instance
6616
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6617

    
6618

    
6619
class LUInstanceQuery(NoHooksLU):
6620
  """Logical unit for querying instances.
6621

6622
  """
6623
  # pylint: disable=W0142
6624
  REQ_BGL = False
6625

    
6626
  def CheckArguments(self):
6627
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6628
                             self.op.output_fields, self.op.use_locking)
6629

    
6630
  def ExpandNames(self):
6631
    self.iq.ExpandNames(self)
6632

    
6633
  def DeclareLocks(self, level):
6634
    self.iq.DeclareLocks(self, level)
6635

    
6636
  def Exec(self, feedback_fn):
6637
    return self.iq.OldStyleQuery(self)
6638

    
6639

    
6640
class LUInstanceFailover(LogicalUnit):
6641
  """Failover an instance.
6642

6643
  """
6644
  HPATH = "instance-failover"
6645
  HTYPE = constants.HTYPE_INSTANCE
6646
  REQ_BGL = False
6647

    
6648
  def CheckArguments(self):
6649
    """Check the arguments.
6650

6651
    """
6652
    self.iallocator = getattr(self.op, "iallocator", None)
6653
    self.target_node = getattr(self.op, "target_node", None)
6654

    
6655
  def ExpandNames(self):
6656
    self._ExpandAndLockInstance()
6657

    
6658
    if self.op.target_node is not None:
6659
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6660

    
6661
    self.needed_locks[locking.LEVEL_NODE] = []
6662
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6663

    
6664
    ignore_consistency = self.op.ignore_consistency
6665
    shutdown_timeout = self.op.shutdown_timeout
6666
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6667
                                       cleanup=False,
6668
                                       failover=True,
6669
                                       ignore_consistency=ignore_consistency,
6670
                                       shutdown_timeout=shutdown_timeout)
6671
    self.tasklets = [self._migrater]
6672

    
6673
  def DeclareLocks(self, level):
6674
    if level == locking.LEVEL_NODE:
6675
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6676
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6677
        if self.op.target_node is None:
6678
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6679
        else:
6680
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6681
                                                   self.op.target_node]
6682
        del self.recalculate_locks[locking.LEVEL_NODE]
6683
      else:
6684
        self._LockInstancesNodes()
6685

    
6686
  def BuildHooksEnv(self):
6687
    """Build hooks env.
6688

6689
    This runs on master, primary and secondary nodes of the instance.
6690

6691
    """
6692
    instance = self._migrater.instance
6693
    source_node = instance.primary_node
6694
    target_node = self.op.target_node
6695
    env = {
6696
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6697
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6698
      "OLD_PRIMARY": source_node,
6699
      "NEW_PRIMARY": target_node,
6700
      }
6701

    
6702
    if instance.disk_template in constants.DTS_INT_MIRROR:
6703
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6704
      env["NEW_SECONDARY"] = source_node
6705
    else:
6706
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6707

    
6708
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6709

    
6710
    return env
6711

    
6712
  def BuildHooksNodes(self):
6713
    """Build hooks nodes.
6714

6715
    """
6716
    instance = self._migrater.instance
6717
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6718
    return (nl, nl + [instance.primary_node])
6719

    
6720

    
6721
class LUInstanceMigrate(LogicalUnit):
6722
  """Migrate an instance.
6723

6724
  This is migration without shutting down, compared to the failover,
6725
  which is done with shutdown.
6726

6727
  """
6728
  HPATH = "instance-migrate"
6729
  HTYPE = constants.HTYPE_INSTANCE
6730
  REQ_BGL = False
6731

    
6732
  def ExpandNames(self):
6733
    self._ExpandAndLockInstance()
6734

    
6735
    if self.op.target_node is not None:
6736
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6737

    
6738
    self.needed_locks[locking.LEVEL_NODE] = []
6739
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6740

    
6741
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6742
                                       cleanup=self.op.cleanup,
6743
                                       failover=False,
6744
                                       fallback=self.op.allow_failover)
6745
    self.tasklets = [self._migrater]
6746

    
6747
  def DeclareLocks(self, level):
6748
    if level == locking.LEVEL_NODE:
6749
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6750
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6751
        if self.op.target_node is None:
6752
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6753
        else:
6754
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6755
                                                   self.op.target_node]
6756
        del self.recalculate_locks[locking.LEVEL_NODE]
6757
      else:
6758
        self._LockInstancesNodes()
6759

    
6760
  def BuildHooksEnv(self):
6761
    """Build hooks env.
6762

6763
    This runs on master, primary and secondary nodes of the instance.
6764

6765
    """
6766
    instance = self._migrater.instance
6767
    source_node = instance.primary_node
6768
    target_node = self.op.target_node
6769
    env = _BuildInstanceHookEnvByObject(self, instance)
6770
    env.update({
6771
      "MIGRATE_LIVE": self._migrater.live,
6772
      "MIGRATE_CLEANUP": self.op.cleanup,
6773
      "OLD_PRIMARY": source_node,
6774
      "NEW_PRIMARY": target_node,
6775
      })
6776

    
6777
    if instance.disk_template in constants.DTS_INT_MIRROR:
6778
      env["OLD_SECONDARY"] = target_node
6779
      env["NEW_SECONDARY"] = source_node
6780
    else:
6781
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6782

    
6783
    return env
6784

    
6785
  def BuildHooksNodes(self):
6786
    """Build hooks nodes.
6787

6788
    """
6789
    instance = self._migrater.instance
6790
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6791
    return (nl, nl + [instance.primary_node])
6792

    
6793

    
6794
class LUInstanceMove(LogicalUnit):
6795
  """Move an instance by data-copying.
6796

6797
  """
6798
  HPATH = "instance-move"
6799
  HTYPE = constants.HTYPE_INSTANCE
6800
  REQ_BGL = False
6801

    
6802
  def ExpandNames(self):
6803
    self._ExpandAndLockInstance()
6804
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6805
    self.op.target_node = target_node
6806
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6807
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6808

    
6809
  def DeclareLocks(self, level):
6810
    if level == locking.LEVEL_NODE:
6811
      self._LockInstancesNodes(primary_only=True)
6812

    
6813
  def BuildHooksEnv(self):
6814
    """Build hooks env.
6815

6816
    This runs on master, primary and secondary nodes of the instance.
6817

6818
    """
6819
    env = {
6820
      "TARGET_NODE": self.op.target_node,
6821
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6822
      }
6823
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6824
    return env
6825

    
6826
  def BuildHooksNodes(self):
6827
    """Build hooks nodes.
6828

6829
    """
6830
    nl = [
6831
      self.cfg.GetMasterNode(),
6832
      self.instance.primary_node,
6833
      self.op.target_node,
6834
      ]
6835
    return (nl, nl)
6836

    
6837
  def CheckPrereq(self):
6838
    """Check prerequisites.
6839

6840
    This checks that the instance is in the cluster.
6841

6842
    """
6843
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6844
    assert self.instance is not None, \
6845
      "Cannot retrieve locked instance %s" % self.op.instance_name
6846

    
6847
    node = self.cfg.GetNodeInfo(self.op.target_node)
6848
    assert node is not None, \
6849
      "Cannot retrieve locked node %s" % self.op.target_node
6850

    
6851
    self.target_node = target_node = node.name
6852

    
6853
    if target_node == instance.primary_node:
6854
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6855
                                 (instance.name, target_node),
6856
                                 errors.ECODE_STATE)
6857

    
6858
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6859

    
6860
    for idx, dsk in enumerate(instance.disks):
6861
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6862
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6863
                                   " cannot copy" % idx, errors.ECODE_STATE)
6864

    
6865
    _CheckNodeOnline(self, target_node)
6866
    _CheckNodeNotDrained(self, target_node)
6867
    _CheckNodeVmCapable(self, target_node)
6868

    
6869
    if instance.admin_up:
6870
      # check memory requirements on the target node
6871
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6872
                           instance.name, bep[constants.BE_MEMORY],
6873
                           instance.hypervisor)
6874
    else:
6875
      self.LogInfo("Not checking memory on the secondary node as"
6876
                   " instance will not be started")
6877

    
6878
    # check bridge existence
6879
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6880

    
6881
  def Exec(self, feedback_fn):
6882
    """Move an instance.
6883

6884
    The move is done by shutting it down on its present node, copying
6885
    the data over (slow) and starting it on the new node.
6886

6887
    """
6888
    instance = self.instance
6889

    
6890
    source_node = instance.primary_node
6891
    target_node = self.target_node
6892

    
6893
    self.LogInfo("Shutting down instance %s on source node %s",
6894
                 instance.name, source_node)
6895

    
6896
    result = self.rpc.call_instance_shutdown(source_node, instance,
6897
                                             self.op.shutdown_timeout)
6898
    msg = result.fail_msg
6899
    if msg:
6900
      if self.op.ignore_consistency:
6901
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6902
                             " Proceeding anyway. Please make sure node"
6903
                             " %s is down. Error details: %s",
6904
                             instance.name, source_node, source_node, msg)
6905
      else:
6906
        raise errors.OpExecError("Could not shutdown instance %s on"
6907
                                 " node %s: %s" %
6908
                                 (instance.name, source_node, msg))
6909

    
6910
    # create the target disks
6911
    try:
6912
      _CreateDisks(self, instance, target_node=target_node)
6913
    except errors.OpExecError:
6914
      self.LogWarning("Device creation failed, reverting...")
6915
      try:
6916
        _RemoveDisks(self, instance, target_node=target_node)
6917
      finally:
6918
        self.cfg.ReleaseDRBDMinors(instance.name)
6919
        raise
6920

    
6921
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6922

    
6923
    errs = []
6924
    # activate, get path, copy the data over
6925
    for idx, disk in enumerate(instance.disks):
6926
      self.LogInfo("Copying data for disk %d", idx)
6927
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6928
                                               instance.name, True, idx)
6929
      if result.fail_msg:
6930
        self.LogWarning("Can't assemble newly created disk %d: %s",
6931
                        idx, result.fail_msg)
6932
        errs.append(result.fail_msg)
6933
        break
6934
      dev_path = result.payload
6935
      result = self.rpc.call_blockdev_export(source_node, disk,
6936
                                             target_node, dev_path,
6937
                                             cluster_name)
6938
      if result.fail_msg:
6939
        self.LogWarning("Can't copy data over for disk %d: %s",
6940
                        idx, result.fail_msg)
6941
        errs.append(result.fail_msg)
6942
        break
6943

    
6944
    if errs:
6945
      self.LogWarning("Some disks failed to copy, aborting")
6946
      try:
6947
        _RemoveDisks(self, instance, target_node=target_node)
6948
      finally:
6949
        self.cfg.ReleaseDRBDMinors(instance.name)
6950
        raise errors.OpExecError("Errors during disk copy: %s" %
6951
                                 (",".join(errs),))
6952

    
6953
    instance.primary_node = target_node
6954
    self.cfg.Update(instance, feedback_fn)
6955

    
6956
    self.LogInfo("Removing the disks on the original node")
6957
    _RemoveDisks(self, instance, target_node=source_node)
6958

    
6959
    # Only start the instance if it's marked as up
6960
    if instance.admin_up:
6961
      self.LogInfo("Starting instance %s on node %s",
6962
                   instance.name, target_node)
6963

    
6964
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6965
                                           ignore_secondaries=True)
6966
      if not disks_ok:
6967
        _ShutdownInstanceDisks(self, instance)
6968
        raise errors.OpExecError("Can't activate the instance's disks")
6969

    
6970
      result = self.rpc.call_instance_start(target_node, instance,
6971
                                            None, None, False)
6972
      msg = result.fail_msg
6973
      if msg:
6974
        _ShutdownInstanceDisks(self, instance)
6975
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6976
                                 (instance.name, target_node, msg))
6977

    
6978

    
6979
class LUNodeMigrate(LogicalUnit):
6980
  """Migrate all instances from a node.
6981

6982
  """
6983
  HPATH = "node-migrate"
6984
  HTYPE = constants.HTYPE_NODE
6985
  REQ_BGL = False
6986

    
6987
  def CheckArguments(self):
6988
    pass
6989

    
6990
  def ExpandNames(self):
6991
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6992

    
6993
    self.share_locks = _ShareAll()
6994
    self.needed_locks = {
6995
      locking.LEVEL_NODE: [self.op.node_name],
6996
      }
6997

    
6998
  def BuildHooksEnv(self):
6999
    """Build hooks env.
7000

7001
    This runs on the master, the primary and all the secondaries.
7002

7003
    """
7004
    return {
7005
      "NODE_NAME": self.op.node_name,
7006
      }
7007

    
7008
  def BuildHooksNodes(self):
7009
    """Build hooks nodes.
7010

7011
    """
7012
    nl = [self.cfg.GetMasterNode()]
7013
    return (nl, nl)
7014

    
7015
  def CheckPrereq(self):
7016
    pass
7017

    
7018
  def Exec(self, feedback_fn):
7019
    # Prepare jobs for migration instances
7020
    jobs = [
7021
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7022
                                 mode=self.op.mode,
7023
                                 live=self.op.live,
7024
                                 iallocator=self.op.iallocator,
7025
                                 target_node=self.op.target_node)]
7026
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7027
      ]
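    # Each inner list above forms a single-opcode job, so every primary
    # instance on the node is migrated by its own job rather than by one
    # combined job.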
7028

    
7029
    # TODO: Run iallocator in this opcode and pass correct placement options to
7030
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7031
    # running the iallocator and the actual migration, a good consistency model
7032
    # will have to be found.
7033

    
7034
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7035
            frozenset([self.op.node_name]))
7036

    
7037
    return ResultWithJobs(jobs)
7038

    
7039

    
7040
class TLMigrateInstance(Tasklet):
7041
  """Tasklet class for instance migration.
7042

7043
  @type live: boolean
7044
  @ivar live: whether the migration will be done live or non-live;
7045
      this variable is initalized only after CheckPrereq has run
7046
  @type cleanup: boolean
7047
  @ivar cleanup: Whether we clean up from a failed migration
7048
  @type iallocator: string
7049
  @ivar iallocator: The iallocator used to determine target_node
7050
  @type target_node: string
7051
  @ivar target_node: If given, the target_node to reallocate the instance to
7052
  @type failover: boolean
7053
  @ivar failover: Whether operation results in failover or migration
7054
  @type fallback: boolean
7055
  @ivar fallback: Whether fallback to failover is allowed if migration is not
7056
                  possible
7057
  @type ignore_consistency: boolean
7058
  @ivar ignore_consistency: Whether we should ignore consistency between
7059
                            source and target node
7060
  @type shutdown_timeout: int
7061
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7062

7063
  """
7064
  def __init__(self, lu, instance_name, cleanup=False,
7065
               failover=False, fallback=False,
7066
               ignore_consistency=False,
7067
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7068
    """Initializes this class.
7069

7070
    """
7071
    Tasklet.__init__(self, lu)
7072

    
7073
    # Parameters
7074
    self.instance_name = instance_name
7075
    self.cleanup = cleanup
7076
    self.live = False # will be overridden later
7077
    self.failover = failover
7078
    self.fallback = fallback
7079
    self.ignore_consistency = ignore_consistency
7080
    self.shutdown_timeout = shutdown_timeout
7081

    
7082
  def CheckPrereq(self):
7083
    """Check prerequisites.
7084

7085
    This checks that the instance is in the cluster.
7086

7087
    """
7088
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7089
    instance = self.cfg.GetInstanceInfo(instance_name)
7090
    assert instance is not None
7091
    self.instance = instance
7092

    
7093
    if (not self.cleanup and not instance.admin_up and not self.failover and
7094
        self.fallback):
7095
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7096
                      " to failover")
7097
      self.failover = True
7098

    
7099
    if instance.disk_template not in constants.DTS_MIRRORED:
7100
      if self.failover:
7101
        text = "failovers"
7102
      else:
7103
        text = "migrations"
7104
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7105
                                 " %s" % (instance.disk_template, text),
7106
                                 errors.ECODE_STATE)
7107

    
7108
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7109
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7110

    
7111
      if self.lu.op.iallocator:
7112
        self._RunAllocator()
7113
      else:
7114
        # We set self.target_node as it is required by
7115
        # BuildHooksEnv
7116
        self.target_node = self.lu.op.target_node
7117

    
7118
      # self.target_node is already populated, either directly or by the
7119
      # iallocator run
7120
      target_node = self.target_node
7121
      if self.target_node == instance.primary_node:
7122
        raise errors.OpPrereqError("Cannot migrate instance %s"
7123
                                   " to its primary (%s)" %
7124
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_INVAL)
7125

    
7126
      if len(self.lu.tasklets) == 1:
7127
        # It is safe to release locks only when we're the only tasklet
7128
        # in the LU
7129
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7130
                      keep=[instance.primary_node, self.target_node])
7131

    
7132
    else:
7133
      secondary_nodes = instance.secondary_nodes
7134
      if not secondary_nodes:
7135
        raise errors.ConfigurationError("No secondary node but using"
7136
                                        " %s disk template" %
7137
                                        instance.disk_template)
7138
      target_node = secondary_nodes[0]
7139
      if self.lu.op.iallocator or (self.lu.op.target_node and
7140
                                   self.lu.op.target_node != target_node):
7141
        if self.failover:
7142
          text = "failed over"
7143
        else:
7144
          text = "migrated"
7145
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7146
                                   " be %s to arbitrary nodes"
7147
                                   " (neither an iallocator nor a target"
7148
                                   " node can be passed)" %
7149
                                   (instance.disk_template, text),
7150
                                   errors.ECODE_INVAL)
7151

    
7152
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7153

    
7154
    # check memory requirements on the secondary node
7155
    if not self.failover or instance.admin_up:
7156
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7157
                           instance.name, i_be[constants.BE_MEMORY],
7158
                           instance.hypervisor)
7159
    else:
7160
      self.lu.LogInfo("Not checking memory on the secondary node as"
7161
                      " instance will not be started")
7162

    
7163
    # check bridge existence
7164
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7165

    
7166
    if not self.cleanup:
7167
      _CheckNodeNotDrained(self.lu, target_node)
7168
      if not self.failover:
7169
        result = self.rpc.call_instance_migratable(instance.primary_node,
7170
                                                   instance)
7171
        if result.fail_msg and self.fallback:
7172
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7173
                          " failover")
7174
          self.failover = True
7175
        else:
7176
          result.Raise("Can't migrate, please use failover",
7177
                       prereq=True, ecode=errors.ECODE_STATE)
7178

    
7179
    assert not (self.failover and self.cleanup)
7180

    
7181
    if not self.failover:
7182
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7183
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7184
                                   " parameters are accepted",
7185
                                   errors.ECODE_INVAL)
7186
      if self.lu.op.live is not None:
7187
        if self.lu.op.live:
7188
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7189
        else:
7190
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7191
        # reset the 'live' parameter to None so that repeated
7192
        # invocations of CheckPrereq do not raise an exception
7193
        self.lu.op.live = None
7194
      elif self.lu.op.mode is None:
7195
        # read the default value from the hypervisor
7196
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7197
                                                skip_globals=False)
7198
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7199

    
7200
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7201
    else:
7202
      # Failover is never live
7203
      self.live = False
7204

    
7205
  def _RunAllocator(self):
7206
    """Run the allocator based on input opcode.
7207

7208
    """
7209
    ial = IAllocator(self.cfg, self.rpc,
7210
                     mode=constants.IALLOCATOR_MODE_RELOC,
7211
                     name=self.instance_name,
7212
                     # TODO See why hail breaks with a single node below
7213
                     relocate_from=[self.instance.primary_node,
7214
                                    self.instance.primary_node],
7215
                     )
7216

    
7217
    ial.Run(self.lu.op.iallocator)
7218

    
7219
    if not ial.success:
7220
      raise errors.OpPrereqError("Can't compute nodes using"
7221
                                 " iallocator '%s': %s" %
7222
                                 (self.lu.op.iallocator, ial.info),
7223
                                 errors.ECODE_NORES)
7224
    if len(ial.result) != ial.required_nodes:
7225
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7226
                                 " of nodes (%s), required %s" %
7227
                                 (self.lu.op.iallocator, len(ial.result),
7228
                                  ial.required_nodes), errors.ECODE_FAULT)
7229
    self.target_node = ial.result[0]
7230
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7231
                    self.instance_name, self.lu.op.iallocator,
7232
                    utils.CommaJoin(ial.result))
7233

    
7234
  def _WaitUntilSync(self):
7235
    """Poll with custom rpc for disk sync.
7236

7237
    This uses our own step-based rpc call.
7238

7239
    """
7240
    self.feedback_fn("* wait until resync is done")
7241
    all_done = False
7242
    while not all_done:
7243
      all_done = True
7244
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7245
                                            self.nodes_ip,
7246
                                            self.instance.disks)
7247
      min_percent = 100
7248
      for node, nres in result.items():
7249
        nres.Raise("Cannot resync disks on node %s" % node)
7250
        node_done, node_percent = nres.payload
7251
        all_done = all_done and node_done
7252
        if node_percent is not None:
7253
          min_percent = min(min_percent, node_percent)
7254
      if not all_done:
7255
        if min_percent < 100:
7256
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7257
        time.sleep(2)
7258

    
7259
  def _EnsureSecondary(self, node):
7260
    """Demote a node to secondary.
7261

7262
    """
7263
    self.feedback_fn("* switching node %s to secondary mode" % node)
7264

    
7265
    for dev in self.instance.disks:
7266
      self.cfg.SetDiskID(dev, node)
7267

    
7268
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7269
                                          self.instance.disks)
7270
    result.Raise("Cannot change disk to secondary on node %s" % node)
7271

    
7272
  def _GoStandalone(self):
7273
    """Disconnect from the network.
7274

7275
    """
7276
    self.feedback_fn("* changing into standalone mode")
7277
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7278
                                               self.instance.disks)
7279
    for node, nres in result.items():
7280
      nres.Raise("Cannot disconnect disks node %s" % node)
7281

    
7282
  def _GoReconnect(self, multimaster):
7283
    """Reconnect to the network.
7284

7285
    """
7286
    if multimaster:
7287
      msg = "dual-master"
7288
    else:
7289
      msg = "single-master"
7290
    self.feedback_fn("* changing disks into %s mode" % msg)
7291
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7292
                                           self.instance.disks,
7293
                                           self.instance.name, multimaster)
7294
    for node, nres in result.items():
7295
      nres.Raise("Cannot change disks config on node %s" % node)
7296

    
7297
  def _ExecCleanup(self):
7298
    """Try to cleanup after a failed migration.
7299

7300
    The cleanup is done by:
7301
      - check that the instance is running only on one node
7302
        (and update the config if needed)
7303
      - change disks on its secondary node to secondary
7304
      - wait until disks are fully synchronized
7305
      - disconnect from the network
7306
      - change disks into single-master mode
7307
      - wait again until disks are fully synchronized
7308

7309
    """
7310
    instance = self.instance
7311
    target_node = self.target_node
7312
    source_node = self.source_node
7313

    
7314
    # check running on only one node
7315
    self.feedback_fn("* checking where the instance actually runs"
7316
                     " (if this hangs, the hypervisor might be in"
7317
                     " a bad state)")
7318
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7319
    for node, result in ins_l.items():
7320
      result.Raise("Can't contact node %s" % node)
7321

    
7322
    runningon_source = instance.name in ins_l[source_node].payload
7323
    runningon_target = instance.name in ins_l[target_node].payload
7324

    
7325
    if runningon_source and runningon_target:
7326
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7327
                               " or the hypervisor is confused; you will have"
7328
                               " to ensure manually that it runs only on one"
7329
                               " and restart this operation")
7330

    
7331
    if not (runningon_source or runningon_target):
7332
      raise errors.OpExecError("Instance does not seem to be running at all;"
7333
                               " in this case it's safer to repair by"
7334
                               " running 'gnt-instance stop' to ensure disk"
7335
                               " shutdown, and then restarting it")
7336

    
7337
    if runningon_target:
7338
      # the migration has actually succeeded, we need to update the config
7339
      self.feedback_fn("* instance running on secondary node (%s),"
7340
                       " updating config" % target_node)
7341
      instance.primary_node = target_node
7342
      self.cfg.Update(instance, self.feedback_fn)
7343
      demoted_node = source_node
7344
    else:
7345
      self.feedback_fn("* instance confirmed to be running on its"
7346
                       " primary node (%s)" % source_node)
7347
      demoted_node = target_node
7348

    
7349
    if instance.disk_template in constants.DTS_INT_MIRROR:
7350
      self._EnsureSecondary(demoted_node)
7351
      try:
7352
        self._WaitUntilSync()
7353
      except errors.OpExecError:
7354
        # we ignore here errors, since if the device is standalone, it
7355
        # won't be able to sync
7356
        pass
7357
      self._GoStandalone()
7358
      self._GoReconnect(False)
7359
      self._WaitUntilSync()
7360

    
7361
    self.feedback_fn("* done")
7362

    
7363
  def _RevertDiskStatus(self):
7364
    """Try to revert the disk status after a failed migration.
7365

7366
    """
7367
    target_node = self.target_node
7368
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7369
      return
7370

    
7371
    try:
7372
      self._EnsureSecondary(target_node)
7373
      self._GoStandalone()
7374
      self._GoReconnect(False)
7375
      self._WaitUntilSync()
7376
    except errors.OpExecError, err:
7377
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7378
                         " please try to recover the instance manually;"
7379
                         " error '%s'" % str(err))
7380

    
7381
  def _AbortMigration(self):
7382
    """Call the hypervisor code to abort a started migration.
7383

7384
    """
7385
    instance = self.instance
7386
    target_node = self.target_node
7387
    migration_info = self.migration_info
7388

    
7389
    abort_result = self.rpc.call_finalize_migration(target_node,
7390
                                                    instance,
7391
                                                    migration_info,
7392
                                                    False)
7393
    abort_msg = abort_result.fail_msg
7394
    if abort_msg:
7395
      logging.error("Aborting migration failed on target node %s: %s",
7396
                    target_node, abort_msg)
7397
      # Don't raise an exception here, as we still have to try to revert the
7398
      # disk status, even if this step failed.
7399

    
7400
  def _ExecMigration(self):
7401
    """Migrate an instance.
7402

7403
    The migrate is done by:
7404
      - change the disks into dual-master mode
7405
      - wait until disks are fully synchronized again
7406
      - migrate the instance
7407
      - change disks on the new secondary node (the old primary) to secondary
7408
      - wait until disks are fully synchronized
7409
      - change disks into single-master mode
7410

7411
    """
7412
    instance = self.instance
7413
    target_node = self.target_node
7414
    source_node = self.source_node
7415

    
7416
    # Check for hypervisor version mismatch and warn the user.
7417
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7418
                                       None, self.instance.hypervisor)
7419
    src_info = nodeinfo[source_node]
7420
    dst_info = nodeinfo[target_node]
7421

    
7422
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7423
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7424
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7425
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7426
      if src_version != dst_version:
7427
        self.feedback_fn("* warning: hypervisor version mismatch between"
7428
                         " source (%s) and target (%s) node" %
7429
                         (src_version, dst_version))
7430

    
7431
    self.feedback_fn("* checking disk consistency between source and target")
7432
    for dev in instance.disks:
7433
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7434
        raise errors.OpExecError("Disk %s is degraded or not fully"
7435
                                 " synchronized on target node,"
7436
                                 " aborting migration" % dev.iv_name)
7437

    
7438
    # First get the migration information from the remote node
7439
    result = self.rpc.call_migration_info(source_node, instance)
7440
    msg = result.fail_msg
7441
    if msg:
7442
      log_err = ("Failed fetching source migration information from %s: %s" %
7443
                 (source_node, msg))
7444
      logging.error(log_err)
7445
      raise errors.OpExecError(log_err)
7446

    
7447
    self.migration_info = migration_info = result.payload
7448

    
7449
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7450
      # Then switch the disks to master/master mode
7451
      self._EnsureSecondary(target_node)
7452
      self._GoStandalone()
7453
      self._GoReconnect(True)
7454
      self._WaitUntilSync()
7455

    
7456
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7457
    result = self.rpc.call_accept_instance(target_node,
7458
                                           instance,
7459
                                           migration_info,
7460
                                           self.nodes_ip[target_node])
7461

    
7462
    msg = result.fail_msg
7463
    if msg:
7464
      logging.error("Instance pre-migration failed, trying to revert"
7465
                    " disk status: %s", msg)
7466
      self.feedback_fn("Pre-migration failed, aborting")
7467
      self._AbortMigration()
7468
      self._RevertDiskStatus()
7469
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7470
                               (instance.name, msg))
7471

    
7472
    self.feedback_fn("* migrating instance to %s" % target_node)
7473
    result = self.rpc.call_instance_migrate(source_node, instance,
7474
                                            self.nodes_ip[target_node],
7475
                                            self.live)
7476
    msg = result.fail_msg
7477
    if msg:
7478
      logging.error("Instance migration failed, trying to revert"
7479
                    " disk status: %s", msg)
7480
      self.feedback_fn("Migration failed, aborting")
7481
      self._AbortMigration()
7482
      self._RevertDiskStatus()
7483
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7484
                               (instance.name, msg))
7485

    
7486
    instance.primary_node = target_node
7487
    # distribute new instance config to the other nodes
7488
    self.cfg.Update(instance, self.feedback_fn)
7489

    
7490
    result = self.rpc.call_finalize_migration(target_node,
7491
                                              instance,
7492
                                              migration_info,
7493
                                              True)
7494
    msg = result.fail_msg
7495
    if msg:
7496
      logging.error("Instance migration succeeded, but finalization failed:"
7497
                    " %s", msg)
7498
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7499
                               msg)
7500

    
7501
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7502
      self._EnsureSecondary(source_node)
7503
      self._WaitUntilSync()
7504
      self._GoStandalone()
7505
      self._GoReconnect(False)
7506
      self._WaitUntilSync()
7507

    
7508
    self.feedback_fn("* done")
7509

    
7510
  def _ExecFailover(self):
7511
    """Failover an instance.
7512

7513
    The failover is done by shutting it down on its present node and
7514
    starting it on the secondary.
7515

7516
    """
7517
    instance = self.instance
7518
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7519

    
7520
    source_node = instance.primary_node
7521
    target_node = self.target_node
7522

    
7523
    if instance.admin_up:
7524
      self.feedback_fn("* checking disk consistency between source and target")
7525
      for dev in instance.disks:
7526
        # for drbd, these are drbd over lvm
7527
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7528
          if primary_node.offline:
7529
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7530
                             " target node %s" %
7531
                             (primary_node.name, dev.iv_name, target_node))
7532
          elif not self.ignore_consistency:
7533
            raise errors.OpExecError("Disk %s is degraded on target node,"
7534
                                     " aborting failover" % dev.iv_name)
7535
    else:
7536
      self.feedback_fn("* not checking disk consistency as instance is not"
7537
                       " running")
7538

    
7539
    self.feedback_fn("* shutting down instance on source node")
7540
    logging.info("Shutting down instance %s on node %s",
7541
                 instance.name, source_node)
7542

    
7543
    result = self.rpc.call_instance_shutdown(source_node, instance,
7544
                                             self.shutdown_timeout)
7545
    msg = result.fail_msg
7546
    if msg:
7547
      if self.ignore_consistency or primary_node.offline:
7548
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7549
                           " proceeding anyway; please make sure node"
7550
                           " %s is down; error details: %s",
7551
                           instance.name, source_node, source_node, msg)
7552
      else:
7553
        raise errors.OpExecError("Could not shutdown instance %s on"
7554
                                 " node %s: %s" %
7555
                                 (instance.name, source_node, msg))
7556

    
7557
    self.feedback_fn("* deactivating the instance's disks on source node")
7558
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7559
      raise errors.OpExecError("Can't shut down the instance's disks")
7560

    
7561
    instance.primary_node = target_node
7562
    # distribute new instance config to the other nodes
7563
    self.cfg.Update(instance, self.feedback_fn)
7564

    
7565
    # Only start the instance if it's marked as up
7566
    if instance.admin_up:
7567
      self.feedback_fn("* activating the instance's disks on target node %s" %
7568
                       target_node)
7569
      logging.info("Starting instance %s on node %s",
7570
                   instance.name, target_node)
7571

    
7572
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7573
                                           ignore_secondaries=True)
7574
      if not disks_ok:
7575
        _ShutdownInstanceDisks(self.lu, instance)
7576
        raise errors.OpExecError("Can't activate the instance's disks")
7577

    
7578
      self.feedback_fn("* starting the instance on the target node %s" %
7579
                       target_node)
7580
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7581
                                            False)
7582
      msg = result.fail_msg
7583
      if msg:
7584
        _ShutdownInstanceDisks(self.lu, instance)
7585
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7586
                                 (instance.name, target_node, msg))
7587

    
7588
  def Exec(self, feedback_fn):
7589
    """Perform the migration.
7590

7591
    """
7592
    self.feedback_fn = feedback_fn
7593
    self.source_node = self.instance.primary_node
7594

    
7595
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7596
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7597
      self.target_node = self.instance.secondary_nodes[0]
7598
      # Otherwise self.target_node has been populated either
7599
      # directly, or through an iallocator.
7600

    
7601
    self.all_nodes = [self.source_node, self.target_node]
7602
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7603
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7604

    
7605
    if self.failover:
7606
      feedback_fn("Failover instance %s" % self.instance.name)
7607
      self._ExecFailover()
7608
    else:
7609
      feedback_fn("Migrating instance %s" % self.instance.name)
7610

    
7611
      if self.cleanup:
7612
        return self._ExecCleanup()
7613
      else:
7614
        return self._ExecMigration()
7615

    
7616

    
7617
def _CreateBlockDev(lu, node, instance, device, force_create,
7618
                    info, force_open):
7619
  """Create a tree of block devices on a given node.
7620

7621
  If this device type has to be created on secondaries, create it and
7622
  all its children.
7623

7624
  If not, just recurse to children keeping the same 'force' value.
7625

7626
  @param lu: the lu on whose behalf we execute
7627
  @param node: the node on which to create the device
7628
  @type instance: L{objects.Instance}
7629
  @param instance: the instance which owns the device
7630
  @type device: L{objects.Disk}
7631
  @param device: the device to create
7632
  @type force_create: boolean
7633
  @param force_create: whether to force creation of this device; this
7634
      will be changed to True whenever we find a device whose
7635
      CreateOnSecondary() method returns True
7636
  @param info: the extra 'metadata' we should attach to the device
7637
      (this will be represented as a LVM tag)
7638
  @type force_open: boolean
7639
  @param force_open: this parameter will be passed to the
7640
      L{backend.BlockdevCreate} function where it specifies
7641
      whether we run on primary or not, and it affects both
7642
      the child assembly and the device's own Open() execution
7643

7644
  """
7645
  if device.CreateOnSecondary():
7646
    force_create = True
7647

    
7648
  if device.children:
7649
    for child in device.children:
7650
      _CreateBlockDev(lu, node, instance, child, force_create,
7651
                      info, force_open)
7652

    
7653
  if not force_create:
7654
    return
7655

    
7656
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7657
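# Rough sketch of the recursion above: children are always visited first, so
# for a device whose CreateOnSecondary() returns True (which forces
# force_create for the whole subtree) all children are created before the
# device itself, while devices reached with force_create still False are
# skipped by the early return.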

    
7658

    
7659
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7660
  """Create a single block device on a given node.
7661

7662
  This will not recurse over children of the device, so they must be
7663
  created in advance.
7664

7665
  @param lu: the lu on whose behalf we execute
7666
  @param node: the node on which to create the device
7667
  @type instance: L{objects.Instance}
7668
  @param instance: the instance which owns the device
7669
  @type device: L{objects.Disk}
7670
  @param device: the device to create
7671
  @param info: the extra 'metadata' we should attach to the device
7672
      (this will be represented as a LVM tag)
7673
  @type force_open: boolean
7674
  @param force_open: this parameter will be passed to the
7675
      L{backend.BlockdevCreate} function where it specifies
7676
      whether we run on primary or not, and it affects both
7677
      the child assembly and the device's own Open() execution
7678

7679
  """
7680
  lu.cfg.SetDiskID(device, node)
7681
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7682
                                       instance.name, force_open, info)
7683
  result.Raise("Can't create block device %s on"
7684
               " node %s for instance %s" % (device, node, instance.name))
7685
  if device.physical_id is None:
7686
    device.physical_id = result.payload
7687

    
7688

    
7689
def _GenerateUniqueNames(lu, exts):
7690
  """Generate a suitable LV name.
7691

7692
  This will generate logical volume names for the given instance.
7693

7694
  """
7695
  results = []
7696
  for val in exts:
7697
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7698
    results.append("%s%s" % (new_id, val))
7699
  return results
7700
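# Illustrative example (made-up values): a call such as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns one "<unique-id><extension>" string per requested extension, e.g.
# roughly ["<uuid>.disk0", "<uuid>.disk1"].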

    
7701

    
7702
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7703
                         iv_name, p_minor, s_minor):
7704
  """Generate a drbd8 device complete with its children.
7705

7706
  """
7707
  assert len(vgnames) == len(names) == 2
7708
  port = lu.cfg.AllocatePort()
7709
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7710
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7711
                          logical_id=(vgnames[0], names[0]))
7712
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7713
                          logical_id=(vgnames[1], names[1]))
7714
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7715
                          logical_id=(primary, secondary, port,
7716
                                      p_minor, s_minor,
7717
                                      shared_secret),
7718
                          children=[dev_data, dev_meta],
7719
                          iv_name=iv_name)
7720
  return drbd_dev
7721
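# Rough sketch of the generated tree (sizes in MB): for size=1024 the result
# is one LD_DRBD8 disk whose logical_id is the (primary, secondary, port,
# p_minor, s_minor, shared_secret) tuple built above, with two LD_LV
# children: a 1024 MB data volume and a fixed 128 MB metadata volume.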

    
7722

    
7723
def _GenerateDiskTemplate(lu, template_name,
7724
                          instance_name, primary_node,
7725
                          secondary_nodes, disk_info,
7726
                          file_storage_dir, file_driver,
7727
                          base_index, feedback_fn):
7728
  """Generate the entire disk layout for a given template type.
7729

7730
  """
7731
  #TODO: compute space requirements
7732

    
7733
  vgname = lu.cfg.GetVGName()
7734
  disk_count = len(disk_info)
7735
  disks = []
7736
  if template_name == constants.DT_DISKLESS:
7737
    pass
7738
  elif template_name == constants.DT_PLAIN:
7739
    if len(secondary_nodes) != 0:
7740
      raise errors.ProgrammerError("Wrong template configuration")
7741

    
7742
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7743
                                      for i in range(disk_count)])
7744
    for idx, disk in enumerate(disk_info):
7745
      disk_index = idx + base_index
7746
      vg = disk.get(constants.IDISK_VG, vgname)
7747
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7748
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7749
                              size=disk[constants.IDISK_SIZE],
7750
                              logical_id=(vg, names[idx]),
7751
                              iv_name="disk/%d" % disk_index,
7752
                              mode=disk[constants.IDISK_MODE])
7753
      disks.append(disk_dev)
7754
  elif template_name == constants.DT_DRBD8:
7755
    if len(secondary_nodes) != 1:
7756
      raise errors.ProgrammerError("Wrong template configuration")
7757
    remote_node = secondary_nodes[0]
7758
    minors = lu.cfg.AllocateDRBDMinor(
7759
      [primary_node, remote_node] * len(disk_info), instance_name)
7760

    
7761
    names = []
7762
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7763
                                               for i in range(disk_count)]):
7764
      names.append(lv_prefix + "_data")
7765
      names.append(lv_prefix + "_meta")
7766
    for idx, disk in enumerate(disk_info):
7767
      disk_index = idx + base_index
7768
      data_vg = disk.get(constants.IDISK_VG, vgname)
7769
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7770
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7771
                                      disk[constants.IDISK_SIZE],
7772
                                      [data_vg, meta_vg],
7773
                                      names[idx * 2:idx * 2 + 2],
7774
                                      "disk/%d" % disk_index,
7775
                                      minors[idx * 2], minors[idx * 2 + 1])
7776
      disk_dev.mode = disk[constants.IDISK_MODE]
7777
      disks.append(disk_dev)
7778
  elif template_name == constants.DT_FILE:
7779
    if len(secondary_nodes) != 0:
7780
      raise errors.ProgrammerError("Wrong template configuration")
7781

    
7782
    opcodes.RequireFileStorage()
7783

    
7784
    for idx, disk in enumerate(disk_info):
7785
      disk_index = idx + base_index
7786
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7787
                              size=disk[constants.IDISK_SIZE],
7788
                              iv_name="disk/%d" % disk_index,
7789
                              logical_id=(file_driver,
7790
                                          "%s/disk%d" % (file_storage_dir,
7791
                                                         disk_index)),
7792
                              mode=disk[constants.IDISK_MODE])
7793
      disks.append(disk_dev)
7794
  elif template_name == constants.DT_SHARED_FILE:
7795
    if len(secondary_nodes) != 0:
7796
      raise errors.ProgrammerError("Wrong template configuration")
7797

    
7798
    opcodes.RequireSharedFileStorage()
7799

    
7800
    for idx, disk in enumerate(disk_info):
7801
      disk_index = idx + base_index
7802
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7803
                              size=disk[constants.IDISK_SIZE],
7804
                              iv_name="disk/%d" % disk_index,
7805
                              logical_id=(file_driver,
7806
                                          "%s/disk%d" % (file_storage_dir,
7807
                                                         disk_index)),
7808
                              mode=disk[constants.IDISK_MODE])
7809
      disks.append(disk_dev)
7810
  elif template_name == constants.DT_BLOCK:
7811
    if len(secondary_nodes) != 0:
7812
      raise errors.ProgrammerError("Wrong template configuration")
7813

    
7814
    for idx, disk in enumerate(disk_info):
7815
      disk_index = idx + base_index
7816
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7817
                              size=disk[constants.IDISK_SIZE],
7818
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7819
                                          disk[constants.IDISK_ADOPT]),
7820
                              iv_name="disk/%d" % disk_index,
7821
                              mode=disk[constants.IDISK_MODE])
7822
      disks.append(disk_dev)
7823

    
7824
  else:
7825
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7826
  return disks
7827
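# Illustrative example (made-up input): for template_name=constants.DT_PLAIN,
# base_index=0 and a single disk_info entry of 1024 MB in mode "rw", the
# result is one LD_LV objects.Disk of 1024 MB with iv_name "disk/0", placed
# in the disk's own "vg" value if given, otherwise in the cluster volume
# group returned by lu.cfg.GetVGName().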

    
7828

    
7829
def _GetInstanceInfoText(instance):
7830
  """Compute that text that should be added to the disk's metadata.
7831

7832
  """
7833
  return "originstname+%s" % instance.name
7834
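# For example (hypothetical name), an instance called "inst1.example.com"
# gets the text "originstname+inst1.example.com" attached to its disks.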

    
7835

    
7836
def _CalcEta(time_taken, written, total_size):
7837
  """Calculates the ETA based on size written and total size.
7838

7839
  @param time_taken: The time taken so far
7840
  @param written: amount written so far
7841
  @param total_size: The total size of data to be written
7842
  @return: The remaining time in seconds
7843

7844
  """
7845
  avg_time = time_taken / float(written)
7846
  return (total_size - written) * avg_time
7847
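# Worked example (made-up numbers): _CalcEta(10, 100, 1024), i.e. 100 units
# written in 10 seconds out of 1024, extrapolates linearly to
# (1024 - 100) * (10 / 100.0) = 92.4 seconds remaining.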

    
7848

    
7849
def _WipeDisks(lu, instance):
7850
  """Wipes instance disks.
7851

7852
  @type lu: L{LogicalUnit}
7853
  @param lu: the logical unit on whose behalf we execute
7854
  @type instance: L{objects.Instance}
7855
  @param instance: the instance whose disks we should wipe
7856
  @return: the success of the wipe
7857

7858
  """
7859
  node = instance.primary_node
7860

    
7861
  for device in instance.disks:
7862
    lu.cfg.SetDiskID(device, node)
7863

    
7864
  logging.info("Pause sync of instance %s disks", instance.name)
7865
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7866

    
7867
  for idx, success in enumerate(result.payload):
7868
    if not success:
7869
      logging.warn("pause-sync of instance %s for disks %d failed",
7870
                   instance.name, idx)
7871

    
7872
  try:
7873
    for idx, device in enumerate(instance.disks):
7874
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7875
      # MAX_WIPE_CHUNK at max
7876
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7877
                            constants.MIN_WIPE_CHUNK_PERCENT)
7878
      # we _must_ make this an int, otherwise rounding errors will
7879
      # occur
7880
      wipe_chunk_size = int(wipe_chunk_size)
7881
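      # Sketch with hypothetical constant values (MAX_WIPE_CHUNK=1024 MB,
      # MIN_WIPE_CHUNK_PERCENT=10): a 4096 MB disk would be wiped in chunks
      # of int(min(1024, 4096 / 100.0 * 10)) = 409 MB, while a 102400 MB
      # disk would be capped at 1024 MB per chunk.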

    
7882
      lu.LogInfo("* Wiping disk %d", idx)
7883
      logging.info("Wiping disk %d for instance %s, node %s using"
7884
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7885

    
7886
      offset = 0
7887
      size = device.size
7888
      last_output = 0
7889
      start_time = time.time()
7890

    
7891
      while offset < size:
7892
        wipe_size = min(wipe_chunk_size, size - offset)
7893
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7894
                      idx, offset, wipe_size)
7895
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7896
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7897
                     (idx, offset, wipe_size))
7898
        now = time.time()
7899
        offset += wipe_size
7900
        if now - last_output >= 60:
7901
          eta = _CalcEta(now - start_time, offset, size)
7902
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7903
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7904
          last_output = now
7905
  finally:
7906
    logging.info("Resume sync of instance %s disks", instance.name)
7907

    
7908
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7909

    
7910
    for idx, success in enumerate(result.payload):
7911
      if not success:
7912
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7913
                      " look at the status and troubleshoot the issue", idx)
7914
        logging.warn("resume-sync of instance %s for disks %d failed",
7915
                     instance.name, idx)
7916

    
7917

    
7918
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7919
  """Create all disks for an instance.
7920

7921
  This abstracts away some work from AddInstance.
7922

7923
  @type lu: L{LogicalUnit}
7924
  @param lu: the logical unit on whose behalf we execute
7925
  @type instance: L{objects.Instance}
7926
  @param instance: the instance whose disks we should create
7927
  @type to_skip: list
7928
  @param to_skip: list of indices to skip
7929
  @type target_node: string
7930
  @param target_node: if passed, overrides the target node for creation
7931
  @rtype: boolean
7932
  @return: the success of the creation
7933

7934
  """
7935
  info = _GetInstanceInfoText(instance)
7936
  if target_node is None:
7937
    pnode = instance.primary_node
7938
    all_nodes = instance.all_nodes
7939
  else:
7940
    pnode = target_node
7941
    all_nodes = [pnode]
7942

    
7943
  if instance.disk_template in constants.DTS_FILEBASED:
7944
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7945
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7946

    
7947
    result.Raise("Failed to create directory '%s' on"
7948
                 " node %s" % (file_storage_dir, pnode))
7949

    
7950
  # Note: this needs to be kept in sync with adding of disks in
7951
  # LUInstanceSetParams
7952
  for idx, device in enumerate(instance.disks):
7953
    if to_skip and idx in to_skip:
7954
      continue
7955
    logging.info("Creating volume %s for instance %s",
7956
                 device.iv_name, instance.name)
7957
    #HARDCODE
7958
    for node in all_nodes:
7959
      f_create = node == pnode
7960
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7961

    
7962

    
7963
def _RemoveDisks(lu, instance, target_node=None):
7964
  """Remove all disks for an instance.
7965

7966
  This abstracts away some work from `AddInstance()` and
7967
  `RemoveInstance()`. Note that in case some of the devices couldn't
7968
  be removed, the removal will continue with the other ones (compare
7969
  with `_CreateDisks()`).
7970

7971
  @type lu: L{LogicalUnit}
7972
  @param lu: the logical unit on whose behalf we execute
7973
  @type instance: L{objects.Instance}
7974
  @param instance: the instance whose disks we should remove
7975
  @type target_node: string
7976
  @param target_node: used to override the node on which to remove the disks
7977
  @rtype: boolean
7978
  @return: the success of the removal
7979

7980
  """
7981
  logging.info("Removing block devices for instance %s", instance.name)
7982

    
7983
  all_result = True
7984
  for device in instance.disks:
7985
    if target_node:
7986
      edata = [(target_node, device)]
7987
    else:
7988
      edata = device.ComputeNodeTree(instance.primary_node)
7989
    for node, disk in edata:
7990
      lu.cfg.SetDiskID(disk, node)
7991
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7992
      if msg:
7993
        lu.LogWarning("Could not remove block device %s on node %s,"
7994
                      " continuing anyway: %s", device.iv_name, node, msg)
7995
        all_result = False
7996

    
7997
  if instance.disk_template == constants.DT_FILE:
7998
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7999
    if target_node:
8000
      tgt = target_node
8001
    else:
8002
      tgt = instance.primary_node
8003
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8004
    if result.fail_msg:
8005
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8006
                    file_storage_dir, tgt, result.fail_msg)
8007
      all_result = False
8008

    
8009
  return all_result
8010

    
8011

    
8012
def _ComputeDiskSizePerVG(disk_template, disks):
8013
  """Compute disk size requirements in the volume group
8014

8015
  """
8016
  def _compute(disks, payload):
8017
    """Universal algorithm.
8018

8019
    """
8020
    vgs = {}
8021
    for disk in disks:
8022
      vgs[disk[constants.IDISK_VG]] = \
8023
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8024

    
8025
    return vgs
8026

    
8027
  # Required free disk space as a function of the disk template and disk sizes
8028
  req_size_dict = {
8029
    constants.DT_DISKLESS: {},
8030
    constants.DT_PLAIN: _compute(disks, 0),
8031
    # 128 MB are added for drbd metadata for each disk
8032
    constants.DT_DRBD8: _compute(disks, 128),
8033
    constants.DT_FILE: {},
8034
    constants.DT_SHARED_FILE: {},
8035
  }
8036

    
8037
  if disk_template not in req_size_dict:
8038
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8039
                                 " is unknown" % disk_template)
8040

    
8041
  return req_size_dict[disk_template]
8042
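# Illustrative example (made-up disks): for DT_DRBD8 with two disks of
# 1024 MB and 512 MB, both in volume group "xenvg", the result is
#   {"xenvg": (1024 + 128) + (512 + 128)} == {"xenvg": 1792}
# i.e. each disk contributes its own size plus the 128 MB DRBD metadata.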

    
8043

    
8044
def _ComputeDiskSize(disk_template, disks):
8045
  """Compute disk size requirements in the volume group
8046

8047
  """
8048
  # Required free disk space as a function of the disk template and disk sizes
8049
  req_size_dict = {
8050
    constants.DT_DISKLESS: None,
8051
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8052
    # 128 MB are added for drbd metadata for each disk
8053
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8054
    constants.DT_FILE: None,
8055
    constants.DT_SHARED_FILE: 0,
8056
    constants.DT_BLOCK: 0,
8057
  }
8058

    
8059
  if disk_template not in req_size_dict:
8060
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8061
                                 " is unknown" % disk_template)
8062

    
8063
  return req_size_dict[disk_template]
8064
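# Illustrative example: two 1024 MB disks under DT_DRBD8 require
# (1024 + 128) + (1024 + 128) = 2304 MB in total, while DT_DISKLESS and
# DT_FILE impose no volume group requirement (None).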

    
8065

    
8066
def _FilterVmNodes(lu, nodenames):
8067
  """Filters out non-vm_capable nodes from a list.
8068

8069
  @type lu: L{LogicalUnit}
8070
  @param lu: the logical unit for which we check
8071
  @type nodenames: list
8072
  @param nodenames: the list of nodes on which we should check
8073
  @rtype: list
8074
  @return: the list of vm-capable nodes
8075

8076
  """
8077
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8078
  return [name for name in nodenames if name not in non_vm_nodes]
8079
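# Example (hypothetical node names): if "node3" is the only node marked
# non-vm_capable, _FilterVmNodes(lu, ["node1", "node2", "node3"]) returns
# ["node1", "node2"].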

    
8080

    
8081
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8082
  """Hypervisor parameter validation.
8083

8084
  This function abstracts the hypervisor parameter validation to be
8085
  used in both instance create and instance modify.
8086

8087
  @type lu: L{LogicalUnit}
8088
  @param lu: the logical unit for which we check
8089
  @type nodenames: list
8090
  @param nodenames: the list of nodes on which we should check
8091
  @type hvname: string
8092
  @param hvname: the name of the hypervisor we should use
8093
  @type hvparams: dict
8094
  @param hvparams: the parameters which we need to check
8095
  @raise errors.OpPrereqError: if the parameters are not valid
8096

8097
  """
8098
  nodenames = _FilterVmNodes(lu, nodenames)
8099
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8100
                                                  hvname,
8101
                                                  hvparams)
8102
  for node in nodenames:
8103
    info = hvinfo[node]
8104
    if info.offline:
8105
      continue
8106
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8107

    
8108

    
8109
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8110
  """OS parameters validation.
8111

8112
  @type lu: L{LogicalUnit}
8113
  @param lu: the logical unit for which we check
8114
  @type required: boolean
8115
  @param required: whether the validation should fail if the OS is not
8116
      found
8117
  @type nodenames: list
8118
  @param nodenames: the list of nodes on which we should check
8119
  @type osname: string
8120
  @param osname: the name of the OS we should use
8121
  @type osparams: dict
8122
  @param osparams: the parameters which we need to check
8123
  @raise errors.OpPrereqError: if the parameters are not valid
8124

8125
  """
8126
  nodenames = _FilterVmNodes(lu, nodenames)
8127
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8128
                                   [constants.OS_VALIDATE_PARAMETERS],
8129
                                   osparams)
8130
  for node, nres in result.items():
8131
    # we don't check for offline cases since this should be run only
8132
    # against the master node and/or an instance's nodes
8133
    nres.Raise("OS Parameters validation failed on node %s" % node)
8134
    if not nres.payload:
8135
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8136
                 osname, node)
8137

    
8138

    
8139
class LUInstanceCreate(LogicalUnit):
8140
  """Create an instance.
8141

8142
  """
8143
  HPATH = "instance-add"
8144
  HTYPE = constants.HTYPE_INSTANCE
8145
  REQ_BGL = False
8146

    
8147
  def CheckArguments(self):
8148
    """Check arguments.
8149

8150
    """
8151
    # do not require name_check to ease forward/backward compatibility
8152
    # for tools
8153
    if self.op.no_install and self.op.start:
8154
      self.LogInfo("No-installation mode selected, disabling startup")
8155
      self.op.start = False
8156
    # validate/normalize the instance name
8157
    self.op.instance_name = \
8158
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8159

    
8160
    if self.op.ip_check and not self.op.name_check:
8161
      # TODO: make the ip check more flexible and not depend on the name check
8162
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8163
                                 " check", errors.ECODE_INVAL)
8164

    
8165
    # check nics' parameter names
8166
    for nic in self.op.nics:
8167
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8168

    
8169
    # check disks. parameter names and consistent adopt/no-adopt strategy
8170
    has_adopt = has_no_adopt = False
8171
    for disk in self.op.disks:
8172
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8173
      if constants.IDISK_ADOPT in disk:
8174
        has_adopt = True
8175
      else:
8176
        has_no_adopt = True
8177
    if has_adopt and has_no_adopt:
8178
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8179
                                 errors.ECODE_INVAL)
8180
    if has_adopt:
8181
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8182
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8183
                                   " '%s' disk template" %
8184
                                   self.op.disk_template,
8185
                                   errors.ECODE_INVAL)
8186
      if self.op.iallocator is not None:
8187
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8188
                                   " iallocator script", errors.ECODE_INVAL)
8189
      if self.op.mode == constants.INSTANCE_IMPORT:
8190
        raise errors.OpPrereqError("Disk adoption not allowed for"
8191
                                   " instance import", errors.ECODE_INVAL)
8192
    else:
8193
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8194
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8195
                                   " but no 'adopt' parameter given" %
8196
                                   self.op.disk_template,
8197
                                   errors.ECODE_INVAL)
8198

    
8199
    self.adopt_disks = has_adopt
8200

    
8201
    # instance name verification
8202
    if self.op.name_check:
8203
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8204
      self.op.instance_name = self.hostname1.name
8205
      # used in CheckPrereq for ip ping check
8206
      self.check_ip = self.hostname1.ip
8207
    else:
8208
      self.check_ip = None
8209

    
8210
    # file storage checks
8211
    if (self.op.file_driver and
8212
        self.op.file_driver not in constants.FILE_DRIVER):
8213
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8214
                                 self.op.file_driver, errors.ECODE_INVAL)
8215

    
8216
    if self.op.disk_template == constants.DT_FILE:
8217
      opcodes.RequireFileStorage()
8218
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8219
      opcodes.RequireSharedFileStorage()
8220

    
8221
    ### Node/iallocator related checks
8222
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8223

    
8224
    if self.op.pnode is not None:
8225
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8226
        if self.op.snode is None:
8227
          raise errors.OpPrereqError("The networked disk templates need"
8228
                                     " a mirror node", errors.ECODE_INVAL)
8229
      elif self.op.snode:
8230
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8231
                        " template")
8232
        self.op.snode = None
8233

    
8234
    self._cds = _GetClusterDomainSecret()
8235

    
8236
    if self.op.mode == constants.INSTANCE_IMPORT:
8237
      # On import force_variant must be True, because if we forced it at
8238
      # initial install, our only chance when importing it back is that it
8239
      # works again!
8240
      self.op.force_variant = True
8241

    
8242
      if self.op.no_install:
8243
        self.LogInfo("No-installation mode has no effect during import")
8244

    
8245
    elif self.op.mode == constants.INSTANCE_CREATE:
8246
      if self.op.os_type is None:
8247
        raise errors.OpPrereqError("No guest OS specified",
8248
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
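    # The allocator result is an ordered node list: the first entry becomes
    # the primary node and, when required_nodes is 2 (mirrored disk
    # templates such as DRBD), the second entry becomes the secondary.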

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
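    # Illustrative shape of the parsed export data returned above (option
    # names taken from the accesses in _ReadExportParams and CheckPrereq;
    # concrete field values would be export-specific):
    #   [INISECT_EXP]  version
    #   [INISECT_INS]  name, os, disk_template, disk_count, disk%d_size,
    #                  disk%d_dump, nic_count, nic%d_mac, tags, hypervisor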

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
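    # Example of the effect (hypothetical values): if the caller passed a
    # beparam that happens to equal the cluster-wide default, the key is
    # dropped here, so the instance keeps following the cluster default
    # instead of pinning an explicit per-instance value.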

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
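      # Resulting path (illustrative):
      #   <cluster storage dir>[/<op.file_storage_dir>]/<instance name>
      # e.g. a hypothetical "/srv/ganeti/file-storage/web/inst1.example.com"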

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)
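    # Each entry of self.disks now holds IDISK_SIZE, IDISK_MODE, IDISK_VG and
    # IDISK_METAVG (plus IDISK_ADOPT when adopting existing volumes); the
    # sizes may still be adjusted below from the adopted LV/block device data.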

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

    
9676
        msg = result.fail_msg
9677
        if msg or not result.payload:
9678
          if not msg:
9679
            msg = "disk not found"
9680
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9681
                                   (idx, node, msg))
9682

    
9683
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9684
    for idx, dev in enumerate(self.instance.disks):
9685
      if idx not in self.disks:
9686
        continue
9687

    
9688
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9689
                      (idx, node_name))
9690

    
9691
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9692
                                   ldisk=ldisk):
9693
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9694
                                 " replace disks for instance %s" %
9695
                                 (node_name, self.instance.name))
9696

    
9697
  def _CreateNewStorage(self, node_name):
9698
    """Create new storage on the primary or secondary node.
9699

9700
    This is only used for same-node replaces, not for changing the
9701
    secondary node, hence we don't want to modify the existing disk.
9702

9703
    """
9704
    iv_names = {}
9705

    
9706
    for idx, dev in enumerate(self.instance.disks):
9707
      if idx not in self.disks:
9708
        continue
9709

    
9710
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9711

    
9712
      self.cfg.SetDiskID(dev, node_name)
9713

    
9714
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9715
      names = _GenerateUniqueNames(self.lu, lv_names)
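      # Illustrative example (exact format depends on _GenerateUniqueNames):
      # for disk/0 this could yield something like
      # ["<uuid>.disk0_data", "<uuid>.disk0_meta"].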
9716

    
9717
      vg_data = dev.children[0].logical_id[0]
9718
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9719
                             logical_id=(vg_data, names[0]))
9720
      vg_meta = dev.children[1].logical_id[0]
9721
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9722
                             logical_id=(vg_meta, names[1]))
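      # The metadata LV has a fixed size of 128 MiB (the space reserved for
      # DRBD8 external metadata).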
9723

    
9724
      new_lvs = [lv_data, lv_meta]
9725
      old_lvs = [child.Copy() for child in dev.children]
9726
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9727

    
9728
      # we pass force_create=True to force the LVM creation
9729
      for new_lv in new_lvs:
9730
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9731
                        _GetInstanceInfoText(self.instance), False)
9732

    
9733
    return iv_names
9734

    
9735
  def _CheckDevices(self, node_name, iv_names):
9736
    for name, (dev, _, _) in iv_names.iteritems():
9737
      self.cfg.SetDiskID(dev, node_name)
9738

    
9739
      result = self.rpc.call_blockdev_find(node_name, dev)
9740

    
9741
      msg = result.fail_msg
9742
      if msg or not result.payload:
9743
        if not msg:
9744
          msg = "disk not found"
9745
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9746
                                 (name, msg))
9747

    
9748
      if result.payload.is_degraded:
9749
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9750

    
9751
  def _RemoveOldStorage(self, node_name, iv_names):
9752
    for name, (_, old_lvs, _) in iv_names.iteritems():
9753
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9754

    
9755
      for lv in old_lvs:
9756
        self.cfg.SetDiskID(lv, node_name)
9757

    
9758
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9759
        if msg:
9760
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9761
                             hint="remove unused LVs manually")
9762

    
9763
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9764
    """Replace a disk on the primary or secondary for DRBD 8.
9765

9766
    The algorithm for replace is quite complicated:
9767

9768
      1. for each disk to be replaced:
9769

9770
        1. create new LVs on the target node with unique names
9771
        1. detach old LVs from the drbd device
9772
        1. rename old LVs to name_replaced.<time_t>
9773
        1. rename new LVs to old LVs
9774
        1. attach the new LVs (with the old names now) to the drbd device
9775

9776
      1. wait for sync across all devices
9777

9778
      1. for each modified disk:
9779

9780
        1. remove old LVs (which have the name name_replaced.<time_t>)
9781

9782
    Failures are not very well handled.
9783

9784
    """
9785
    steps_total = 6
9786

    
9787
    # Step: check device activation
9788
    self.lu.LogStep(1, steps_total, "Check device existence")
9789
    self._CheckDisksExistence([self.other_node, self.target_node])
9790
    self._CheckVolumeGroup([self.target_node, self.other_node])
9791

    
9792
    # Step: check other node consistency
9793
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9794
    self._CheckDisksConsistency(self.other_node,
9795
                                self.other_node == self.instance.primary_node,
9796
                                False)
9797

    
9798
    # Step: create new storage
9799
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9800
    iv_names = self._CreateNewStorage(self.target_node)
9801

    
9802
    # Step: for each lv, detach+rename*2+attach
9803
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9804
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9805
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9806

    
9807
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9808
                                                     old_lvs)
9809
      result.Raise("Can't detach drbd from local storage on node"
9810
                   " %s for device %s" % (self.target_node, dev.iv_name))
9811
      #dev.children = []
9812
      #cfg.Update(instance)
9813

    
9814
      # ok, we created the new LVs, so now we know we have the needed
9815
      # storage; as such, we proceed on the target node to rename
9816
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9817
      # using the assumption that logical_id == physical_id (which in
9818
      # turn is the unique_id on that node)
9819

    
9820
      # FIXME(iustin): use a better name for the replaced LVs
9821
      temp_suffix = int(time.time())
9822
      ren_fn = lambda d, suff: (d.physical_id[0],
9823
                                d.physical_id[1] + "_replaced-%s" % suff)
9824

    
9825
      # Build the rename list based on what LVs exist on the node
9826
      rename_old_to_new = []
9827
      for to_ren in old_lvs:
9828
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9829
        if not result.fail_msg and result.payload:
9830
          # device exists
9831
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9832

    
9833
      self.lu.LogInfo("Renaming the old LVs on the target node")
9834
      result = self.rpc.call_blockdev_rename(self.target_node,
9835
                                             rename_old_to_new)
9836
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9837

    
9838
      # Now we rename the new LVs to the old LVs
9839
      self.lu.LogInfo("Renaming the new LVs on the target node")
9840
      rename_new_to_old = [(new, old.physical_id)
9841
                           for old, new in zip(old_lvs, new_lvs)]
9842
      result = self.rpc.call_blockdev_rename(self.target_node,
9843
                                             rename_new_to_old)
9844
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9845

    
9846
      # Intermediate steps of in memory modifications
9847
      for old, new in zip(old_lvs, new_lvs):
9848
        new.logical_id = old.logical_id
9849
        self.cfg.SetDiskID(new, self.target_node)
9850

    
9851
      # We need to modify old_lvs so that removal later removes the
9852
      # right LVs, not the newly added ones; note that old_lvs is a
9853
      # copy here
9854
      for disk in old_lvs:
9855
        disk.logical_id = ren_fn(disk, temp_suffix)
9856
        self.cfg.SetDiskID(disk, self.target_node)
9857

    
9858
      # Now that the new lvs have the old name, we can add them to the device
9859
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9860
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9861
                                                  new_lvs)
9862
      msg = result.fail_msg
9863
      if msg:
9864
        for new_lv in new_lvs:
9865
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9866
                                               new_lv).fail_msg
9867
          if msg2:
9868
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9869
                               hint=("cleanup manually the unused logical"
9870
                                     "volumes"))
9871
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9872

    
9873
    cstep = 5
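    # With early_release the old storage is removed as step 5, before the
    # resync; otherwise step 5 is the resync and removal becomes step 6.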
9874
    if self.early_release:
9875
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9876
      cstep += 1
9877
      self._RemoveOldStorage(self.target_node, iv_names)
9878
      # WARNING: we release both node locks here, do not do other RPCs
9879
      # than WaitForSync to the primary node
9880
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9881
                    names=[self.target_node, self.other_node])
9882

    
9883
    # Wait for sync
9884
    # This can fail as the old devices are degraded and _WaitForSync
9885
    # does a combined result over all disks, so we don't check its return value
9886
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9887
    cstep += 1
9888
    _WaitForSync(self.lu, self.instance)
9889

    
9890
    # Check all devices manually
9891
    self._CheckDevices(self.instance.primary_node, iv_names)
9892

    
9893
    # Step: remove old storage
9894
    if not self.early_release:
9895
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9896
      cstep += 1
9897
      self._RemoveOldStorage(self.target_node, iv_names)
9898

    
9899
  def _ExecDrbd8Secondary(self, feedback_fn):
9900
    """Replace the secondary node for DRBD 8.
9901

9902
    The algorithm for replace is quite complicated:
9903
      - for all disks of the instance:
9904
        - create new LVs on the new node with same names
9905
        - shutdown the drbd device on the old secondary
9906
        - disconnect the drbd network on the primary
9907
        - create the drbd device on the new secondary
9908
        - network attach the drbd on the primary, using an artifice:
9909
          the drbd code for Attach() will connect to the network if it
9910
          finds a device which is connected to the good local disks but
9911
          not network enabled
9912
      - wait for sync across all devices
9913
      - remove all disks from the old secondary
9914

9915
    Failures are not very well handled.
9916

9917
    """
9918
    steps_total = 6
9919

    
9920
    pnode = self.instance.primary_node
9921

    
9922
    # Step: check device activation
9923
    self.lu.LogStep(1, steps_total, "Check device existence")
9924
    self._CheckDisksExistence([self.instance.primary_node])
9925
    self._CheckVolumeGroup([self.instance.primary_node])
9926

    
9927
    # Step: check other node consistency
9928
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9929
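    # Only the primary's local disk state is checked (ldisk=True): the old
    # secondary may already be dead or unreachable at this point.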
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9930

    
9931
    # Step: create new storage
9932
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9933
    for idx, dev in enumerate(self.instance.disks):
9934
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9935
                      (self.new_node, idx))
9936
      # we pass force_create=True to force LVM creation
9937
      for new_lv in dev.children:
9938
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9939
                        _GetInstanceInfoText(self.instance), False)
9940

    
9941
    # Step 4: drbd minors and drbd setup changes
9942
    # after this, we must manually remove the drbd minors on both the
9943
    # error and the success paths
9944
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9945
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9946
                                         for dev in self.instance.disks],
9947
                                        self.instance.name)
9948
    logging.debug("Allocated minors %r", minors)
9949

    
9950
    iv_names = {}
9951
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9952
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9953
                      (self.new_node, idx))
9954
      # create new devices on new_node; note that we create two IDs:
9955
      # one without port, so the drbd will be activated without
9956
      # networking information on the new node at this stage, and one
9957
      # with network, for the latter activation in step 4
9958
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
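      # The DRBD8 logical_id layout is (nodeA, nodeB, port, minorA, minorB,
      # secret); e.g. ("node1", "node2", 11001, 0, 3, "<secret>"), with
      # purely illustrative values.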
9959
      if self.instance.primary_node == o_node1:
9960
        p_minor = o_minor1
9961
      else:
9962
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9963
        p_minor = o_minor2
9964

    
9965
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9966
                      p_minor, new_minor, o_secret)
9967
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9968
                    p_minor, new_minor, o_secret)
9969

    
9970
      iv_names[idx] = (dev, dev.children, new_net_id)
9971
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9972
                    new_net_id)
9973
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9974
                              logical_id=new_alone_id,
9975
                              children=dev.children,
9976
                              size=dev.size)
9977
      try:
9978
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9979
                              _GetInstanceInfoText(self.instance), False)
9980
      except errors.GenericError:
9981
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9982
        raise
9983

    
9984
    # We have new devices, shutdown the drbd on the old secondary
9985
    for idx, dev in enumerate(self.instance.disks):
9986
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9987
      self.cfg.SetDiskID(dev, self.target_node)
9988
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9989
      if msg:
9990
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9991
                           "node: %s" % (idx, msg),
9992
                           hint=("Please cleanup this device manually as"
9993
                                 " soon as possible"))
9994

    
9995
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9996
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9997
                                               self.instance.disks)[pnode]
9998

    
9999
    msg = result.fail_msg
10000
    if msg:
10001
      # detaches didn't succeed (unlikely)
10002
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10003
      raise errors.OpExecError("Can't detach the disks from the network on"
10004
                               " old node: %s" % (msg,))
10005

    
10006
    # if we managed to detach at least one, we update all the disks of
10007
    # the instance to point to the new secondary
10008
    self.lu.LogInfo("Updating instance configuration")
10009
    for dev, _, new_logical_id in iv_names.itervalues():
10010
      dev.logical_id = new_logical_id
10011
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10012

    
10013
    self.cfg.Update(self.instance, feedback_fn)
10014

    
10015
    # and now perform the drbd attach
10016
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10017
                    " (standalone => connected)")
10018
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10019
                                            self.new_node],
10020
                                           self.node_secondary_ip,
10021
                                           self.instance.disks,
10022
                                           self.instance.name,
10023
                                           False)
10024
    for to_node, to_result in result.items():
10025
      msg = to_result.fail_msg
10026
      if msg:
10027
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10028
                           to_node, msg,
10029
                           hint=("please do a gnt-instance info to see the"
10030
                                 " status of disks"))
10031
    cstep = 5
10032
    if self.early_release:
10033
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10034
      cstep += 1
10035
      self._RemoveOldStorage(self.target_node, iv_names)
10036
      # WARNING: we release all node locks here, do not do other RPCs
10037
      # than WaitForSync to the primary node
10038
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10039
                    names=[self.instance.primary_node,
10040
                           self.target_node,
10041
                           self.new_node])
10042

    
10043
    # Wait for sync
10044
    # This can fail as the old devices are degraded and _WaitForSync
10045
    # does a combined result over all disks, so we don't check its return value
10046
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10047
    cstep += 1
10048
    _WaitForSync(self.lu, self.instance)
10049

    
10050
    # Check all devices manually
10051
    self._CheckDevices(self.instance.primary_node, iv_names)
10052

    
10053
    # Step: remove old storage
10054
    if not self.early_release:
10055
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10056
      self._RemoveOldStorage(self.target_node, iv_names)
10057

    
10058

    
10059
class LURepairNodeStorage(NoHooksLU):
10060
  """Repairs the volume group on a node.
10061

10062
  """
10063
  REQ_BGL = False
10064

    
10065
  def CheckArguments(self):
10066
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10067

    
10068
    storage_type = self.op.storage_type
10069

    
10070
    if (constants.SO_FIX_CONSISTENCY not in
10071
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10072
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10073
                                 " repaired" % storage_type,
10074
                                 errors.ECODE_INVAL)
10075

    
10076
  def ExpandNames(self):
10077
    self.needed_locks = {
10078
      locking.LEVEL_NODE: [self.op.node_name],
10079
      }
10080

    
10081
  def _CheckFaultyDisks(self, instance, node_name):
10082
    """Ensure faulty disks abort the opcode or at least warn."""
10083
    try:
10084
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10085
                                  node_name, True):
10086
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10087
                                   " node '%s'" % (instance.name, node_name),
10088
                                   errors.ECODE_STATE)
10089
    except errors.OpPrereqError, err:
10090
      if self.op.ignore_consistency:
10091
        self.proc.LogWarning(str(err.args[0]))
10092
      else:
10093
        raise
10094

    
10095
  def CheckPrereq(self):
10096
    """Check prerequisites.
10097

10098
    """
10099
    # Check whether any instance on this node has faulty disks
10100
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10101
      if not inst.admin_up:
10102
        continue
10103
      check_nodes = set(inst.all_nodes)
10104
      check_nodes.discard(self.op.node_name)
10105
      for inst_node_name in check_nodes:
10106
        self._CheckFaultyDisks(inst, inst_node_name)
10107

    
10108
  def Exec(self, feedback_fn):
10109
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10110
                (self.op.name, self.op.node_name))
10111

    
10112
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10113
    result = self.rpc.call_storage_execute(self.op.node_name,
10114
                                           self.op.storage_type, st_args,
10115
                                           self.op.name,
10116
                                           constants.SO_FIX_CONSISTENCY)
10117
    result.Raise("Failed to repair storage unit '%s' on %s" %
10118
                 (self.op.name, self.op.node_name))
10119

    
10120

    
10121
class LUNodeEvacuate(NoHooksLU):
10122
  """Evacuates instances off a list of nodes.
10123

10124
  """
10125
  REQ_BGL = False
10126

    
10127
  def CheckArguments(self):
10128
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10129

    
10130
  def ExpandNames(self):
10131
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10132

    
10133
    if self.op.remote_node is not None:
10134
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10135
      assert self.op.remote_node
10136

    
10137
      if self.op.remote_node == self.op.node_name:
10138
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10139
                                   " secondary node", errors.ECODE_INVAL)
10140

    
10141
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10142
        raise errors.OpPrereqError("Without the use of an iallocator only"
10143
                                   " secondary instances can be evacuated",
10144
                                   errors.ECODE_INVAL)
10145

    
10146
    # Declare locks
10147
    self.share_locks = _ShareAll()
10148
    self.needed_locks = {
10149
      locking.LEVEL_INSTANCE: [],
10150
      locking.LEVEL_NODEGROUP: [],
10151
      locking.LEVEL_NODE: [],
10152
      }
10153

    
10154
    if self.op.remote_node is None:
10155
      # Iallocator will choose any node(s) in the same group
10156
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10157
    else:
10158
      group_nodes = frozenset([self.op.remote_node])
10159

    
10160
    # Determine nodes to be locked
10161
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10162

    
10163
  def _DetermineInstances(self):
10164
    """Builds list of instances to operate on.
10165

10166
    """
10167
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10168

    
10169
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10170
      # Primary instances only
10171
      inst_fn = _GetNodePrimaryInstances
10172
      assert self.op.remote_node is None, \
10173
        "Evacuating primary instances requires iallocator"
10174
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10175
      # Secondary instances only
10176
      inst_fn = _GetNodeSecondaryInstances
10177
    else:
10178
      # All instances
10179
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10180
      inst_fn = _GetNodeInstances
10181

    
10182
    return inst_fn(self.cfg, self.op.node_name)
10183

    
10184
  def DeclareLocks(self, level):
10185
    if level == locking.LEVEL_INSTANCE:
10186
      # Lock instances optimistically, needs verification once node and group
10187
      # locks have been acquired
10188
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10189
        set(i.name for i in self._DetermineInstances())
10190

    
10191
    elif level == locking.LEVEL_NODEGROUP:
10192
      # Lock node groups optimistically, needs verification once nodes have
10193
      # been acquired
10194
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10195
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10196

    
10197
    elif level == locking.LEVEL_NODE:
10198
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10199

    
10200
  def CheckPrereq(self):
10201
    # Verify locks
10202
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10203
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10204
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10205

    
10206
    assert owned_nodes == self.lock_nodes
10207

    
10208
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10209
    if owned_groups != wanted_groups:
10210
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10211
                               " current groups are '%s', used to be '%s'" %
10212
                               (utils.CommaJoin(wanted_groups),
10213
                                utils.CommaJoin(owned_groups)))
10214

    
10215
    # Determine affected instances
10216
    self.instances = self._DetermineInstances()
10217
    self.instance_names = [i.name for i in self.instances]
10218

    
10219
    if set(self.instance_names) != owned_instances:
10220
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10221
                               " were acquired, current instances are '%s',"
10222
                               " used to be '%s'" %
10223
                               (self.op.node_name,
10224
                                utils.CommaJoin(self.instance_names),
10225
                                utils.CommaJoin(owned_instances)))
10226

    
10227
    if self.instance_names:
10228
      self.LogInfo("Evacuating instances from node '%s': %s",
10229
                   self.op.node_name,
10230
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10231
    else:
10232
      self.LogInfo("No instances to evacuate from node '%s'",
10233
                   self.op.node_name)
10234

    
10235
    if self.op.remote_node is not None:
10236
      for i in self.instances:
10237
        if i.primary_node == self.op.remote_node:
10238
          raise errors.OpPrereqError("Node %s is the primary node of"
10239
                                     " instance %s, cannot use it as"
10240
                                     " secondary" %
10241
                                     (self.op.remote_node, i.name),
10242
                                     errors.ECODE_INVAL)
10243

    
10244
  def Exec(self, feedback_fn):
10245
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10246

    
10247
    if not self.instance_names:
10248
      # No instances to evacuate
10249
      jobs = []
10250

    
10251
    elif self.op.iallocator is not None:
10252
      # TODO: Implement relocation to other group
10253
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10254
                       evac_mode=self.op.mode,
10255
                       instances=list(self.instance_names))
10256

    
10257
      ial.Run(self.op.iallocator)
10258

    
10259
      if not ial.success:
10260
        raise errors.OpPrereqError("Can't compute node evacuation using"
10261
                                   " iallocator '%s': %s" %
10262
                                   (self.op.iallocator, ial.info),
10263
                                   errors.ECODE_NORES)
10264

    
10265
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10266

    
10267
    elif self.op.remote_node is not None:
10268
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10269
      jobs = [
10270
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10271
                                        remote_node=self.op.remote_node,
10272
                                        disks=[],
10273
                                        mode=constants.REPLACE_DISK_CHG,
10274
                                        early_release=self.op.early_release)]
10275
        for instance_name in self.instance_names
10276
        ]
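      # One single-opcode job per instance, so each secondary replacement can
      # be scheduled and can fail independently of the others.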
10277

    
10278
    else:
10279
      raise errors.ProgrammerError("No iallocator or remote node")
10280

    
10281
    return ResultWithJobs(jobs)
10282

    
10283

    
10284
def _SetOpEarlyRelease(early_release, op):
10285
  """Sets C{early_release} flag on opcodes if available.
10286

10287
  """
10288
  try:
10289
    op.early_release = early_release
10290
  except AttributeError:
10291
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
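    # Opcode objects reject unknown parameters with AttributeError, in which
    # case the flag is simply skipped; OpInstanceReplaceDisks is asserted to
    # always accept it.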
10292

    
10293
  return op
10294

    
10295

    
10296
def _NodeEvacDest(use_nodes, group, nodes):
10297
  """Returns group or nodes depending on caller's choice.
10298

10299
  """
10300
  if use_nodes:
10301
    return utils.CommaJoin(nodes)
10302
  else:
10303
    return group
10304

    
10305

    
10306
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10307
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10308

10309
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10310
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10311

10312
  @type lu: L{LogicalUnit}
10313
  @param lu: Logical unit instance
10314
  @type alloc_result: tuple/list
10315
  @param alloc_result: Result from iallocator
10316
  @type early_release: bool
10317
  @param early_release: Whether to release locks early if possible
10318
  @type use_nodes: bool
10319
  @param use_nodes: Whether to display node names instead of groups
10320

10321
  """
10322
  (moved, failed, jobs) = alloc_result
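  # Illustrative shape of alloc_result (hypothetical values):
  #   moved  = [("inst1", "group1", ["node3"])]
  #   failed = [("inst2", "insufficient memory")]
  #   jobs   = [[<serialized opcode>, ...], ...]  (one inner list per job)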
10323

    
10324
  if failed:
10325
    lu.LogWarning("Unable to evacuate instances %s",
10326
                  utils.CommaJoin("%s (%s)" % (name, reason)
10327
                                  for (name, reason) in failed))
10328

    
10329
  if moved:
10330
    lu.LogInfo("Instances to be moved: %s",
10331
               utils.CommaJoin("%s (to %s)" %
10332
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10333
                               for (name, group, nodes) in moved))
10334

    
10335
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10336
              map(opcodes.OpCode.LoadOpCode, ops))
10337
          for ops in jobs]
10338

    
10339

    
10340
class LUInstanceGrowDisk(LogicalUnit):
10341
  """Grow a disk of an instance.
10342

10343
  """
10344
  HPATH = "disk-grow"
10345
  HTYPE = constants.HTYPE_INSTANCE
10346
  REQ_BGL = False
10347

    
10348
  def ExpandNames(self):
10349
    self._ExpandAndLockInstance()
10350
    self.needed_locks[locking.LEVEL_NODE] = []
10351
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10352

    
10353
  def DeclareLocks(self, level):
10354
    if level == locking.LEVEL_NODE:
10355
      self._LockInstancesNodes()
10356

    
10357
  def BuildHooksEnv(self):
10358
    """Build hooks env.
10359

10360
    This runs on the master, the primary and all the secondaries.
10361

10362
    """
10363
    env = {
10364
      "DISK": self.op.disk,
10365
      "AMOUNT": self.op.amount,
10366
      }
10367
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10368
    return env
10369

    
10370
  def BuildHooksNodes(self):
10371
    """Build hooks nodes.
10372

10373
    """
10374
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10375
    return (nl, nl)
10376

    
10377
  def CheckPrereq(self):
10378
    """Check prerequisites.
10379

10380
    This checks that the instance is in the cluster.
10381

10382
    """
10383
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10384
    assert instance is not None, \
10385
      "Cannot retrieve locked instance %s" % self.op.instance_name
10386
    nodenames = list(instance.all_nodes)
10387
    for node in nodenames:
10388
      _CheckNodeOnline(self, node)
10389

    
10390
    self.instance = instance
10391

    
10392
    if instance.disk_template not in constants.DTS_GROWABLE:
10393
      raise errors.OpPrereqError("Instance's disk layout does not support"
10394
                                 " growing", errors.ECODE_INVAL)
10395

    
10396
    self.disk = instance.FindDisk(self.op.disk)
10397

    
10398
    if instance.disk_template not in (constants.DT_FILE,
10399
                                      constants.DT_SHARED_FILE):
10400
      # TODO: check the free disk space for file, when that feature will be
10401
      # supported
10402
      _CheckNodesFreeDiskPerVG(self, nodenames,
10403
                               self.disk.ComputeGrowth(self.op.amount))
10404

    
10405
  def Exec(self, feedback_fn):
10406
    """Execute disk grow.
10407

10408
    """
10409
    instance = self.instance
10410
    disk = self.disk
10411

    
10412
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10413
    if not disks_ok:
10414
      raise errors.OpExecError("Cannot activate block device to grow")
10415

    
10416
    # First run all grow ops in dry-run mode
10417
    for node in instance.all_nodes:
10418
      self.cfg.SetDiskID(disk, node)
10419
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10420
      result.Raise("Grow request failed to node %s" % node)
10421

    
10422
    # We know that (as far as we can test) operations across different
10423
    # nodes will succeed, time to run it for real
10424
    for node in instance.all_nodes:
10425
      self.cfg.SetDiskID(disk, node)
10426
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10427
      result.Raise("Grow request failed to node %s" % node)
10428

    
10429
      # TODO: Rewrite code to work properly
10430
      # DRBD goes into sync mode for a short amount of time after executing the
10431
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10432
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10433
      # time is a work-around.
10434
      time.sleep(5)
10435

    
10436
    disk.RecordGrow(self.op.amount)
10437
    self.cfg.Update(instance, feedback_fn)
10438
    if self.op.wait_for_sync:
10439
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10440
      if disk_abort:
10441
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10442
                             " status; please check the instance")
10443
      if not instance.admin_up:
10444
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10445
    elif not instance.admin_up:
10446
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10447
                           " not supposed to be running because no wait for"
10448
                           " sync mode was requested")
10449

    
10450

    
10451
class LUInstanceQueryData(NoHooksLU):
10452
  """Query runtime instance data.
10453

10454
  """
10455
  REQ_BGL = False
10456

    
10457
  def ExpandNames(self):
10458
    self.needed_locks = {}
10459

    
10460
    # Use locking if requested or when non-static information is wanted
10461
    if not (self.op.static or self.op.use_locking):
10462
      self.LogWarning("Non-static data requested, locks need to be acquired")
10463
      self.op.use_locking = True
10464

    
10465
    if self.op.instances or not self.op.use_locking:
10466
      # Expand instance names right here
10467
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10468
    else:
10469
      # Will use acquired locks
10470
      self.wanted_names = None
10471

    
10472
    if self.op.use_locking:
10473
      self.share_locks = _ShareAll()
10474

    
10475
      if self.wanted_names is None:
10476
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10477
      else:
10478
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10479

    
10480
      self.needed_locks[locking.LEVEL_NODE] = []
10481
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10482

    
10483
  def DeclareLocks(self, level):
10484
    if self.op.use_locking and level == locking.LEVEL_NODE:
10485
      self._LockInstancesNodes()
10486

    
10487
  def CheckPrereq(self):
10488
    """Check prerequisites.
10489

10490
    This only checks the optional instance list against the existing names.
10491

10492
    """
10493
    if self.wanted_names is None:
10494
      assert self.op.use_locking, "Locking was not used"
10495
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10496

    
10497
    self.wanted_instances = \
10498
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10499

    
10500
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10501
    """Returns the status of a block device
10502

10503
    """
10504
    if self.op.static or not node:
10505
      return None
10506

    
10507
    self.cfg.SetDiskID(dev, node)
10508

    
10509
    result = self.rpc.call_blockdev_find(node, dev)
10510
    if result.offline:
10511
      return None
10512

    
10513
    result.Raise("Can't compute disk status for %s" % instance_name)
10514

    
10515
    status = result.payload
10516
    if status is None:
10517
      return None
10518

    
10519
    return (status.dev_path, status.major, status.minor,
10520
            status.sync_percent, status.estimated_time,
10521
            status.is_degraded, status.ldisk_status)
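    # Illustrative return value: ("/dev/drbd0", 147, 0, 99.9, 30, False,
    # <ldisk status>), i.e. path, major, minor, sync percent, estimated
    # time, degraded flag and local-disk status.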
10522

    
10523
  def _ComputeDiskStatus(self, instance, snode, dev):
10524
    """Compute block device status.
10525

10526
    """
10527
    if dev.dev_type in constants.LDS_DRBD:
10528
      # we change the snode then (otherwise we use the one passed in)
10529
      if dev.logical_id[0] == instance.primary_node:
10530
        snode = dev.logical_id[1]
10531
      else:
10532
        snode = dev.logical_id[0]
10533

    
10534
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10535
                                              instance.name, dev)
10536
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10537

    
10538
    if dev.children:
10539
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10540
                                        instance, snode),
10541
                         dev.children)
10542
    else:
10543
      dev_children = []
10544

    
10545
    return {
10546
      "iv_name": dev.iv_name,
10547
      "dev_type": dev.dev_type,
10548
      "logical_id": dev.logical_id,
10549
      "physical_id": dev.physical_id,
10550
      "pstatus": dev_pstatus,
10551
      "sstatus": dev_sstatus,
10552
      "children": dev_children,
10553
      "mode": dev.mode,
10554
      "size": dev.size,
10555
      }
10556

    
10557
  def Exec(self, feedback_fn):
10558
    """Gather and return data"""
10559
    result = {}
10560

    
10561
    cluster = self.cfg.GetClusterInfo()
10562

    
10563
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10564
                                          for i in self.wanted_instances)
10565
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10566
      if self.op.static or pnode.offline:
10567
        remote_state = None
10568
        if pnode.offline:
10569
          self.LogWarning("Primary node %s is marked offline, returning static"
10570
                          " information only for instance %s" %
10571
                          (pnode.name, instance.name))
10572
      else:
10573
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10574
                                                  instance.name,
10575
                                                  instance.hypervisor)
10576
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10577
        remote_info = remote_info.payload
10578
        if remote_info and "state" in remote_info:
10579
          remote_state = "up"
10580
        else:
10581
          remote_state = "down"
10582

    
10583
      if instance.admin_up:
10584
        config_state = "up"
10585
      else:
10586
        config_state = "down"
10587

    
10588
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10589
                  instance.disks)
10590

    
10591
      result[instance.name] = {
10592
        "name": instance.name,
10593
        "config_state": config_state,
10594
        "run_state": remote_state,
10595
        "pnode": instance.primary_node,
10596
        "snodes": instance.secondary_nodes,
10597
        "os": instance.os,
10598
        # this happens to be the same format used for hooks
10599
        "nics": _NICListToTuple(self, instance.nics),
10600
        "disk_template": instance.disk_template,
10601
        "disks": disks,
10602
        "hypervisor": instance.hypervisor,
10603
        "network_port": instance.network_port,
10604
        "hv_instance": instance.hvparams,
10605
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10606
        "be_instance": instance.beparams,
10607
        "be_actual": cluster.FillBE(instance),
10608
        "os_instance": instance.osparams,
10609
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10610
        "serial_no": instance.serial_no,
10611
        "mtime": instance.mtime,
10612
        "ctime": instance.ctime,
10613
        "uuid": instance.uuid,
10614
        }
10615

    
10616
    return result
10617

    
10618

    
10619
class LUInstanceSetParams(LogicalUnit):
10620
  """Modifies an instances's parameters.
10621

10622
  """
10623
  HPATH = "instance-modify"
10624
  HTYPE = constants.HTYPE_INSTANCE
10625
  REQ_BGL = False
10626

    
10627
  def CheckArguments(self):
10628
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10629
            self.op.hvparams or self.op.beparams or self.op.os_name):
10630
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10631

    
10632
    if self.op.hvparams:
10633
      _CheckGlobalHvParams(self.op.hvparams)
10634

    
10635
    # Disk validation
10636
    disk_addremove = 0
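    # self.op.disks is a list of (op, dict) pairs; illustrative values:
    #   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]    adds a disk
    #   [(0, {constants.IDISK_MODE: constants.DISK_RDONLY})]   changes disk 0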
10637
    for disk_op, disk_dict in self.op.disks:
10638
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10639
      if disk_op == constants.DDM_REMOVE:
10640
        disk_addremove += 1
10641
        continue
10642
      elif disk_op == constants.DDM_ADD:
10643
        disk_addremove += 1
10644
      else:
10645
        if not isinstance(disk_op, int):
10646
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10647
        if not isinstance(disk_dict, dict):
10648
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10649
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10650

    
10651
      if disk_op == constants.DDM_ADD:
10652
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10653
        if mode not in constants.DISK_ACCESS_SET:
10654
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10655
                                     errors.ECODE_INVAL)
10656
        size = disk_dict.get(constants.IDISK_SIZE, None)
10657
        if size is None:
10658
          raise errors.OpPrereqError("Required disk parameter size missing",
10659
                                     errors.ECODE_INVAL)
10660
        try:
10661
          size = int(size)
10662
        except (TypeError, ValueError), err:
10663
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10664
                                     str(err), errors.ECODE_INVAL)
10665
        disk_dict[constants.IDISK_SIZE] = size
10666
      else:
10667
        # modification of disk
10668
        if constants.IDISK_SIZE in disk_dict:
10669
          raise errors.OpPrereqError("Disk size change not possible, use"
10670
                                     " grow-disk", errors.ECODE_INVAL)
10671

    
10672
    if disk_addremove > 1:
10673
      raise errors.OpPrereqError("Only one disk add or remove operation"
10674
                                 " supported at a time", errors.ECODE_INVAL)
10675

    
10676
    if self.op.disks and self.op.disk_template is not None:
10677
      raise errors.OpPrereqError("Disk template conversion and other disk"
10678
                                 " changes not supported at the same time",
10679
                                 errors.ECODE_INVAL)
10680

    
10681
    if (self.op.disk_template and
10682
        self.op.disk_template in constants.DTS_INT_MIRROR and
10683
        self.op.remote_node is None):
10684
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10685
                                 " one requires specifying a secondary node",
10686
                                 errors.ECODE_INVAL)
10687

    
10688
    # NIC validation
10689
    nic_addremove = 0
10690
    for nic_op, nic_dict in self.op.nics:
10691
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10692
      if nic_op == constants.DDM_REMOVE:
10693
        nic_addremove += 1
10694
        continue
10695
      elif nic_op == constants.DDM_ADD:
10696
        nic_addremove += 1
10697
      else:
10698
        if not isinstance(nic_op, int):
10699
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10700
        if not isinstance(nic_dict, dict):
10701
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10702
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10703

    
10704
      # nic_dict should be a dict
10705
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10706
      if nic_ip is not None:
10707
        if nic_ip.lower() == constants.VALUE_NONE:
10708
          nic_dict[constants.INIC_IP] = None
10709
        else:
10710
          if not netutils.IPAddress.IsValid(nic_ip):
10711
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10712
                                       errors.ECODE_INVAL)
10713

    
10714
      nic_bridge = nic_dict.get("bridge", None)
10715
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10716
      if nic_bridge and nic_link:
10717
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10718
                                   " at the same time", errors.ECODE_INVAL)
10719
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10720
        nic_dict["bridge"] = None
10721
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10722
        nic_dict[constants.INIC_LINK] = None
10723

    
10724
      if nic_op == constants.DDM_ADD:
10725
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10726
        if nic_mac is None:
10727
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10728

    
10729
      if constants.INIC_MAC in nic_dict:
10730
        nic_mac = nic_dict[constants.INIC_MAC]
10731
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10732
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10733

    
10734
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10735
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10736
                                     " modifying an existing nic",
10737
                                     errors.ECODE_INVAL)
10738

    
10739
    if nic_addremove > 1:
10740
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10741
                                 " supported at a time", errors.ECODE_INVAL)
10742

    
10743
  def ExpandNames(self):
10744
    self._ExpandAndLockInstance()
10745
    self.needed_locks[locking.LEVEL_NODE] = []
10746
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10747

    
10748
  def DeclareLocks(self, level):
10749
    if level == locking.LEVEL_NODE:
10750
      self._LockInstancesNodes()
10751
      if self.op.disk_template and self.op.remote_node:
10752
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10753
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10754

    
10755
  def BuildHooksEnv(self):
10756
    """Build hooks env.
10757

10758
    This runs on the master, primary and secondaries.
10759

10760
    """
10761
    args = dict()
10762
    if constants.BE_MEMORY in self.be_new:
10763
      args["memory"] = self.be_new[constants.BE_MEMORY]
10764
    if constants.BE_VCPUS in self.be_new:
10765
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10766
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10767
    # information at all.
10768
    if self.op.nics:
10769
      args["nics"] = []
10770
      nic_override = dict(self.op.nics)
10771
      for idx, nic in enumerate(self.instance.nics):
10772
        if idx in nic_override:
10773
          this_nic_override = nic_override[idx]
10774
        else:
10775
          this_nic_override = {}
10776
        if constants.INIC_IP in this_nic_override:
10777
          ip = this_nic_override[constants.INIC_IP]
10778
        else:
10779
          ip = nic.ip
10780
        if constants.INIC_MAC in this_nic_override:
10781
          mac = this_nic_override[constants.INIC_MAC]
10782
        else:
10783
          mac = nic.mac
10784
        if idx in self.nic_pnew:
10785
          nicparams = self.nic_pnew[idx]
10786
        else:
10787
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
    be_old = cluster.FillBE(instance)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

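    # When memory is being increased and --force was not given, check that the
    # primary node still has enough free memory, and (with auto_balance set)
    # that failover to the secondary nodes would remain possible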
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

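  # Supported disk template conversions, mapping (old template, new template)
  # pairs to the methods implementing them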
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

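    # Ask the iallocator to compute a solution for moving the instance to
    # one of the target groups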
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
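    # Nodes that failed to answer are marked with False instead of a list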
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

11743
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11744
                                                     instance)
11745

    
11746
      helper.CreateSnapshots()
11747
      try:
11748
        if (self.op.shutdown and instance.admin_up and
11749
            not self.op.remove_instance):
11750
          assert not activate_disks
11751
          feedback_fn("Starting instance %s" % instance.name)
11752
          result = self.rpc.call_instance_start(src_node, instance,
11753
                                                None, None, False)
11754
          msg = result.fail_msg
11755
          if msg:
11756
            feedback_fn("Failed to start instance: %s" % msg)
11757
            _ShutdownInstanceDisks(self, instance)
11758
            raise errors.OpExecError("Could not start instance: %s" % msg)
11759

    
11760
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11761
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11762
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11763
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11764
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11765

    
11766
          (key_name, _, _) = self.x509_key_name
11767

    
11768
          dest_ca_pem = \
11769
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11770
                                            self.dest_x509_ca)
11771

    
11772
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11773
                                                     key_name, dest_ca_pem,
11774
                                                     timeouts)
11775
      finally:
11776
        helper.Cleanup()
11777

    
11778
      # Check for backwards compatibility
11779
      assert len(dresults) == len(instance.disks)
11780
      assert compat.all(isinstance(i, bool) for i in dresults), \
11781
             "Not all results are boolean: %r" % dresults
11782

    
11783
    finally:
11784
      if activate_disks:
11785
        feedback_fn("Deactivating disks for %s" % instance.name)
11786
        _ShutdownInstanceDisks(self, instance)
11787

    
11788
    if not (compat.all(dresults) and fin_resu):
11789
      failures = []
11790
      if not fin_resu:
11791
        failures.append("export finalization")
11792
      if not compat.all(dresults):
11793
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11794
                               if not dsk)
11795
        failures.append("disk export: disk(s) %s" % fdsk)
11796

    
11797
      raise errors.OpExecError("Export failed, errors in %s" %
11798
                               utils.CommaJoin(failures))
11799

    
11800
    # At this point, the export was successful, we can cleanup/finish
11801

    
11802
    # Remove instance if requested
11803
    if self.op.remove_instance:
11804
      feedback_fn("Removing instance %s" % instance.name)
11805
      _RemoveInstance(self, feedback_fn, instance,
11806
                      self.op.ignore_remove_failures)
11807

    
11808
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11809
      self._CleanupExports(feedback_fn)
11810

    
11811
    return fin_resu, dresults
11812

    
11813

    
11814
class LUBackupRemove(NoHooksLU):
11815
  """Remove exports related to the named instance.
11816

11817
  """
11818
  REQ_BGL = False
11819

    
11820
  def ExpandNames(self):
11821
    self.needed_locks = {}
11822
    # We need all nodes to be locked in order for RemoveExport to work, but we
11823
    # don't need to lock the instance itself, as nothing will happen to it (and
11824
    # we can remove exports also for a removed instance)
11825
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11826

    
11827
  def Exec(self, feedback_fn):
11828
    """Remove any export.
11829

11830
    """
11831
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11832
    # If the instance was not found we'll try with the name that was passed in.
11833
    # This will only work if it was an FQDN, though.
11834
    fqdn_warn = False
11835
    if not instance_name:
11836
      fqdn_warn = True
11837
      instance_name = self.op.instance_name
11838

    
11839
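    # The export may live on any node, so query all locked nodes for it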
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay but end up split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
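    # Only keep assignments that actually move a node to a different group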
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
12140
  """Logical unit for querying node groups.
12141

12142
  """
12143
  REQ_BGL = False
12144

    
12145
  def CheckArguments(self):
12146
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12147
                          self.op.output_fields, False)
12148

    
12149
  def ExpandNames(self):
12150
    self.gq.ExpandNames(self)
12151

    
12152
  def DeclareLocks(self, level):
12153
    self.gq.DeclareLocks(self, level)
12154

    
12155
  def Exec(self, feedback_fn):
12156
    return self.gq.OldStyleQuery(self)
12157

    
12158

    
12159
class LUGroupSetParams(LogicalUnit):
12160
  """Modifies the parameters of a node group.
12161

12162
  """
12163
  HPATH = "group-modify"
12164
  HTYPE = constants.HTYPE_GROUP
12165
  REQ_BGL = False
12166

    
12167
  def CheckArguments(self):
12168
    all_changes = [
12169
      self.op.ndparams,
12170
      self.op.alloc_policy,
12171
      ]
12172

    
12173
    if all_changes.count(None) == len(all_changes):
12174
      raise errors.OpPrereqError("Please pass at least one modification",
12175
                                 errors.ECODE_INVAL)
12176

    
12177
  def ExpandNames(self):
12178
    # This raises errors.OpPrereqError on its own:
12179
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12180

    
12181
    self.needed_locks = {
12182
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12183
      }
12184

    
12185
  def CheckPrereq(self):
12186
    """Check prerequisites.
12187

12188
    """
12189
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12190

    
12191
    if self.group is None:
12192
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12193
                               (self.op.group_name, self.group_uuid))
12194

    
12195
    if self.op.ndparams:
12196
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12197
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12198
      self.new_ndparams = new_ndparams
12199

    
12200
  def BuildHooksEnv(self):
12201
    """Build hooks env.
12202

12203
    """
12204
    return {
12205
      "GROUP_NAME": self.op.group_name,
12206
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12207
      }
12208

    
12209
  def BuildHooksNodes(self):
12210
    """Build hooks nodes.
12211

12212
    """
12213
    mn = self.cfg.GetMasterNode()
12214
    return ([mn], [mn])
12215

    
12216
  def Exec(self, feedback_fn):
12217
    """Modifies the node group.
12218

12219
    """
12220
    result = []
12221

    
12222
    if self.op.ndparams:
12223
      self.group.ndparams = self.new_ndparams
12224
      result.append(("ndparams", str(self.group.ndparams)))
12225

    
12226
    if self.op.alloc_policy:
12227
      self.group.alloc_policy = self.op.alloc_policy
12228

    
12229
    self.cfg.Update(self.group, feedback_fn)
12230
    return result
12231

    
12232

    
12233
class LUGroupRemove(LogicalUnit):
12234
  HPATH = "group-remove"
12235
  HTYPE = constants.HTYPE_GROUP
12236
  REQ_BGL = False
12237

    
12238
  def ExpandNames(self):
12239
    # This raises errors.OpPrereqError on its own:
12240
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12241
    self.needed_locks = {
12242
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12243
      }
12244

    
12245
  def CheckPrereq(self):
12246
    """Check prerequisites.
12247

12248
    This checks that the given group name exists as a node group, that it
12249
    is empty (i.e., contains no nodes), and that it is not the last group
12250
    of the cluster.
12251

12252
    """
12253
    # Verify that the group is empty.
12254
    group_nodes = [node.name
12255
                   for node in self.cfg.GetAllNodesInfo().values()
12256
                   if node.group == self.group_uuid]
12257

    
12258
    if group_nodes:
12259
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12260
                                 " nodes: %s" %
12261
                                 (self.op.group_name,
12262
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12263
                                 errors.ECODE_STATE)
12264

    
12265
    # Verify the cluster would not be left group-less.
12266
    if len(self.cfg.GetNodeGroupList()) == 1:
12267
      raise errors.OpPrereqError("Group '%s' is the only group,"
12268
                                 " cannot be removed" %
12269
                                 self.op.group_name,
12270
                                 errors.ECODE_STATE)
12271

    
12272
  def BuildHooksEnv(self):
12273
    """Build hooks env.
12274

12275
    """
12276
    return {
12277
      "GROUP_NAME": self.op.group_name,
12278
      }
12279

    
12280
  def BuildHooksNodes(self):
12281
    """Build hooks nodes.
12282

12283
    """
12284
    mn = self.cfg.GetMasterNode()
12285
    return ([mn], [mn])
12286

    
12287
  def Exec(self, feedback_fn):
12288
    """Remove the node group.
12289

12290
    """
12291
    try:
12292
      self.cfg.RemoveNodeGroup(self.group_uuid)
12293
    except errors.ConfigurationError:
12294
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12295
                               (self.op.group_name, self.group_uuid))
12296

    
12297
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12298

    
12299

    
12300
class LUGroupRename(LogicalUnit):
12301
  HPATH = "group-rename"
12302
  HTYPE = constants.HTYPE_GROUP
12303
  REQ_BGL = False
12304

    
12305
  def ExpandNames(self):
12306
    # This raises errors.OpPrereqError on its own:
12307
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12308

    
12309
    self.needed_locks = {
12310
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12311
      }
12312

    
12313
  def CheckPrereq(self):
12314
    """Check prerequisites.
12315

12316
    Ensures requested new name is not yet used.
12317

12318
    """
12319
    try:
12320
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12321
    except errors.OpPrereqError:
12322
      pass
12323
    else:
12324
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12325
                                 " node group (UUID: %s)" %
12326
                                 (self.op.new_name, new_name_uuid),
12327
                                 errors.ECODE_EXISTS)
12328

    
12329
  def BuildHooksEnv(self):
12330
    """Build hooks env.
12331

12332
    """
12333
    return {
12334
      "OLD_NAME": self.op.group_name,
12335
      "NEW_NAME": self.op.new_name,
12336
      }
12337

    
12338
  def BuildHooksNodes(self):
12339
    """Build hooks nodes.
12340

12341
    """
12342
    mn = self.cfg.GetMasterNode()
12343

    
12344
    all_nodes = self.cfg.GetAllNodesInfo()
12345
    all_nodes.pop(mn, None)
12346

    
12347
    run_nodes = [mn]
12348
    run_nodes.extend(node.name for node in all_nodes.values()
12349
                     if node.group == self.group_uuid)
12350

    
12351
    return (run_nodes, run_nodes)
12352

    
12353
  def Exec(self, feedback_fn):
12354
    """Rename the node group.
12355

12356
    """
12357
    group = self.cfg.GetNodeGroup(self.group_uuid)
12358

    
12359
    if group is None:
12360
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12361
                               (self.op.group_name, self.group_uuid))
12362

    
12363
    group.name = self.op.new_name
12364
    self.cfg.Update(group, feedback_fn)
12365

    
12366
    return self.op.new_name
12367

    
12368

    
12369
class LUGroupEvacuate(LogicalUnit):
12370
  HPATH = "group-evacuate"
12371
  HTYPE = constants.HTYPE_GROUP
12372
  REQ_BGL = False
12373

    
12374
  def ExpandNames(self):
12375
    # This raises errors.OpPrereqError on its own:
12376
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12377

    
12378
    if self.op.target_groups:
12379
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12380
                                  self.op.target_groups)
12381
    else:
12382
      self.req_target_uuids = []
12383

    
12384
    if self.group_uuid in self.req_target_uuids:
12385
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12386
                                 " as a target group (targets are %s)" %
12387
                                 (self.group_uuid,
12388
                                  utils.CommaJoin(self.req_target_uuids)),
12389
                                 errors.ECODE_INVAL)
12390

    
12391
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12392

    
12393
    self.share_locks = _ShareAll()
12394
    self.needed_locks = {
12395
      locking.LEVEL_INSTANCE: [],
12396
      locking.LEVEL_NODEGROUP: [],
12397
      locking.LEVEL_NODE: [],
12398
      }
12399

    
12400
  def DeclareLocks(self, level):
12401
    if level == locking.LEVEL_INSTANCE:
12402
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12403

    
12404
      # Lock instances optimistically, needs verification once node and group
12405
      # locks have been acquired
12406
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12407
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12408

    
12409
    elif level == locking.LEVEL_NODEGROUP:
12410
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12411

    
12412
      if self.req_target_uuids:
12413
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12414

    
12415
        # Lock all groups used by instances optimistically; this requires going
12416
        # via the node before it's locked, requiring verification later on
12417
        lock_groups.update(group_uuid
12418
                           for instance_name in
12419
                             self.owned_locks(locking.LEVEL_INSTANCE)
12420
                           for group_uuid in
12421
                             self.cfg.GetInstanceNodeGroups(instance_name))
12422
      else:
12423
        # No target groups, need to lock all of them
12424
        lock_groups = locking.ALL_SET
12425

    
12426
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12427

    
12428
    elif level == locking.LEVEL_NODE:
12429
      # This will only lock the nodes in the group to be evacuated which
12430
      # contain actual instances
12431
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12432
      self._LockInstancesNodes()
12433

    
12434
      # Lock all nodes in group to be evacuated and target groups
12435
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12436
      assert self.group_uuid in owned_groups
12437
      member_nodes = [node_name
12438
                      for group in owned_groups
12439
                      for node_name in self.cfg.GetNodeGroup(group).members]
12440
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12441

    
12442
  def CheckPrereq(self):
12443
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12444
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12445
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12446

    
12447
    assert owned_groups.issuperset(self.req_target_uuids)
12448
    assert self.group_uuid in owned_groups
12449

    
12450
    # Check if locked instances are still correct
12451
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12452

    
12453
    # Get instance information
12454
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12455

    
12456
    # Check if node groups for locked instances are still correct
12457
    for instance_name in owned_instances:
12458
      inst = self.instances[instance_name]
12459
      assert owned_nodes.issuperset(inst.all_nodes), \
12460
        "Instance %s's nodes changed while we kept the lock" % instance_name
12461

    
12462
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12463
                                             owned_groups)
12464

    
12465
      assert self.group_uuid in inst_groups, \
12466
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12467

    
12468
    if self.req_target_uuids:
12469
      # User requested specific target groups
12470
      self.target_uuids = self.req_target_uuids
12471
    else:
12472
      # All groups except the one to be evacuated are potential targets
12473
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12474
                           if group_uuid != self.group_uuid]
12475

    
12476
      if not self.target_uuids:
12477
        raise errors.OpPrereqError("There are no possible target groups",
12478
                                   errors.ECODE_INVAL)
12479

    
12480
  def BuildHooksEnv(self):
12481
    """Build hooks env.
12482

12483
    """
12484
    return {
12485
      "GROUP_NAME": self.op.group_name,
12486
      "TARGET_GROUPS": " ".join(self.target_uuids),
12487
      }
12488

    
12489
  def BuildHooksNodes(self):
12490
    """Build hooks nodes.
12491

12492
    """
12493
    mn = self.cfg.GetMasterNode()
12494

    
12495
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12496

    
12497
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12498

    
12499
    return (run_nodes, run_nodes)
12500

    
12501
  def Exec(self, feedback_fn):
12502
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12503

    
12504
    assert self.group_uuid not in self.target_uuids
12505

    
12506
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12507
                     instances=instances, target_groups=self.target_uuids)
12508

    
12509
    ial.Run(self.op.iallocator)
12510

    
12511
    if not ial.success:
12512
      raise errors.OpPrereqError("Can't compute group evacuation using"
12513
                                 " iallocator '%s': %s" %
12514
                                 (self.op.iallocator, ial.info),
12515
                                 errors.ECODE_NORES)
12516

    
12517
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12518

    
12519
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12520
                 len(jobs), self.op.group_name)
12521

    
12522
    return ResultWithJobs(jobs)
12523

    
12524

    
12525
class TagsLU(NoHooksLU): # pylint: disable=W0223
12526
  """Generic tags LU.
12527

12528
  This is an abstract class which is the parent of all the other tags LUs.
12529

12530
  """
12531
  def ExpandNames(self):
12532
    self.group_uuid = None
12533
    self.needed_locks = {}
12534
    if self.op.kind == constants.TAG_NODE:
12535
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12536
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12537
    elif self.op.kind == constants.TAG_INSTANCE:
12538
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12539
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12540
    elif self.op.kind == constants.TAG_NODEGROUP:
12541
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12542

    
12543
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12544
    # not possible to acquire the BGL based on opcode parameters)
12545

    
12546
  def CheckPrereq(self):
12547
    """Check prerequisites.
12548

12549
    """
12550
    if self.op.kind == constants.TAG_CLUSTER:
12551
      self.target = self.cfg.GetClusterInfo()
12552
    elif self.op.kind == constants.TAG_NODE:
12553
      self.target = self.cfg.GetNodeInfo(self.op.name)
12554
    elif self.op.kind == constants.TAG_INSTANCE:
12555
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12556
    elif self.op.kind == constants.TAG_NODEGROUP:
12557
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12558
    else:
12559
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12560
                                 str(self.op.kind), errors.ECODE_INVAL)
12561

    
12562

    
12563
class LUTagsGet(TagsLU):
12564
  """Returns the tags of a given object.
12565

12566
  """
12567
  REQ_BGL = False
12568

    
12569
  def ExpandNames(self):
12570
    TagsLU.ExpandNames(self)
12571

    
12572
    # Share locks as this is only a read operation
12573
    self.share_locks = _ShareAll()
12574

    
12575
  def Exec(self, feedback_fn):
12576
    """Returns the tag list.
12577

12578
    """
12579
    return list(self.target.GetTags())
12580

    
12581

    
12582
class LUTagsSearch(NoHooksLU):
12583
  """Searches the tags for a given pattern.
12584

12585
  """
12586
  REQ_BGL = False
12587

    
12588
  def ExpandNames(self):
12589
    self.needed_locks = {}
12590

    
12591
  def CheckPrereq(self):
12592
    """Check prerequisites.
12593

12594
    This checks the pattern passed for validity by compiling it.
12595

12596
    """
12597
    try:
12598
      self.re = re.compile(self.op.pattern)
12599
    except re.error, err:
12600
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12601
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12602

    
12603
  def Exec(self, feedback_fn):
12604
    """Returns the tag list.
12605

12606
    """
12607
    cfg = self.cfg
12608
    tgts = [("/cluster", cfg.GetClusterInfo())]
12609
    ilist = cfg.GetAllInstancesInfo().values()
12610
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12611
    nlist = cfg.GetAllNodesInfo().values()
12612
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12613
    tgts.extend(("/nodegroup/%s" % n.name, n)
12614
                for n in cfg.GetAllNodeGroupsInfo().values())
12615
    results = []
12616
    for path, target in tgts:
12617
      for tag in target.GetTags():
12618
        if self.re.search(tag):
12619
          results.append((path, tag))
12620
    return results
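    # The result is a list of (path, tag) pairs, for example (with
    # hypothetical data): [("/cluster", "production"),
    # ("/instances/inst1.example.com", "web")].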
12621

    
12622

    
12623
class LUTagsSet(TagsLU):
12624
  """Sets a tag on a given object.
12625

12626
  """
12627
  REQ_BGL = False
12628

    
12629
  def CheckPrereq(self):
12630
    """Check prerequisites.
12631

12632
    This checks the type and length of the tag name and value.
12633

12634
    """
12635
    TagsLU.CheckPrereq(self)
12636
    for tag in self.op.tags:
12637
      objects.TaggableObject.ValidateTag(tag)
12638

    
12639
  def Exec(self, feedback_fn):
12640
    """Sets the tag.
12641

12642
    """
12643
    try:
12644
      for tag in self.op.tags:
12645
        self.target.AddTag(tag)
12646
    except errors.TagError, err:
12647
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12648
    self.cfg.Update(self.target, feedback_fn)
12649

    
12650

    
12651
class LUTagsDel(TagsLU):
12652
  """Delete a list of tags from a given object.
12653

12654
  """
12655
  REQ_BGL = False
12656

    
12657
  def CheckPrereq(self):
12658
    """Check prerequisites.
12659

12660
    This checks that we have the given tag.
12661

12662
    """
12663
    TagsLU.CheckPrereq(self)
12664
    for tag in self.op.tags:
12665
      objects.TaggableObject.ValidateTag(tag)
12666
    del_tags = frozenset(self.op.tags)
12667
    cur_tags = self.target.GetTags()
12668

    
12669
    diff_tags = del_tags - cur_tags
12670
    if diff_tags:
12671
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12672
      raise errors.OpPrereqError("Tag(s) %s not found" %
12673
                                 (utils.CommaJoin(diff_names), ),
12674
                                 errors.ECODE_NOENT)
12675

    
12676
  def Exec(self, feedback_fn):
12677
    """Remove the tag from the object.
12678

12679
    """
12680
    for tag in self.op.tags:
12681
      self.target.RemoveTag(tag)
12682
    self.cfg.Update(self.target, feedback_fn)
12683

    
12684

    
12685
class LUTestDelay(NoHooksLU):
12686
  """Sleep for a specified amount of time.
12687

12688
  This LU sleeps on the master and/or nodes for a specified amount of
12689
  time.
12690

12691
  """
12692
  REQ_BGL = False
12693

    
12694
  def ExpandNames(self):
12695
    """Expand names and set required locks.
12696

12697
    This expands the node list, if any.
12698

12699
    """
12700
    self.needed_locks = {}
12701
    if self.op.on_nodes:
12702
      # _GetWantedNodes can be used here, but is not always appropriate to use
12703
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12704
      # more information.
12705
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12706
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12707

    
12708
  def _TestDelay(self):
12709
    """Do the actual sleep.
12710

12711
    """
12712
    if self.op.on_master:
12713
      if not utils.TestDelay(self.op.duration):
12714
        raise errors.OpExecError("Error during master delay test")
12715
    if self.op.on_nodes:
12716
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12717
      for node, node_result in result.items():
12718
        node_result.Raise("Failure during rpc call to node %s" % node)
12719

    
12720
  def Exec(self, feedback_fn):
12721
    """Execute the test delay opcode, with the wanted repetitions.
12722

12723
    """
12724
    if self.op.repeat == 0:
12725
      self._TestDelay()
12726
    else:
12727
      top_value = self.op.repeat - 1
12728
      for i in range(self.op.repeat):
12729
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12730
        self._TestDelay()
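    # Example: with repeat=3 the loop above logs iterations "0/2", "1/2" and
    # "2/2" before each delay; the displayed counter is zero-based.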
12731

    
12732

    
12733
class LUTestJqueue(NoHooksLU):
12734
  """Utility LU to test some aspects of the job queue.
12735

12736
  """
12737
  REQ_BGL = False
12738

    
12739
  # Must be lower than default timeout for WaitForJobChange to see whether it
12740
  # notices changed jobs
12741
  _CLIENT_CONNECT_TIMEOUT = 20.0
12742
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12743

    
12744
  @classmethod
12745
  def _NotifyUsingSocket(cls, cb, errcls):
12746
    """Opens a Unix socket and waits for another program to connect.
12747

12748
    @type cb: callable
12749
    @param cb: Callback to send socket name to client
12750
    @type errcls: class
12751
    @param errcls: Exception class to use for errors
12752

12753
    """
12754
    # Using a temporary directory as there's no easy way to create temporary
12755
    # sockets without writing a custom loop around tempfile.mktemp and
12756
    # socket.bind
12757
    tmpdir = tempfile.mkdtemp()
12758
    try:
12759
      tmpsock = utils.PathJoin(tmpdir, "sock")
12760

    
12761
      logging.debug("Creating temporary socket at %s", tmpsock)
12762
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12763
      try:
12764
        sock.bind(tmpsock)
12765
        sock.listen(1)
12766

    
12767
        # Send details to client
12768
        cb(tmpsock)
12769

    
12770
        # Wait for client to connect before continuing
12771
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12772
        try:
12773
          (conn, _) = sock.accept()
12774
        except socket.error, err:
12775
          raise errcls("Client didn't connect in time (%s)" % err)
12776
      finally:
12777
        sock.close()
12778
    finally:
12779
      # Remove as soon as client is connected
12780
      shutil.rmtree(tmpdir)
12781

    
12782
    # Wait for client to close
12783
    try:
12784
      try:
12785
        # pylint: disable=E1101
12786
        # Instance of '_socketobject' has no ... member
12787
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12788
        conn.recv(1)
12789
      except socket.error, err:
12790
        raise errcls("Client failed to confirm notification (%s)" % err)
12791
    finally:
12792
      conn.close()
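  # Sketch of the peer expected by _NotifyUsingSocket (hypothetical test-side
  # code; "sockname" is the path received through the callback):
  #   peer = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   peer.connect(sockname)   # must happen within _CLIENT_CONNECT_TIMEOUT
  #   ...                      # run whatever checks the test needs
  #   peer.close()             # confirms the notification to this LU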
12793

    
12794
  def _SendNotification(self, test, arg, sockname):
12795
    """Sends a notification to the client.
12796

12797
    @type test: string
12798
    @param test: Test name
12799
    @param arg: Test argument (depends on test)
12800
    @type sockname: string
12801
    @param sockname: Socket path
12802

12803
    """
12804
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12805

    
12806
  def _Notify(self, prereq, test, arg):
12807
    """Notifies the client of a test.
12808

12809
    @type prereq: bool
12810
    @param prereq: Whether this is a prereq-phase test
12811
    @type test: string
12812
    @param test: Test name
12813
    @param arg: Test argument (depends on test)
12814

12815
    """
12816
    if prereq:
12817
      errcls = errors.OpPrereqError
12818
    else:
12819
      errcls = errors.OpExecError
12820

    
12821
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12822
                                                  test, arg),
12823
                                   errcls)
12824

    
12825
  def CheckArguments(self):
12826
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12827
    self.expandnames_calls = 0
12828

    
12829
  def ExpandNames(self):
12830
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12831
    if checkargs_calls < 1:
12832
      raise errors.ProgrammerError("CheckArguments was not called")
12833

    
12834
    self.expandnames_calls += 1
12835

    
12836
    if self.op.notify_waitlock:
12837
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12838

    
12839
    self.LogInfo("Expanding names")
12840

    
12841
    # Get lock on master node (just to get a lock, not for a particular reason)
12842
    self.needed_locks = {
12843
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12844
      }
12845

    
12846
  def Exec(self, feedback_fn):
12847
    if self.expandnames_calls < 1:
12848
      raise errors.ProgrammerError("ExpandNames was not called")
12849

    
12850
    if self.op.notify_exec:
12851
      self._Notify(False, constants.JQT_EXEC, None)
12852

    
12853
    self.LogInfo("Executing")
12854

    
12855
    if self.op.log_messages:
12856
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12857
      for idx, msg in enumerate(self.op.log_messages):
12858
        self.LogInfo("Sending log message %s", idx + 1)
12859
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12860
        # Report how many test messages have been sent
12861
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12862

    
12863
    if self.op.fail:
12864
      raise errors.OpExecError("Opcode failure was requested")
12865

    
12866
    return True
12867

    
12868

    
12869
class IAllocator(object):
12870
  """IAllocator framework.
12871

12872
  An IAllocator instance has four sets of attributes:
12873
    - cfg that is needed to query the cluster
12874
    - input data (all members of the _KEYS class attribute are required)
12875
    - four buffer attributes (in|out_data|text), that represent the
12876
      input (to the external script) in text and data structure format,
12877
      and the output from it, again in two formats
12878
    - the result variables from the script (success, info, nodes) for
12879
      easy usage
12880

12881
  """
12882
  # pylint: disable=R0902
12883
  # lots of instance attributes
12884

    
12885
  def __init__(self, cfg, rpc, mode, **kwargs):
12886
    self.cfg = cfg
12887
    self.rpc = rpc
12888
    # init buffer variables
12889
    self.in_text = self.out_text = self.in_data = self.out_data = None
12890
    # init all input fields so that pylint is happy
12891
    self.mode = mode
12892
    self.memory = self.disks = self.disk_template = None
12893
    self.os = self.tags = self.nics = self.vcpus = None
12894
    self.hypervisor = None
12895
    self.relocate_from = None
12896
    self.name = None
12897
    self.instances = None
12898
    self.evac_mode = None
12899
    self.target_groups = []
12900
    # computed fields
12901
    self.required_nodes = None
12902
    # init result fields
12903
    self.success = self.info = self.result = None
12904

    
12905
    try:
12906
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12907
    except KeyError:
12908
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12909
                                   " IAllocator" % self.mode)
12910

    
12911
    keyset = [n for (n, _) in keydata]
12912

    
12913
    for key in kwargs:
12914
      if key not in keyset:
12915
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12916
                                     " IAllocator" % key)
12917
      setattr(self, key, kwargs[key])
12918

    
12919
    for key in keyset:
12920
      if key not in kwargs:
12921
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12922
                                     " IAllocator" % key)
12923
    self._BuildInputData(compat.partial(fn, self), keydata)
12924

    
12925
  def _ComputeClusterData(self):
12926
    """Compute the generic allocator input data.
12927

12928
    This is the data that is independent of the actual operation.
12929

12930
    """
12931
    cfg = self.cfg
12932
    cluster_info = cfg.GetClusterInfo()
12933
    # cluster data
12934
    data = {
12935
      "version": constants.IALLOCATOR_VERSION,
12936
      "cluster_name": cfg.GetClusterName(),
12937
      "cluster_tags": list(cluster_info.GetTags()),
12938
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12939
      # we don't have job IDs
12940
      }
12941
    ninfo = cfg.GetAllNodesInfo()
12942
    iinfo = cfg.GetAllInstancesInfo().values()
12943
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12944

    
12945
    # node data
12946
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12947

    
12948
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12949
      hypervisor_name = self.hypervisor
12950
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12951
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12952
    else:
12953
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12954

    
12955
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12956
                                        hypervisor_name)
12957
    node_iinfo = \
12958
      self.rpc.call_all_instances_info(node_list,
12959
                                       cluster_info.enabled_hypervisors)
12960

    
12961
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12962

    
12963
    config_ndata = self._ComputeBasicNodeData(ninfo)
12964
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12965
                                                 i_list, config_ndata)
12966
    assert len(data["nodes"]) == len(ninfo), \
12967
        "Incomplete node data computed"
12968

    
12969
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12970

    
12971
    self.in_data = data
12972

    
12973
  @staticmethod
12974
  def _ComputeNodeGroupData(cfg):
12975
    """Compute node groups data.
12976

12977
    """
12978
    ng = dict((guuid, {
12979
      "name": gdata.name,
12980
      "alloc_policy": gdata.alloc_policy,
12981
      })
12982
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12983

    
12984
    return ng
12985

    
12986
  @staticmethod
12987
  def _ComputeBasicNodeData(node_cfg):
12988
    """Compute global node data.
12989

12990
    @rtype: dict
12991
    @return: a dict mapping node name to a dict of static node attributes
12992

12993
    """
12994
    # fill in static (config-based) values
12995
    node_results = dict((ninfo.name, {
12996
      "tags": list(ninfo.GetTags()),
12997
      "primary_ip": ninfo.primary_ip,
12998
      "secondary_ip": ninfo.secondary_ip,
12999
      "offline": ninfo.offline,
13000
      "drained": ninfo.drained,
13001
      "master_candidate": ninfo.master_candidate,
13002
      "group": ninfo.group,
13003
      "master_capable": ninfo.master_capable,
13004
      "vm_capable": ninfo.vm_capable,
13005
      })
13006
      for ninfo in node_cfg.values())
13007

    
13008
    return node_results
13009

    
13010
  @staticmethod
13011
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13012
                              node_results):
13013
    """Compute global node data.
13014

13015
    @param node_results: the basic node structures as filled from the config
13016

13017
    """
13018
    # make a copy of the current dict
13019
    node_results = dict(node_results)
13020
    for nname, nresult in node_data.items():
13021
      assert nname in node_results, "Missing basic data for node %s" % nname
13022
      ninfo = node_cfg[nname]
13023

    
13024
      if not (ninfo.offline or ninfo.drained):
13025
        nresult.Raise("Can't get data for node %s" % nname)
13026
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13027
                                nname)
13028
        remote_info = nresult.payload
13029

    
13030
        for attr in ["memory_total", "memory_free", "memory_dom0",
13031
                     "vg_size", "vg_free", "cpu_total"]:
13032
          if attr not in remote_info:
13033
            raise errors.OpExecError("Node '%s' didn't return attribute"
13034
                                     " '%s'" % (nname, attr))
13035
          if not isinstance(remote_info[attr], int):
13036
            raise errors.OpExecError("Node '%s' returned invalid value"
13037
                                     " for '%s': %s" %
13038
                                     (nname, attr, remote_info[attr]))
13039
        # compute memory used by primary instances
13040
        i_p_mem = i_p_up_mem = 0
13041
        for iinfo, beinfo in i_list:
13042
          if iinfo.primary_node == nname:
13043
            i_p_mem += beinfo[constants.BE_MEMORY]
13044
            if iinfo.name not in node_iinfo[nname].payload:
13045
              i_used_mem = 0
13046
            else:
13047
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13048
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13049
            remote_info["memory_free"] -= max(0, i_mem_diff)
13050

    
13051
            if iinfo.admin_up:
13052
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13053

    
13054
        # compute memory used by instances
13055
        pnr_dyn = {
13056
          "total_memory": remote_info["memory_total"],
13057
          "reserved_memory": remote_info["memory_dom0"],
13058
          "free_memory": remote_info["memory_free"],
13059
          "total_disk": remote_info["vg_size"],
13060
          "free_disk": remote_info["vg_free"],
13061
          "total_cpus": remote_info["cpu_total"],
13062
          "i_pri_memory": i_p_mem,
13063
          "i_pri_up_memory": i_p_up_mem,
13064
          }
13065
        pnr_dyn.update(node_results[nname])
13066
        node_results[nname] = pnr_dyn
13067

    
13068
    return node_results
13069

    
13070
  @staticmethod
13071
  def _ComputeInstanceData(cluster_info, i_list):
13072
    """Compute global instance data.
13073

13074
    """
13075
    instance_data = {}
13076
    for iinfo, beinfo in i_list:
13077
      nic_data = []
13078
      for nic in iinfo.nics:
13079
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13080
        nic_dict = {
13081
          "mac": nic.mac,
13082
          "ip": nic.ip,
13083
          "mode": filled_params[constants.NIC_MODE],
13084
          "link": filled_params[constants.NIC_LINK],
13085
          }
13086
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13087
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13088
        nic_data.append(nic_dict)
13089
      pir = {
13090
        "tags": list(iinfo.GetTags()),
13091
        "admin_up": iinfo.admin_up,
13092
        "vcpus": beinfo[constants.BE_VCPUS],
13093
        "memory": beinfo[constants.BE_MEMORY],
13094
        "os": iinfo.os,
13095
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13096
        "nics": nic_data,
13097
        "disks": [{constants.IDISK_SIZE: dsk.size,
13098
                   constants.IDISK_MODE: dsk.mode}
13099
                  for dsk in iinfo.disks],
13100
        "disk_template": iinfo.disk_template,
13101
        "hypervisor": iinfo.hypervisor,
13102
        }
13103
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13104
                                                 pir["disks"])
13105
      instance_data[iinfo.name] = pir
13106

    
13107
    return instance_data
13108

    
13109
  def _AddNewInstance(self):
13110
    """Add new instance data to allocator structure.
13111

13112
    This in combination with _ComputeClusterData will create the
13113
    correct structure needed as input for the allocator.
13114

13115
    The checks for the completeness of the opcode must have already been
13116
    done.
13117

13118
    """
13119
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13120

    
13121
    if self.disk_template in constants.DTS_INT_MIRROR:
13122
      self.required_nodes = 2
13123
    else:
13124
      self.required_nodes = 1
13125

    
13126
    request = {
13127
      "name": self.name,
13128
      "disk_template": self.disk_template,
13129
      "tags": self.tags,
13130
      "os": self.os,
13131
      "vcpus": self.vcpus,
13132
      "memory": self.memory,
13133
      "disks": self.disks,
13134
      "disk_space_total": disk_space,
13135
      "nics": self.nics,
13136
      "required_nodes": self.required_nodes,
13137
      "hypervisor": self.hypervisor,
13138
      }
13139

    
13140
    return request
13141

    
13142
  def _AddRelocateInstance(self):
13143
    """Add relocate instance data to allocator structure.
13144

13145
    This in combination with _ComputeClusterData will create the
13146
    correct structure needed as input for the allocator.
13147

13148
    The checks for the completeness of the opcode must have already been
13149
    done.
13150

13151
    """
13152
    instance = self.cfg.GetInstanceInfo(self.name)
13153
    if instance is None:
13154
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13155
                                   " IAllocator" % self.name)
13156

    
13157
    if instance.disk_template not in constants.DTS_MIRRORED:
13158
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13159
                                 errors.ECODE_INVAL)
13160

    
13161
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13162
        len(instance.secondary_nodes) != 1:
13163
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13164
                                 errors.ECODE_STATE)
13165

    
13166
    self.required_nodes = 1
13167
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13168
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13169

    
13170
    request = {
13171
      "name": self.name,
13172
      "disk_space_total": disk_space,
13173
      "required_nodes": self.required_nodes,
13174
      "relocate_from": self.relocate_from,
13175
      }
13176
    return request
13177

    
13178
  def _AddNodeEvacuate(self):
13179
    """Get data for node-evacuate requests.
13180

13181
    """
13182
    return {
13183
      "instances": self.instances,
13184
      "evac_mode": self.evac_mode,
13185
      }
13186

    
13187
  def _AddChangeGroup(self):
13188
    """Get data for node-evacuate requests.
13189

13190
    """
13191
    return {
13192
      "instances": self.instances,
13193
      "target_groups": self.target_groups,
13194
      }
13195

    
13196
  def _BuildInputData(self, fn, keydata):
13197
    """Build input data structures.
13198

13199
    """
13200
    self._ComputeClusterData()
13201

    
13202
    request = fn()
13203
    request["type"] = self.mode
13204
    for keyname, keytype in keydata:
13205
      if keyname not in request:
13206
        raise errors.ProgrammerError("Request parameter %s is missing" %
13207
                                     keyname)
13208
      val = request[keyname]
13209
      if not keytype(val):
13210
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13211
                                     " validation, value %s, expected"
13212
                                     " type %s" % (keyname, val, keytype))
13213
    self.in_data["request"] = request
13214

    
13215
    self.in_text = serializer.Dump(self.in_data)
13216

    
13217
  _STRING_LIST = ht.TListOf(ht.TString)
13218
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13219
     # pylint: disable=E1101
13220
     # Class '...' has no 'OP_ID' member
13221
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13222
                          opcodes.OpInstanceMigrate.OP_ID,
13223
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13224
     })))
13225

    
13226
  _NEVAC_MOVED = \
13227
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13228
                       ht.TItems([ht.TNonEmptyString,
13229
                                  ht.TNonEmptyString,
13230
                                  ht.TListOf(ht.TNonEmptyString),
13231
                                 ])))
13232
  _NEVAC_FAILED = \
13233
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13234
                       ht.TItems([ht.TNonEmptyString,
13235
                                  ht.TMaybeString,
13236
                                 ])))
13237
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13238
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
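  # Shape accepted by _NEVAC_RESULT (all values below are hypothetical):
  #   [
  #    [["inst1.example.com", "target-group", ["node3"]]],  # moved instances
  #    [["inst2.example.com", "failure reason"]],           # failed instances
  #    [[{...opcode dict...}]],                             # jobs to submit
  #   ]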
13239

    
13240
  _MODE_DATA = {
13241
    constants.IALLOCATOR_MODE_ALLOC:
13242
      (_AddNewInstance,
13243
       [
13244
        ("name", ht.TString),
13245
        ("memory", ht.TInt),
13246
        ("disks", ht.TListOf(ht.TDict)),
13247
        ("disk_template", ht.TString),
13248
        ("os", ht.TString),
13249
        ("tags", _STRING_LIST),
13250
        ("nics", ht.TListOf(ht.TDict)),
13251
        ("vcpus", ht.TInt),
13252
        ("hypervisor", ht.TString),
13253
        ], ht.TList),
13254
    constants.IALLOCATOR_MODE_RELOC:
13255
      (_AddRelocateInstance,
13256
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13257
       ht.TList),
13258
    constants.IALLOCATOR_MODE_NODE_EVAC:
13259
      (_AddNodeEvacuate, [
13260
        ("instances", _STRING_LIST),
13261
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13262
        ], _NEVAC_RESULT),
13263
    constants.IALLOCATOR_MODE_CHG_GROUP:
13264
      (_AddChangeGroup, [
13265
        ("instances", _STRING_LIST),
13266
        ("target_groups", _STRING_LIST),
13267
        ], _NEVAC_RESULT),
13268
    }
13269

    
13270
  def Run(self, name, validate=True, call_fn=None):
13271
    """Run an instance allocator and return the results.
13272

13273
    """
13274
    if call_fn is None:
13275
      call_fn = self.rpc.call_iallocator_runner
13276

    
13277
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13278
    result.Raise("Failure while running the iallocator script")
13279

    
13280
    self.out_text = result.payload
13281
    if validate:
13282
      self._ValidateResult()
13283

    
13284
  def _ValidateResult(self):
13285
    """Process the allocator results.
13286

13287
    This will process and if successful save the result in
13288
    self.out_data and the other parameters.
13289

13290
    """
13291
    try:
13292
      rdict = serializer.Load(self.out_text)
13293
    except Exception, err:
13294
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13295

    
13296
    if not isinstance(rdict, dict):
13297
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13298

    
13299
    # TODO: remove backwards compatiblity in later versions
13300
    if "nodes" in rdict and "result" not in rdict:
13301
      rdict["result"] = rdict["nodes"]
13302
      del rdict["nodes"]
13303

    
13304
    for key in "success", "info", "result":
13305
      if key not in rdict:
13306
        raise errors.OpExecError("Can't parse iallocator results:"
13307
                                 " missing key '%s'" % key)
13308
      setattr(self, key, rdict[key])
13309

    
13310
    if not self._result_check(self.result):
13311
      raise errors.OpExecError("Iallocator returned invalid result,"
13312
                               " expected %s, got %s" %
13313
                               (self._result_check, self.result),
13314
                               errors.ECODE_INVAL)
13315

    
13316
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13317
      assert self.relocate_from is not None
13318
      assert self.required_nodes == 1
13319

    
13320
      node2group = dict((name, ndata["group"])
13321
                        for (name, ndata) in self.in_data["nodes"].items())
13322

    
13323
      fn = compat.partial(self._NodesToGroups, node2group,
13324
                          self.in_data["nodegroups"])
13325

    
13326
      instance = self.cfg.GetInstanceInfo(self.name)
13327
      request_groups = fn(self.relocate_from + [instance.primary_node])
13328
      result_groups = fn(rdict["result"] + [instance.primary_node])
13329

    
13330
      if self.success and not set(result_groups).issubset(request_groups):
13331
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13332
                                 " differ from original groups (%s)" %
13333
                                 (utils.CommaJoin(result_groups),
13334
                                  utils.CommaJoin(request_groups)))
13335

    
13336
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13337
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13338

    
13339
    self.out_data = rdict
13340

    
13341
  @staticmethod
13342
  def _NodesToGroups(node2group, groups, nodes):
13343
    """Returns a list of unique group names for a list of nodes.
13344

13345
    @type node2group: dict
13346
    @param node2group: Map from node name to group UUID
13347
    @type groups: dict
13348
    @param groups: Group information
13349
    @type nodes: list
13350
    @param nodes: Node names
13351

13352
    """
13353
    result = set()
13354

    
13355
    for node in nodes:
13356
      try:
13357
        group_uuid = node2group[node]
13358
      except KeyError:
13359
        # Ignore unknown node
13360
        pass
13361
      else:
13362
        try:
13363
          group = groups[group_uuid]
13364
        except KeyError:
13365
          # Can't find group, let's use UUID
13366
          group_name = group_uuid
13367
        else:
13368
          group_name = group["name"]
13369

    
13370
        result.add(group_name)
13371

    
13372
    return sorted(result)
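  # Usage sketch for _NodesToGroups (example data is hypothetical):
  #   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-a"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["node1", "node2", "unknown-node"])
  # returns ["default"]: unknown nodes are ignored, group names deduplicated.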
13373

    
13374

    
13375
class LUTestAllocator(NoHooksLU):
13376
  """Run allocator tests.
13377

13378
  This LU runs the allocator tests.
13379

13380
  """
13381
  def CheckPrereq(self):
13382
    """Check prerequisites.
13383

13384
    This checks the opcode parameters depending on the director and mode test.
13385

13386
    """
13387
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13388
      for attr in ["memory", "disks", "disk_template",
13389
                   "os", "tags", "nics", "vcpus"]:
13390
        if not hasattr(self.op, attr):
13391
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13392
                                     attr, errors.ECODE_INVAL)
13393
      iname = self.cfg.ExpandInstanceName(self.op.name)
13394
      if iname is not None:
13395
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13396
                                   iname, errors.ECODE_EXISTS)
13397
      if not isinstance(self.op.nics, list):
13398
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13399
                                   errors.ECODE_INVAL)
13400
      if not isinstance(self.op.disks, list):
13401
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13402
                                   errors.ECODE_INVAL)
13403
      for row in self.op.disks:
13404
        if (not isinstance(row, dict) or
13405
            constants.IDISK_SIZE not in row or
13406
            not isinstance(row[constants.IDISK_SIZE], int) or
13407
            constants.IDISK_MODE not in row or
13408
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13409
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13410
                                     " parameter", errors.ECODE_INVAL)
13411
      if self.op.hypervisor is None:
13412
        self.op.hypervisor = self.cfg.GetHypervisorType()
13413
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13414
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13415
      self.op.name = fname
13416
      self.relocate_from = \
13417
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13418
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13419
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13420
      if not self.op.instances:
13421
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13422
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13423
    else:
13424
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13425
                                 self.op.mode, errors.ECODE_INVAL)
13426

    
13427
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13428
      if self.op.allocator is None:
13429
        raise errors.OpPrereqError("Missing allocator name",
13430
                                   errors.ECODE_INVAL)
13431
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13432
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13433
                                 self.op.direction, errors.ECODE_INVAL)
13434

    
13435
  def Exec(self, feedback_fn):
13436
    """Run the allocator test.
13437

13438
    """
13439
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13440
      ial = IAllocator(self.cfg, self.rpc,
13441
                       mode=self.op.mode,
13442
                       name=self.op.name,
13443
                       memory=self.op.memory,
13444
                       disks=self.op.disks,
13445
                       disk_template=self.op.disk_template,
13446
                       os=self.op.os,
13447
                       tags=self.op.tags,
13448
                       nics=self.op.nics,
13449
                       vcpus=self.op.vcpus,
13450
                       hypervisor=self.op.hypervisor,
13451
                       )
13452
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13453
      ial = IAllocator(self.cfg, self.rpc,
13454
                       mode=self.op.mode,
13455
                       name=self.op.name,
13456
                       relocate_from=list(self.relocate_from),
13457
                       )
13458
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13459
      ial = IAllocator(self.cfg, self.rpc,
13460
                       mode=self.op.mode,
13461
                       instances=self.op.instances,
13462
                       target_groups=self.op.target_groups)
13463
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13464
      ial = IAllocator(self.cfg, self.rpc,
13465
                       mode=self.op.mode,
13466
                       instances=self.op.instances,
13467
                       evac_mode=self.op.evac_mode)
13468
    else:
13469
      raise errors.ProgrammerError("Uncatched mode %s in"
13470
                                   " LUTestAllocator.Exec", self.op.mode)
13471

    
13472
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13473
      result = ial.in_text
13474
    else:
13475
      ial.Run(self.op.allocator, validate=False)
13476
      result = ial.out_text
13477
    return result
13478

    
13479

    
13480
#: Query type implementations
13481
_QUERY_IMPL = {
13482
  constants.QR_INSTANCE: _InstanceQuery,
13483
  constants.QR_NODE: _NodeQuery,
13484
  constants.QR_GROUP: _GroupQuery,
13485
  constants.QR_OS: _OsQuery,
13486
  }
13487

    
13488
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13489

    
13490

    
13491
def _GetQueryImplementation(name):
13492
  """Returns the implemtnation for a query type.
13493

13494
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
13495

13496
  """
13497
  try:
13498
    return _QUERY_IMPL[name]
13499
  except KeyError:
13500
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13501
                               errors.ECODE_INVAL)
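# Usage sketch (hypothetical caller): query LUs resolve their implementation
# class through this helper, mirroring LUGroupQuery.CheckArguments above:
#   impl_cls = _GetQueryImplementation(constants.QR_GROUP)   # -> _GroupQuery
#   query_obj = impl_cls(qlang.MakeSimpleFilter("name", names),
#                        output_fields, False)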