#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
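

# Editor's note: an illustrative sketch (not part of the original module) of
# how an LU's Exec would hand follow-up jobs back to the processor via
# ResultWithJobs, as described in the class docstring: each inner list is one
# job, and extra keyword arguments end up in the "other" attribute. The
# opcode arguments used here are hypothetical.
def _ExampleResultWithJobs():
  """Editor's example only; never called by the module."""
  jobs = [
    [opcodes.OpClusterVerifyConfig()],                     # first job
    [opcodes.OpClusterVerifyGroup(group_name="default")],  # second job
    ]
  return ResultWithJobs(jobs, warnings=["editor's example only"])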


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left purely as a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes, an empty
      list should be returned (not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
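

# Editor's note: the following small class is an illustrative sketch (it is
# not part of the original module and is not wired to any opcode). It shows
# the pattern described in the docstrings above: a concurrent LU sets
# REQ_BGL = False, declares instance locks in ExpandNames, and defers the
# node-lock calculation to DeclareLocks via _LockInstancesNodes().
class _LUExampleLockingSketch(LogicalUnit): # pylint: disable=W0223
  """Editor's example only; hypothetical LU demonstrating lock declaration."""
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks cannot be named yet; they depend on the instance's nodes,
    # which are only known once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Replaces needed_locks[LEVEL_NODE] with the locked instance's nodes
      self._LockInstancesNodes(primary_only=False)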


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
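

# Editor's note: a small, hypothetical usage sketch for _GetUpdatedParams (the
# parameter names below are illustrative, not taken from the original module).
# Keys updated to constants.VALUE_DEFAULT (or None with use_none=True) are
# removed from the copy, i.e. reset to their default; all other keys are
# overwritten or added.
def _ExampleGetUpdatedParams():
  """Editor's example only; never called by the module."""
  old_params = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  update = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
  merged = _GetUpdatedParams(old_params, update)
  # "root_path" has been reset, "serial_console" added, "kernel_path" kept
  assert merged == {"kernel_path": "/boot/vmlinuz", "serial_console": True}
  return merged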


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit owning the locks
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
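

# Editor's note: an illustrative sketch (not from the original module) of how
# _ReleaseLocks is meant to be used per its docstring: either name the locks
# to drop via "names", or name the ones to keep via "keep". The argument
# values are hypothetical.
def _ExampleReleaseLocks(lu, still_needed_nodes):
  """Editor's example only; never called by the module."""
  # Keep only the node locks that are still needed and release the rest;
  # passing names=... instead would list the locks to release explicitly
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=still_needed_nodes)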


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
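

# Editor's note: a sketch (hypothetical node, volume and instance names) of
# the shape returned by _MapInstanceDisksToNodes, as described in its
# docstring: every (node name, volume name) pair maps to the owning instance.
def _ExampleInstanceDiskMap():
  """Editor's example only: the result shape of _MapInstanceDisksToNodes."""
  # An instance with one LV mirrored on two nodes would be mapped as:
  return {
    ("node1.example.com", "/dev/xenvg/disk0"): "inst1.example.com",
    ("node2.example.com", "/dev/xenvg/disk0"): "inst1.example.com",
    }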


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
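

# Editor's note: an illustrative call (not part of the original module) of
# _BuildInstanceHookEnv with hypothetical values, to show the naming scheme
# of the generated environment (the hooks runner later prefixes every key
# with "GANETI_").
def _ExampleBuildInstanceHookEnv():
  """Editor's example only; never called by the module."""
  env = _BuildInstanceHookEnv("inst1.example.com",        # name
                              "node1.example.com",        # primary node
                              ["node2.example.com"],      # secondary nodes
                              "debootstrap+default",      # OS type
                              True, 512, 1,               # status, mem, vcpus
                              [("198.51.100.10", "aa:00:00:11:22:33",
                                constants.NIC_MODE_BRIDGED, "xen-br0")],
                              "drbd", [(1024, "rw")],     # template, disks
                              {}, {},                     # bep, hvp
                              "xen-pvm", ["tag1"])        # hypervisor, tags
  # env now contains keys such as INSTANCE_NIC0_IP, INSTANCE_NIC0_BRIDGE,
  # INSTANCE_DISK0_SIZE, INSTANCE_DISK_COUNT and INSTANCE_TAGS
  return env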


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
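

# Editor's note: an illustrative sketch (not part of the original module) of
# how a verify LU using the _VerifyErrors mix-in reports problems from its
# Exec: _ErrorIf formats the message via _Error and only flips self.bad for
# ERROR-type entries. The node object, conditions and messages below are
# hypothetical, and self.bad is assumed to have been initialized in Exec.
def _ExampleVerifyErrorsUsage(verify_lu, node):
  """Editor's example only; never called by the module."""
  verify_lu._ErrorIf(node.offline, verify_lu.ENODERPC, node.name,
                     "node is offline, skipping RPC checks")
  # A warning is reported but does not mark the verification as failed:
  verify_lu._ErrorIf(not node.master_candidate, verify_lu.ENODESETUP,
                     node.name, "node is not a master candidate",
                     code=verify_lu.ETYPE_WARNING)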
1511

    
1512

    
1513
class LUClusterVerify(NoHooksLU):
1514
  """Submits all jobs necessary to verify the cluster.
1515

1516
  """
1517
  REQ_BGL = False
1518

    
1519
  def ExpandNames(self):
1520
    self.needed_locks = {}
1521

    
1522
  def Exec(self, feedback_fn):
1523
    jobs = []
1524

    
1525
    if self.op.group_name:
1526
      groups = [self.op.group_name]
1527
      depends_fn = lambda: None
1528
    else:
1529
      groups = self.cfg.GetNodeGroupList()
1530

    
1531
      # Verify global configuration
1532
      jobs.append([opcodes.OpClusterVerifyConfig()])
1533

    
1534
      # Always depend on global verification
1535
      depends_fn = lambda: [(-len(jobs), [])]
1536

    
1537
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538
                                              depends=depends_fn())]
1539
                for group in groups)
1540

    
1541
    # Fix up all parameters
1542
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1543
      op.debug_simulate_errors = self.op.debug_simulate_errors
1544
      op.verbose = self.op.verbose
1545
      op.error_codes = self.op.error_codes
1546
      try:
1547
        op.skip_checks = self.op.skip_checks
1548
      except AttributeError:
1549
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1550

    
1551
    return ResultWithJobs(jobs)
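
  # Illustrative result (hypothetical group names): with no group_name given
  # and groups "default" and "rack1", Exec() returns
  #   ResultWithJobs([[OpClusterVerifyConfig()],
  #                   [OpClusterVerifyGroup(group_name="default", ...)],
  #                   [OpClusterVerifyGroup(group_name="rack1", ...)]])
  # where each group job's "depends" entry points back at the global
  # configuration check submitted first.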


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
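
  # Note: each hvp_data entry is an (item, hypervisor name, parameter dict)
  # tuple; only the parameter types and the hypervisor's own syntax rules are
  # checked here, no node is contacted.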

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of the cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
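
  # Note: the remote "version" value is a (protocol, release) pair; a protocol
  # mismatch aborts further checks for this node, while a release mismatch is
  # only reported as a warning (ETYPE_WARNING) above.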

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
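
  # Worked example (hypothetical numbers): if the verify RPC ran between
  # t=100.0 and t=102.0 and NODE_MAX_CLOCK_SKEW were 150 seconds, a node
  # reporting t=260.5 would be flagged with a divergence of "158.5s"
  # (260.5 - 102.0), while anything inside [100.0 - 150, 102.0 + 150] passes.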

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
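
  # Worked example (hypothetical sizes): if this node is secondary for two
  # auto-balanced instances whose primary is "node1", needing 2048 MiB and
  # 1024 MiB respectively, needed_mem is 3072 MiB; a reported mfree of
  # 2048 MiB would then trigger ENODEN1 for a failure of "node1".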

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        # Non-optional files
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                cls.ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
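
  # Structure sketch (hypothetical values): after the collection loop above,
  # fileinfo maps each filename to {checksum: set(node names)}, e.g.
  #   {"/var/lib/ganeti/known_hosts": {"0123abcd...": set(["node1", "node2"])}}
  # so a file seen with two different checksums yields two keys and is
  # reported as having two variants.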

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
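
  # Return-value sketch (hypothetical names): for a mirrored instance with a
  # single disk, instdisk could look like
  #   {"inst1.example.com": {"node1": [(True, <status>)],
  #                          "node2": [(False, "node offline")]}}
  # i.e. one (success, payload) pair per disk, per node holding its disks.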

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
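
  # Behaviour sketch (hypothetical layout): with the current group holding
  # node1/node2 and a single other group holding nodeA/nodeB, the returned
  # mapping gives each of node1 and node2 one peer taken round-robin from the
  # cycle over ["nodeA", "nodeB"], in addition to the in-group peers carried
  # in the first element of the tuple.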

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
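
  # Example environment (hypothetical tags): for a cluster tagged "prod" and
  # a node "node1" tagged "rack:r1", the hooks environment would contain
  #   {"CLUSTER_TAGS": "prod", "NODE_TAGS_node1": "rack:r1"}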

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
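
    # Orientation note: each NV_* key above names one check the node_verify
    # RPC is asked to run remotely; a None value simply requests the check,
    # while richer values (for instance the (name, primary IP, secondary IP)
    # triples under NV_NODENETTEST) parametrise it.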

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2939
               "instance has offline secondary node(s) %s",
2940
               utils.CommaJoin(inst_nodes_offline))
2941
      # ... or ghost/non-vm_capable nodes
2942
      for node in inst_config.all_nodes:
2943
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2944
                 "instance lives on ghost node %s", node)
2945
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2946
                 instance, "instance lives on non-vm_capable node %s", node)
2947

    
2948
    feedback_fn("* Verifying orphan volumes")
2949
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2950

    
2951
    # We will get spurious "unknown volume" warnings if any node of this group
2952
    # is secondary for an instance whose primary is in another group. To avoid
2953
    # them, we find these instances and add their volumes to node_vol_should.
2954
    for inst in self.all_inst_info.values():
2955
      for secondary in inst.secondary_nodes:
2956
        if (secondary in self.my_node_info
2957
            and inst.name not in self.my_inst_info):
2958
          inst.MapLVsByNode(node_vol_should)
2959
          break
2960

    
2961
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2962

    
2963
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2964
      feedback_fn("* Verifying N+1 Memory redundancy")
2965
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2966

    
2967
    feedback_fn("* Other Notes")
2968
    if i_non_redundant:
2969
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2970
                  % len(i_non_redundant))
2971

    
2972
    if i_non_a_balanced:
2973
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2974
                  % len(i_non_a_balanced))
2975

    
2976
    if n_offline:
2977
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2978

    
2979
    if n_drained:
2980
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2981

    
2982
    return not self.bad
2983

    
2984
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2985
    """Analyze the post-hooks' result
2986

2987
    This method analyses the hook result, handles it, and sends some
2988
    nicely-formatted feedback back to the user.
2989

2990
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2991
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2992
    @param hooks_results: the results of the multi-node hooks rpc call
2993
    @param feedback_fn: function used to send feedback back to the caller
2994
    @param lu_result: previous Exec result
2995
    @return: the new Exec result, based on the previous result
2996
        and hook results
2997

2998
    """
2999
    # We only really run POST phase hooks, only for non-empty groups,
3000
    # and are only interested in their results
3001
    if not self.my_node_names:
3002
      # empty node group
3003
      pass
3004
    elif phase == constants.HOOKS_PHASE_POST:
3005
      # Used to change hooks' output to proper indentation
3006
      feedback_fn("* Hooks Results")
3007
      assert hooks_results, "invalid result from hooks"
3008

    
3009
      for node_name in hooks_results:
3010
        res = hooks_results[node_name]
3011
        msg = res.fail_msg
3012
        test = msg and not res.offline
3013
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
3014
                      "Communication failure in hooks execution: %s", msg)
3015
        if res.offline or msg:
3016
          # No need to investigate payload if node is offline or gave
3017
          # an error.
3018
          continue
3019
        for script, hkr, output in res.payload:
3020
          test = hkr == constants.HKR_FAIL
3021
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
3022
                        "Script %s failed, output:", script)
3023
          if test:
3024
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3025
            feedback_fn("%s" % output)
3026
            lu_result = False
3027

    
3028
    return lu_result
3029

    
3030

    
3031
class LUClusterVerifyDisks(NoHooksLU):
3032
  """Verifies the cluster disks status.
3033

3034
  """
3035
  REQ_BGL = False
3036

    
3037
  def ExpandNames(self):
3038
    self.share_locks = _ShareAll()
3039
    self.needed_locks = {
3040
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3041
      }
3042

    
3043
  def Exec(self, feedback_fn):
3044
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3045

    
3046
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3047
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3048
                           for group in group_names])
3049

    
3050

    
3051
class LUGroupVerifyDisks(NoHooksLU):
3052
  """Verifies the status of all disks in a node group.
3053

3054
  """
3055
  REQ_BGL = False
3056

    
3057
  def ExpandNames(self):
3058
    # Raises errors.OpPrereqError on its own if group can't be found
3059
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3060

    
3061
    self.share_locks = _ShareAll()
3062
    self.needed_locks = {
3063
      locking.LEVEL_INSTANCE: [],
3064
      locking.LEVEL_NODEGROUP: [],
3065
      locking.LEVEL_NODE: [],
3066
      }
3067

    
3068
  def DeclareLocks(self, level):
3069
    if level == locking.LEVEL_INSTANCE:
3070
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3071

    
3072
      # Lock instances optimistically, needs verification once node and group
3073
      # locks have been acquired
3074
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3075
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3076

    
3077
    elif level == locking.LEVEL_NODEGROUP:
3078
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3079

    
3080
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3081
        set([self.group_uuid] +
3082
            # Lock all groups used by instances optimistically; this requires
3083
            # going via the node before it's locked, requiring verification
3084
            # later on
3085
            [group_uuid
3086
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3087
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3088

    
3089
    elif level == locking.LEVEL_NODE:
3090
      # This will only lock the nodes in the group to be verified which contain
3091
      # actual instances
3092
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3093
      self._LockInstancesNodes()
3094

    
3095
      # Lock all nodes in group to be verified
3096
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3097
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3098
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3099

    
3100
  def CheckPrereq(self):
3101
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3102
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3103
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3104

    
3105
    assert self.group_uuid in owned_groups
3106

    
3107
    # Check if locked instances are still correct
3108
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3109

    
3110
    # Get instance information
3111
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3112

    
3113
    # Check if node groups for locked instances are still correct
3114
    for (instance_name, inst) in self.instances.items():
3115
      assert owned_nodes.issuperset(inst.all_nodes), \
3116
        "Instance %s's nodes changed while we kept the lock" % instance_name
3117

    
3118
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3119
                                             owned_groups)
3120

    
3121
      assert self.group_uuid in inst_groups, \
3122
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3123

    
3124
  def Exec(self, feedback_fn):
3125
    """Verify integrity of cluster disks.
3126

3127
    @rtype: tuple of three items
3128
    @return: a tuple of (dict of node-to-node_error, list of instances
3129
        which need activate-disks, dict of instance: (node, volume) for
3130
        missing volumes)
3131

3132
    """
3133
    res_nodes = {}
3134
    res_instances = set()
3135
    res_missing = {}
3136

    
3137
    nv_dict = _MapInstanceDisksToNodes([inst
3138
                                        for inst in self.instances.values()
3139
                                        if inst.admin_up])
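    # nv_dict maps (node_name, lv_name) tuples to the owning instance; entries
    # are popped as the volumes are found on the nodes, so whatever is left at
    # the end corresponds to missing volumes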
3140

    
3141
    if nv_dict:
3142
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3143
                             set(self.cfg.GetVmCapableNodeList()))
3144

    
3145
      node_lvs = self.rpc.call_lv_list(nodes, [])
3146

    
3147
      for (node, node_res) in node_lvs.items():
3148
        if node_res.offline:
3149
          continue
3150

    
3151
        msg = node_res.fail_msg
3152
        if msg:
3153
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3154
          res_nodes[node] = msg
3155
          continue
3156

    
3157
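        # each payload entry maps an LV name to a tuple whose third element
        # says whether the volume is online; an offline volume belonging to a
        # tracked instance means that instance needs activate-disks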
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3158
          inst = nv_dict.pop((node, lv_name), None)
3159
          if not (lv_online or inst is None):
3160
            res_instances.add(inst)
3161

    
3162
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3163
      # better
3164
      for key, inst in nv_dict.iteritems():
3165
        res_missing.setdefault(inst, []).append(key)
3166

    
3167
    return (res_nodes, list(res_instances), res_missing)
3168

    
3169

    
3170
class LUClusterRepairDiskSizes(NoHooksLU):
3171
  """Verifies the cluster disks sizes.
3172

3173
  """
3174
  REQ_BGL = False
3175

    
3176
  def ExpandNames(self):
3177
    if self.op.instances:
3178
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3179
      self.needed_locks = {
3180
        locking.LEVEL_NODE: [],
3181
        locking.LEVEL_INSTANCE: self.wanted_names,
3182
        }
3183
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3184
    else:
3185
      self.wanted_names = None
3186
      self.needed_locks = {
3187
        locking.LEVEL_NODE: locking.ALL_SET,
3188
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3189
        }
3190
    self.share_locks = _ShareAll()
3191

    
3192
  def DeclareLocks(self, level):
3193
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3194
      self._LockInstancesNodes(primary_only=True)
3195

    
3196
  def CheckPrereq(self):
3197
    """Check prerequisites.
3198

3199
    This only checks the optional instance list against the existing names.
3200

3201
    """
3202
    if self.wanted_names is None:
3203
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3204

    
3205
    self.wanted_instances = \
3206
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3207

    
3208
  def _EnsureChildSizes(self, disk):
3209
    """Ensure children of the disk have the needed disk size.
3210

3211
    This is valid mainly for DRBD8 and fixes an issue where the
3212
    children have a smaller disk size than the parent.
3213

3214
    @param disk: an L{ganeti.objects.Disk} object
3215

3216
    """
3217
    if disk.dev_type == constants.LD_DRBD8:
3218
      assert disk.children, "Empty children for DRBD8?"
3219
      fchild = disk.children[0]
3220
      mismatch = fchild.size < disk.size
3221
      if mismatch:
3222
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3223
                     fchild.size, disk.size)
3224
        fchild.size = disk.size
3225

    
3226
      # and we recurse on this child only, not on the metadev
3227
      return self._EnsureChildSizes(fchild) or mismatch
3228
    else:
3229
      return False
3230

    
3231
  def Exec(self, feedback_fn):
3232
    """Verify the size of cluster disks.
3233

3234
    """
3235
    # TODO: check child disks too
3236
    # TODO: check differences in size between primary/secondary nodes
3237
    per_node_disks = {}
3238
    for instance in self.wanted_instances:
3239
      pnode = instance.primary_node
3240
      if pnode not in per_node_disks:
3241
        per_node_disks[pnode] = []
3242
      for idx, disk in enumerate(instance.disks):
3243
        per_node_disks[pnode].append((instance, idx, disk))
3244

    
3245
    changed = []
3246
    for node, dskl in per_node_disks.items():
3247
      newl = [v[2].Copy() for v in dskl]
3248
      for dsk in newl:
3249
        self.cfg.SetDiskID(dsk, node)
3250
      result = self.rpc.call_blockdev_getsize(node, newl)
3251
      if result.fail_msg:
3252
        self.LogWarning("Failure in blockdev_getsize call to node"
3253
                        " %s, ignoring", node)
3254
        continue
3255
      if len(result.payload) != len(dskl):
3256
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3257
                        " result.payload=%s", node, len(dskl), result.payload)
3258
        self.LogWarning("Invalid result from node %s, ignoring node results",
3259
                        node)
3260
        continue
3261
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3262
        if size is None:
3263
          self.LogWarning("Disk %d of instance %s did not return size"
3264
                          " information, ignoring", idx, instance.name)
3265
          continue
3266
        if not isinstance(size, (int, long)):
3267
          self.LogWarning("Disk %d of instance %s did not return valid"
3268
                          " size information, ignoring", idx, instance.name)
3269
          continue
3270
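        # the returned size is in bytes; convert to MiB to match the unit of
        # disk.size as recorded in the configuration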
        size = size >> 20
3271
        if size != disk.size:
3272
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3273
                       " correcting: recorded %d, actual %d", idx,
3274
                       instance.name, disk.size, size)
3275
          disk.size = size
3276
          self.cfg.Update(instance, feedback_fn)
3277
          changed.append((instance.name, idx, size))
3278
        if self._EnsureChildSizes(disk):
3279
          self.cfg.Update(instance, feedback_fn)
3280
          changed.append((instance.name, idx, disk.size))
3281
    return changed
3282

    
3283

    
3284
class LUClusterRename(LogicalUnit):
3285
  """Rename the cluster.
3286

3287
  """
3288
  HPATH = "cluster-rename"
3289
  HTYPE = constants.HTYPE_CLUSTER
3290

    
3291
  def BuildHooksEnv(self):
3292
    """Build hooks env.
3293

3294
    """
3295
    return {
3296
      "OP_TARGET": self.cfg.GetClusterName(),
3297
      "NEW_NAME": self.op.name,
3298
      }
3299

    
3300
  def BuildHooksNodes(self):
3301
    """Build hooks nodes.
3302

3303
    """
3304
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3305

    
3306
  def CheckPrereq(self):
3307
    """Verify that the passed name is a valid one.
3308

3309
    """
3310
    hostname = netutils.GetHostname(name=self.op.name,
3311
                                    family=self.cfg.GetPrimaryIPFamily())
3312

    
3313
    new_name = hostname.name
3314
    self.ip = new_ip = hostname.ip
3315
    old_name = self.cfg.GetClusterName()
3316
    old_ip = self.cfg.GetMasterIP()
3317
    if new_name == old_name and new_ip == old_ip:
3318
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3319
                                 " cluster has changed",
3320
                                 errors.ECODE_INVAL)
3321
    if new_ip != old_ip:
3322
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3323
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3324
                                   " reachable on the network" %
3325
                                   new_ip, errors.ECODE_NOTUNIQUE)
3326

    
3327
    self.op.name = new_name
3328

    
3329
  def Exec(self, feedback_fn):
3330
    """Rename the cluster.
3331

3332
    """
3333
    clustername = self.op.name
3334
    ip = self.ip
3335

    
3336
    # shutdown the master IP
3337
    master = self.cfg.GetMasterNode()
3338
    result = self.rpc.call_node_deactivate_master_ip(master)
3339
    result.Raise("Could not disable the master role")
3340

    
3341
    try:
3342
      cluster = self.cfg.GetClusterInfo()
3343
      cluster.cluster_name = clustername
3344
      cluster.master_ip = ip
3345
      self.cfg.Update(cluster, feedback_fn)
3346

    
3347
      # update the known hosts file
3348
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3349
      node_list = self.cfg.GetOnlineNodeList()
3350
      try:
3351
        node_list.remove(master)
3352
      except ValueError:
3353
        pass
3354
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3355
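    # whatever happened above, always try to bring the master IP back up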
    finally:
3356
      result = self.rpc.call_node_activate_master_ip(master)
3357
      msg = result.fail_msg
3358
      if msg:
3359
        self.LogWarning("Could not re-enable the master role on"
3360
                        " the master, please restart manually: %s", msg)
3361

    
3362
    return clustername
3363

    
3364

    
3365
class LUClusterSetParams(LogicalUnit):
3366
  """Change the parameters of the cluster.
3367

3368
  """
3369
  HPATH = "cluster-modify"
3370
  HTYPE = constants.HTYPE_CLUSTER
3371
  REQ_BGL = False
3372

    
3373
  def CheckArguments(self):
3374
    """Check parameters
3375

3376
    """
3377
    if self.op.uid_pool:
3378
      uidpool.CheckUidPool(self.op.uid_pool)
3379

    
3380
    if self.op.add_uids:
3381
      uidpool.CheckUidPool(self.op.add_uids)
3382

    
3383
    if self.op.remove_uids:
3384
      uidpool.CheckUidPool(self.op.remove_uids)
3385

    
3386
  def ExpandNames(self):
3387
    # FIXME: in the future maybe other cluster params won't require checking on
3388
    # all nodes to be modified.
3389
    self.needed_locks = {
3390
      locking.LEVEL_NODE: locking.ALL_SET,
3391
    }
3392
    self.share_locks[locking.LEVEL_NODE] = 1
3393

    
3394
  def BuildHooksEnv(self):
3395
    """Build hooks env.
3396

3397
    """
3398
    return {
3399
      "OP_TARGET": self.cfg.GetClusterName(),
3400
      "NEW_VG_NAME": self.op.vg_name,
3401
      }
3402

    
3403
  def BuildHooksNodes(self):
3404
    """Build hooks nodes.
3405

3406
    """
3407
    mn = self.cfg.GetMasterNode()
3408
    return ([mn], [mn])
3409

    
3410
  def CheckPrereq(self):
3411
    """Check prerequisites.
3412

3413
    This checks that the given parameters don't conflict and
3414
    that the given volume group is valid.
3415

3416
    """
3417
    if self.op.vg_name is not None and not self.op.vg_name:
3418
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3419
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3420
                                   " instances exist", errors.ECODE_INVAL)
3421

    
3422
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3423
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3424
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3425
                                   " drbd-based instances exist",
3426
                                   errors.ECODE_INVAL)
3427

    
3428
    node_list = self.owned_locks(locking.LEVEL_NODE)
3429

    
3430
    # if vg_name is not None, check the given volume group on all nodes
3431
    if self.op.vg_name:
3432
      vglist = self.rpc.call_vg_list(node_list)
3433
      for node in node_list:
3434
        msg = vglist[node].fail_msg
3435
        if msg:
3436
          # ignoring down node
3437
          self.LogWarning("Error while gathering data on node %s"
3438
                          " (ignoring node): %s", node, msg)
3439
          continue
3440
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3441
                                              self.op.vg_name,
3442
                                              constants.MIN_VG_SIZE)
3443
        if vgstatus:
3444
          raise errors.OpPrereqError("Error on node '%s': %s" %
3445
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3446

    
3447
    if self.op.drbd_helper:
3448
      # check the given drbd helper on all nodes
3449
      helpers = self.rpc.call_drbd_helper(node_list)
3450
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3451
        if ninfo.offline:
3452
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3453
          continue
3454
        msg = helpers[node].fail_msg
3455
        if msg:
3456
          raise errors.OpPrereqError("Error checking drbd helper on node"
3457
                                     " '%s': %s" % (node, msg),
3458
                                     errors.ECODE_ENVIRON)
3459
        node_helper = helpers[node].payload
3460
        if node_helper != self.op.drbd_helper:
3461
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3462
                                     (node, node_helper), errors.ECODE_ENVIRON)
3463

    
3464
    self.cluster = cluster = self.cfg.GetClusterInfo()
3465
    # validate params changes
3466
    if self.op.beparams:
3467
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3468
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3469

    
3470
    if self.op.ndparams:
3471
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3472
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3473

    
3474
      # TODO: we need a more general way to handle resetting
3475
      # cluster-level parameters to default values
3476
      if self.new_ndparams["oob_program"] == "":
3477
        self.new_ndparams["oob_program"] = \
3478
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3479

    
3480
    if self.op.nicparams:
3481
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3482
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3483
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3484
      nic_errors = []
3485

    
3486
      # check all instances for consistency
3487
      for instance in self.cfg.GetAllInstancesInfo().values():
3488
        for nic_idx, nic in enumerate(instance.nics):
3489
          params_copy = copy.deepcopy(nic.nicparams)
3490
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3491

    
3492
          # check parameter syntax
3493
          try:
3494
            objects.NIC.CheckParameterSyntax(params_filled)
3495
          except errors.ConfigurationError, err:
3496
            nic_errors.append("Instance %s, nic/%d: %s" %
3497
                              (instance.name, nic_idx, err))
3498

    
3499
          # if we're moving instances to routed, check that they have an ip
3500
          target_mode = params_filled[constants.NIC_MODE]
3501
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3502
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3503
                              " address" % (instance.name, nic_idx))
3504
      if nic_errors:
3505
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3506
                                   "\n".join(nic_errors))
3507

    
3508
    # hypervisor list/parameters
3509
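    # start from a copy of the current cluster-level hypervisor parameters and
    # overlay the requested changes per hypervisor below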
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3510
    if self.op.hvparams:
3511
      for hv_name, hv_dict in self.op.hvparams.items():
3512
        if hv_name not in self.new_hvparams:
3513
          self.new_hvparams[hv_name] = hv_dict
3514
        else:
3515
          self.new_hvparams[hv_name].update(hv_dict)
3516

    
3517
    # os hypervisor parameters
3518
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3519
    if self.op.os_hvp:
3520
      for os_name, hvs in self.op.os_hvp.items():
3521
        if os_name not in self.new_os_hvp:
3522
          self.new_os_hvp[os_name] = hvs
3523
        else:
3524
          for hv_name, hv_dict in hvs.items():
3525
            if hv_name not in self.new_os_hvp[os_name]:
3526
              self.new_os_hvp[os_name][hv_name] = hv_dict
3527
            else:
3528
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3529

    
3530
    # os parameters
3531
    self.new_osp = objects.FillDict(cluster.osparams, {})
3532
    if self.op.osparams:
3533
      for os_name, osp in self.op.osparams.items():
3534
        if os_name not in self.new_osp:
3535
          self.new_osp[os_name] = {}
3536

    
3537
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3538
                                                  use_none=True)
3539

    
3540
        if not self.new_osp[os_name]:
3541
          # we removed all parameters
3542
          del self.new_osp[os_name]
3543
        else:
3544
          # check the parameter validity (remote check)
3545
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3546
                         os_name, self.new_osp[os_name])
3547

    
3548
    # changes to the hypervisor list
3549
    if self.op.enabled_hypervisors is not None:
3550
      self.hv_list = self.op.enabled_hypervisors
3551
      for hv in self.hv_list:
3552
        # if the hypervisor doesn't already exist in the cluster
3553
        # hvparams, we initialize it to empty, and then (in both
3554
        # cases) we make sure to fill the defaults, as we might not
3555
        # have a complete defaults list if the hypervisor wasn't
3556
        # enabled before
3557
        if hv not in new_hvp:
3558
          new_hvp[hv] = {}
3559
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3560
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3561
    else:
3562
      self.hv_list = cluster.enabled_hypervisors
3563

    
3564
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3565
      # either the enabled list has changed, or the parameters have, validate
3566
      for hv_name, hv_params in self.new_hvparams.items():
3567
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3568
            (self.op.enabled_hypervisors and
3569
             hv_name in self.op.enabled_hypervisors)):
3570
          # either this is a new hypervisor, or its parameters have changed
3571
          hv_class = hypervisor.GetHypervisor(hv_name)
3572
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3573
          hv_class.CheckParameterSyntax(hv_params)
3574
          _CheckHVParams(self, node_list, hv_name, hv_params)
3575

    
3576
    if self.op.os_hvp:
3577
      # no need to check any newly-enabled hypervisors, since the
3578
      # defaults have already been checked in the above code-block
3579
      for os_name, os_hvp in self.new_os_hvp.items():
3580
        for hv_name, hv_params in os_hvp.items():
3581
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3582
          # we need to fill in the new os_hvp on top of the actual hv_p
3583
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3584
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3585
          hv_class = hypervisor.GetHypervisor(hv_name)
3586
          hv_class.CheckParameterSyntax(new_osp)
3587
          _CheckHVParams(self, node_list, hv_name, new_osp)
3588

    
3589
    if self.op.default_iallocator:
3590
      alloc_script = utils.FindFile(self.op.default_iallocator,
3591
                                    constants.IALLOCATOR_SEARCH_PATH,
3592
                                    os.path.isfile)
3593
      if alloc_script is None:
3594
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3595
                                   " specified" % self.op.default_iallocator,
3596
                                   errors.ECODE_INVAL)
3597

    
3598
  def Exec(self, feedback_fn):
3599
    """Change the parameters of the cluster.
3600

3601
    """
3602
    if self.op.vg_name is not None:
3603
      new_volume = self.op.vg_name
3604
      if not new_volume:
3605
        new_volume = None
3606
      if new_volume != self.cfg.GetVGName():
3607
        self.cfg.SetVGName(new_volume)
3608
      else:
3609
        feedback_fn("Cluster LVM configuration already in desired"
3610
                    " state, not changing")
3611
    if self.op.drbd_helper is not None:
3612
      new_helper = self.op.drbd_helper
3613
      if not new_helper:
3614
        new_helper = None
3615
      if new_helper != self.cfg.GetDRBDHelper():
3616
        self.cfg.SetDRBDHelper(new_helper)
3617
      else:
3618
        feedback_fn("Cluster DRBD helper already in desired state,"
3619
                    " not changing")
3620
    if self.op.hvparams:
3621
      self.cluster.hvparams = self.new_hvparams
3622
    if self.op.os_hvp:
3623
      self.cluster.os_hvp = self.new_os_hvp
3624
    if self.op.enabled_hypervisors is not None:
3625
      self.cluster.hvparams = self.new_hvparams
3626
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3627
    if self.op.beparams:
3628
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3629
    if self.op.nicparams:
3630
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3631
    if self.op.osparams:
3632
      self.cluster.osparams = self.new_osp
3633
    if self.op.ndparams:
3634
      self.cluster.ndparams = self.new_ndparams
3635

    
3636
    if self.op.candidate_pool_size is not None:
3637
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3638
      # we need to update the pool size here, otherwise the save will fail
3639
      _AdjustCandidatePool(self, [])
3640

    
3641
    if self.op.maintain_node_health is not None:
3642
      self.cluster.maintain_node_health = self.op.maintain_node_health
3643

    
3644
    if self.op.prealloc_wipe_disks is not None:
3645
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3646

    
3647
    if self.op.add_uids is not None:
3648
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3649

    
3650
    if self.op.remove_uids is not None:
3651
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3652

    
3653
    if self.op.uid_pool is not None:
3654
      self.cluster.uid_pool = self.op.uid_pool
3655

    
3656
    if self.op.default_iallocator is not None:
3657
      self.cluster.default_iallocator = self.op.default_iallocator
3658

    
3659
    if self.op.reserved_lvs is not None:
3660
      self.cluster.reserved_lvs = self.op.reserved_lvs
3661

    
3662
    def helper_os(aname, mods, desc):
3663
      desc += " OS list"
3664
      lst = getattr(self.cluster, aname)
3665
      for key, val in mods:
3666
        if key == constants.DDM_ADD:
3667
          if val in lst:
3668
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3669
          else:
3670
            lst.append(val)
3671
        elif key == constants.DDM_REMOVE:
3672
          if val in lst:
3673
            lst.remove(val)
3674
          else:
3675
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3676
        else:
3677
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3678

    
3679
    if self.op.hidden_os:
3680
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3681

    
3682
    if self.op.blacklisted_os:
3683
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3684

    
3685
    if self.op.master_netdev:
3686
      master = self.cfg.GetMasterNode()
3687
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3688
                  self.cluster.master_netdev)
3689
      result = self.rpc.call_node_deactivate_master_ip(master)
3690
      result.Raise("Could not disable the master ip")
3691
      feedback_fn("Changing master_netdev from %s to %s" %
3692
                  (self.cluster.master_netdev, self.op.master_netdev))
3693
      self.cluster.master_netdev = self.op.master_netdev
3694

    
3695
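    # persist all the in-memory changes made above in a single update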
    self.cfg.Update(self.cluster, feedback_fn)
3696

    
3697
    if self.op.master_netdev:
3698
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3699
                  self.op.master_netdev)
3700
      result = self.rpc.call_node_activate_master_ip(master)
3701
      if result.fail_msg:
3702
        self.LogWarning("Could not re-enable the master ip on"
3703
                        " the master, please restart manually: %s",
3704
                        result.fail_msg)
3705

    
3706

    
3707
def _UploadHelper(lu, nodes, fname):
3708
  """Helper for uploading a file and showing warnings.
3709

3710
  """
3711
  if os.path.exists(fname):
3712
    result = lu.rpc.call_upload_file(nodes, fname)
3713
    for to_node, to_result in result.items():
3714
      msg = to_result.fail_msg
3715
      if msg:
3716
        msg = ("Copy of file %s to node %s failed: %s" %
3717
               (fname, to_node, msg))
3718
        lu.proc.LogWarning(msg)
3719

    
3720

    
3721
def _ComputeAncillaryFiles(cluster, redist):
3722
  """Compute files external to Ganeti which need to be consistent.
3723

3724
  @type redist: boolean
3725
  @param redist: Whether to include files which need to be redistributed
3726

3727
  """
3728
  # Compute files for all nodes
3729
  files_all = set([
3730
    constants.SSH_KNOWN_HOSTS_FILE,
3731
    constants.CONFD_HMAC_KEY,
3732
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3733
    constants.RAPI_USERS_FILE,
3734
    ])
3735

    
3736
  if not redist:
3737
    files_all.update(constants.ALL_CERT_FILES)
3738
    files_all.update(ssconf.SimpleStore().GetFileList())
3739
  else:
3740
    # we need to ship at least the RAPI certificate
3741
    files_all.add(constants.RAPI_CERT_FILE)
3742

    
3743
  if cluster.modify_etc_hosts:
3744
    files_all.add(constants.ETC_HOSTS)
3745

    
3746
  # Files which are optional, these must:
3747
  # - be present in one other category as well
3748
  # - either exist or not exist on all nodes of that category (mc, vm all)
3749
  files_opt = set([
3750
    constants.RAPI_USERS_FILE,
3751
    ])
3752

    
3753
  # Files which should only be on master candidates
3754
  files_mc = set()
3755
  if not redist:
3756
    files_mc.add(constants.CLUSTER_CONF_FILE)
3757

    
3758
  # Files which should only be on VM-capable nodes
3759
  files_vm = set(filename
3760
    for hv_name in cluster.enabled_hypervisors
3761
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3762

    
3763
  files_opt |= set(filename
3764
    for hv_name in cluster.enabled_hypervisors
3765
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3766

    
3767
  # Filenames in each category must be unique
3768
  all_files_set = files_all | files_mc | files_vm
3769
  assert (len(all_files_set) ==
3770
          sum(map(len, [files_all, files_mc, files_vm]))), \
3771
         "Found file listed in more than one file list"
3772

    
3773
  # Optional files must be present in one other category
3774
  assert all_files_set.issuperset(files_opt), \
3775
         "Optional file not in a different required list"
3776

    
3777
  return (files_all, files_opt, files_mc, files_vm)
3778

    
3779

    
3780
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3781
  """Distribute additional files which are part of the cluster configuration.
3782

3783
  ConfigWriter takes care of distributing the config and ssconf files, but
3784
  there are more files which should be distributed to all nodes. This function
3785
  makes sure those are copied.
3786

3787
  @param lu: calling logical unit
3788
  @param additional_nodes: list of nodes not in the config to distribute to
3789
  @type additional_vm: boolean
3790
  @param additional_vm: whether the additional nodes are vm-capable or not
3791

3792
  """
3793
  # Gather target nodes
3794
  cluster = lu.cfg.GetClusterInfo()
3795
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3796

    
3797
  online_nodes = lu.cfg.GetOnlineNodeList()
3798
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3799

    
3800
  if additional_nodes is not None:
3801
    online_nodes.extend(additional_nodes)
3802
    if additional_vm:
3803
      vm_nodes.extend(additional_nodes)
3804

    
3805
  # Never distribute to master node
3806
  for nodelist in [online_nodes, vm_nodes]:
3807
    if master_info.name in nodelist:
3808
      nodelist.remove(master_info.name)
3809

    
3810
  # Gather file lists
3811
  (files_all, _, files_mc, files_vm) = \
3812
    _ComputeAncillaryFiles(cluster, True)
3813

    
3814
  # Never re-distribute configuration file from here
3815
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3816
              constants.CLUSTER_CONF_FILE in files_vm)
3817
  assert not files_mc, "Master candidates not handled in this function"
3818

    
3819
  filemap = [
3820
    (online_nodes, files_all),
3821
    (vm_nodes, files_vm),
3822
    ]
3823

    
3824
  # Upload the files
3825
  for (node_list, files) in filemap:
3826
    for fname in files:
3827
      _UploadHelper(lu, node_list, fname)
3828

    
3829

    
3830
class LUClusterRedistConf(NoHooksLU):
3831
  """Force the redistribution of cluster configuration.
3832

3833
  This is a very simple LU.
3834

3835
  """
3836
  REQ_BGL = False
3837

    
3838
  def ExpandNames(self):
3839
    self.needed_locks = {
3840
      locking.LEVEL_NODE: locking.ALL_SET,
3841
    }
3842
    self.share_locks[locking.LEVEL_NODE] = 1
3843

    
3844
  def Exec(self, feedback_fn):
3845
    """Redistribute the configuration.
3846

3847
    """
3848
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3849
    _RedistributeAncillaryFiles(self)
3850

    
3851

    
3852
class LUClusterActivateMasterIp(NoHooksLU):
3853
  """Activate the master IP on the master node.
3854

3855
  """
3856
  def Exec(self, feedback_fn):
3857
    """Activate the master IP.
3858

3859
    """
3860
    master = self.cfg.GetMasterNode()
3861
    self.rpc.call_node_activate_master_ip(master)
3862

    
3863

    
3864
class LUClusterDeactivateMasterIp(NoHooksLU):
3865
  """Deactivate the master IP on the master node.
3866

3867
  """
3868
  def Exec(self, feedback_fn):
3869
    """Deactivate the master IP.
3870

3871
    """
3872
    master = self.cfg.GetMasterNode()
3873
    self.rpc.call_node_deactivate_master_ip(master)
3874

    
3875

    
3876
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3877
  """Sleep and poll for an instance's disk to sync.
3878

3879
  """
3880
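  # nothing to wait for if the instance has no disks at all, or if an
  # explicitly empty disk list was requested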
  if not instance.disks or disks is not None and not disks:
3881
    return True
3882

    
3883
  disks = _ExpandCheckDisks(instance, disks)
3884

    
3885
  if not oneshot:
3886
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3887

    
3888
  node = instance.primary_node
3889

    
3890
  for dev in disks:
3891
    lu.cfg.SetDiskID(dev, node)
3892

    
3893
  # TODO: Convert to utils.Retry
3894

    
3895
  retries = 0
3896
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3897
  while True:
3898
    max_time = 0
3899
    done = True
3900
    cumul_degraded = False
3901
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3902
    msg = rstats.fail_msg
3903
    if msg:
3904
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3905
      retries += 1
3906
      if retries >= 10:
3907
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3908
                                 " aborting." % node)
3909
      time.sleep(6)
3910
      continue
3911
    rstats = rstats.payload
3912
    retries = 0
3913
    for i, mstat in enumerate(rstats):
3914
      if mstat is None:
3915
        lu.LogWarning("Can't compute data for node %s/%s",
3916
                           node, disks[i].iv_name)
3917
        continue
3918

    
3919
      cumul_degraded = (cumul_degraded or
3920
                        (mstat.is_degraded and mstat.sync_percent is None))
3921
      if mstat.sync_percent is not None:
3922
        done = False
3923
        if mstat.estimated_time is not None:
3924
          rem_time = ("%s remaining (estimated)" %
3925
                      utils.FormatSeconds(mstat.estimated_time))
3926
          max_time = mstat.estimated_time
3927
        else:
3928
          rem_time = "no time estimate"
3929
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3930
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3931

    
3932
    # if we're done but degraded, let's do a few small retries, to
3933
    # make sure we see a stable and not transient situation; therefore
3934
    # we force restart of the loop
3935
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3936
      logging.info("Degraded disks found, %d retries left", degr_retries)
3937
      degr_retries -= 1
3938
      time.sleep(1)
3939
      continue
3940

    
3941
    if done or oneshot:
3942
      break
3943

    
3944
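    # sleep until the next poll: at most one minute, less if the estimated
    # remaining sync time is shorter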
    time.sleep(min(60, max_time))
3945

    
3946
  if done:
3947
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3948
  return not cumul_degraded
3949

    
3950

    
3951
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3952
  """Check that mirrors are not degraded.
3953

3954
  The ldisk parameter, if True, will change the test from the
3955
  is_degraded attribute (which represents overall non-ok status for
3956
  the device(s)) to the ldisk (representing the local storage status).
3957

3958
  """
3959
  lu.cfg.SetDiskID(dev, node)
3960

    
3961
  result = True
3962

    
3963
  if on_primary or dev.AssembleOnSecondary():
3964
    rstats = lu.rpc.call_blockdev_find(node, dev)
3965
    msg = rstats.fail_msg
3966
    if msg:
3967
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3968
      result = False
3969
    elif not rstats.payload:
3970
      lu.LogWarning("Can't find disk on node %s", node)
3971
      result = False
3972
    else:
3973
      if ldisk:
3974
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3975
      else:
3976
        result = result and not rstats.payload.is_degraded
3977

    
3978
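  # recurse into the child devices (e.g. the data and metadata volumes of a
  # DRBD8 disk) and require them to be consistent as well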
  if dev.children:
3979
    for child in dev.children:
3980
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3981

    
3982
  return result
3983

    
3984

    
3985
class LUOobCommand(NoHooksLU):
3986
  """Logical unit for OOB handling.
3987

3988
  """
3989
  REQ_BGL = False
3990
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3991

    
3992
  def ExpandNames(self):
3993
    """Gather locks we need.
3994

3995
    """
3996
    if self.op.node_names:
3997
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3998
      lock_names = self.op.node_names
3999
    else:
4000
      lock_names = locking.ALL_SET
4001

    
4002
    self.needed_locks = {
4003
      locking.LEVEL_NODE: lock_names,
4004
      }
4005

    
4006
  def CheckPrereq(self):
4007
    """Check prerequisites.
4008

4009
    This checks:
4010
     - the node exists in the configuration
4011
     - OOB is supported
4012

4013
    Any errors are signaled by raising errors.OpPrereqError.
4014

4015
    """
4016
    self.nodes = []
4017
    self.master_node = self.cfg.GetMasterNode()
4018

    
4019
    assert self.op.power_delay >= 0.0
4020

    
4021
    if self.op.node_names:
4022
      if (self.op.command in self._SKIP_MASTER and
4023
          self.master_node in self.op.node_names):
4024
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4025
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4026

    
4027
        if master_oob_handler:
4028
          additional_text = ("run '%s %s %s' if you want to operate on the"
4029
                             " master regardless") % (master_oob_handler,
4030
                                                      self.op.command,
4031
                                                      self.master_node)
4032
        else:
4033
          additional_text = "it does not support out-of-band operations"
4034

    
4035
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4036
                                    " allowed for %s; %s") %
4037
                                   (self.master_node, self.op.command,
4038
                                    additional_text), errors.ECODE_INVAL)
4039
    else:
4040
      self.op.node_names = self.cfg.GetNodeList()
4041
      if self.op.command in self._SKIP_MASTER:
4042
        self.op.node_names.remove(self.master_node)
4043

    
4044
    if self.op.command in self._SKIP_MASTER:
4045
      assert self.master_node not in self.op.node_names
4046

    
4047
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4048
      if node is None:
4049
        raise errors.OpPrereqError("Node %s not found" % node_name,
4050
                                   errors.ECODE_NOENT)
4051
      else:
4052
        self.nodes.append(node)
4053

    
4054
      if (not self.op.ignore_status and
4055
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4056
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4057
                                    " not marked offline") % node_name,
4058
                                   errors.ECODE_STATE)
4059

    
4060
  def Exec(self, feedback_fn):
4061
    """Execute OOB and return result if we expect any.
4062

4063
    """
4064
    master_node = self.master_node
4065
    ret = []
4066

    
4067
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4068
                                              key=lambda node: node.name)):
4069
      node_entry = [(constants.RS_NORMAL, node.name)]
4070
      ret.append(node_entry)
4071

    
4072
      oob_program = _SupportsOob(self.cfg, node)
4073

    
4074
      if not oob_program:
4075
        node_entry.append((constants.RS_UNAVAIL, None))
4076
        continue
4077

    
4078
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4079
                   self.op.command, oob_program, node.name)
4080
      result = self.rpc.call_run_oob(master_node, oob_program,
4081
                                     self.op.command, node.name,
4082
                                     self.op.timeout)
4083

    
4084
      if result.fail_msg:
4085
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4086
                        node.name, result.fail_msg)
4087
        node_entry.append((constants.RS_NODATA, None))
4088
      else:
4089
        try:
4090
          self._CheckPayload(result)
4091
        except errors.OpExecError, err:
4092
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4093
                          node.name, err)
4094
          node_entry.append((constants.RS_NODATA, None))
4095
        else:
4096
          if self.op.command == constants.OOB_HEALTH:
4097
            # For health we should log important events
4098
            for item, status in result.payload:
4099
              if status in [constants.OOB_STATUS_WARNING,
4100
                            constants.OOB_STATUS_CRITICAL]:
4101
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4102
                                item, node.name, status)
4103

    
4104
          if self.op.command == constants.OOB_POWER_ON:
4105
            node.powered = True
4106
          elif self.op.command == constants.OOB_POWER_OFF:
4107
            node.powered = False
4108
          elif self.op.command == constants.OOB_POWER_STATUS:
4109
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4110
            if powered != node.powered:
4111
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4112
                               " match actual power state (%s)"), node.powered,
4113
                              node.name, powered)
4114

    
4115
          # For configuration changing commands we should update the node
4116
          if self.op.command in (constants.OOB_POWER_ON,
4117
                                 constants.OOB_POWER_OFF):
4118
            self.cfg.Update(node, feedback_fn)
4119

    
4120
          node_entry.append((constants.RS_NORMAL, result.payload))
4121

    
4122
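          # stagger power-on commands so that nodes do not all come up at the
          # same time; no delay is needed after the last node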
          if (self.op.command == constants.OOB_POWER_ON and
4123
              idx < len(self.nodes) - 1):
4124
            time.sleep(self.op.power_delay)
4125

    
4126
    return ret
4127

    
4128
  def _CheckPayload(self, result):
4129
    """Checks if the payload is valid.
4130

4131
    @param result: RPC result
4132
    @raises errors.OpExecError: If payload is not valid
4133

4134
    """
4135
    errs = []
4136
    if self.op.command == constants.OOB_HEALTH:
4137
      if not isinstance(result.payload, list):
4138
        errs.append("command 'health' is expected to return a list but got %s" %
4139
                    type(result.payload))
4140
      else:
4141
        for item, status in result.payload:
4142
          if status not in constants.OOB_STATUSES:
4143
            errs.append("health item '%s' has invalid status '%s'" %
4144
                        (item, status))
4145

    
4146
    if self.op.command == constants.OOB_POWER_STATUS:
4147
      if not isinstance(result.payload, dict):
4148
        errs.append("power-status is expected to return a dict but got %s" %
4149
                    type(result.payload))
4150

    
4151
    if self.op.command in [
4152
        constants.OOB_POWER_ON,
4153
        constants.OOB_POWER_OFF,
4154
        constants.OOB_POWER_CYCLE,
4155
        ]:
4156
      if result.payload is not None:
4157
        errs.append("%s is expected to not return payload but got '%s'" %
4158
                    (self.op.command, result.payload))
4159

    
4160
    if errs:
4161
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4162
                               utils.CommaJoin(errs))
4163

    
4164

    
4165
class _OsQuery(_QueryBase):
4166
  FIELDS = query.OS_FIELDS
4167

    
4168
  def ExpandNames(self, lu):
4169
    # Lock all nodes in shared mode
4170
    # Temporary removal of locks, should be reverted later
4171
    # TODO: reintroduce locks when they are lighter-weight
4172
    lu.needed_locks = {}
4173
    #self.share_locks[locking.LEVEL_NODE] = 1
4174
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4175

    
4176
    # The following variables interact with _QueryBase._GetNames
4177
    if self.names:
4178
      self.wanted = self.names
4179
    else:
4180
      self.wanted = locking.ALL_SET
4181

    
4182
    self.do_locking = self.use_locking
4183

    
4184
  def DeclareLocks(self, lu, level):
4185
    pass
4186

    
4187
  @staticmethod
4188
  def _DiagnoseByOS(rlist):
4189
    """Remaps a per-node return list into an a per-os per-node dictionary
4190

4191
    @param rlist: a map with node names as keys and OS objects as values
4192

4193
    @rtype: dict
4194
    @return: a dictionary with osnames as keys and as value another
4195
        map, with nodes as keys and tuples of (path, status, diagnose,
4196
        variants, parameters, api_versions) as values, eg::
4197

4198
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4199
                                     (/srv/..., False, "invalid api")],
4200
                           "node2": [(/srv/..., True, "", [], [])]}
4201
          }
4202

4203
    """
4204
    all_os = {}
4205
    # we build here the list of nodes that didn't fail the RPC (at RPC
4206
    # level), so that nodes with a non-responding node daemon don't
4207
    # make all OSes invalid
4208
    good_nodes = [node_name for node_name in rlist
4209
                  if not rlist[node_name].fail_msg]
4210
    for node_name, nr in rlist.items():
4211
      if nr.fail_msg or not nr.payload:
4212
        continue
4213
      for (name, path, status, diagnose, variants,
4214
           params, api_versions) in nr.payload:
4215
        if name not in all_os:
4216
          # build a list of nodes for this os containing empty lists
4217
          # for each node in node_list
4218
          all_os[name] = {}
4219
          for nname in good_nodes:
4220
            all_os[name][nname] = []
4221
        # convert params from [name, help] to (name, help)
4222
        params = [tuple(v) for v in params]
4223
        all_os[name][node_name].append((path, status, diagnose,
4224
                                        variants, params, api_versions))
4225
    return all_os
4226

    
4227
  def _GetQueryData(self, lu):
4228
    """Computes the list of nodes and their attributes.
4229

4230
    """
4231
    # Locking is not used
4232
    assert not (compat.any(lu.glm.is_owned(level)
4233
                           for level in locking.LEVELS
4234
                           if level != locking.LEVEL_CLUSTER) or
4235
                self.do_locking or self.use_locking)
4236

    
4237
    valid_nodes = [node.name
4238
                   for node in lu.cfg.GetAllNodesInfo().values()
4239
                   if not node.offline and node.vm_capable]
4240
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4241
    cluster = lu.cfg.GetClusterInfo()
4242

    
4243
    data = {}
4244

    
4245
    for (os_name, os_data) in pol.items():
4246
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4247
                          hidden=(os_name in cluster.hidden_os),
4248
                          blacklisted=(os_name in cluster.blacklisted_os))
4249

    
4250
      variants = set()
4251
      parameters = set()
4252
      api_versions = set()
4253

    
4254
      for idx, osl in enumerate(os_data.values()):
4255
        info.valid = bool(info.valid and osl and osl[0][1])
4256
        if not info.valid:
4257
          break
4258

    
4259
        (node_variants, node_params, node_api) = osl[0][3:6]
4260
        if idx == 0:
4261
          # First entry
4262
          variants.update(node_variants)
4263
          parameters.update(node_params)
4264
          api_versions.update(node_api)
4265
        else:
4266
          # Filter out inconsistent values
4267
          variants.intersection_update(node_variants)
4268
          parameters.intersection_update(node_params)
4269
          api_versions.intersection_update(node_api)
4270

    
4271
      info.variants = list(variants)
4272
      info.parameters = list(parameters)
4273
      info.api_versions = list(api_versions)
4274

    
4275
      data[os_name] = info
4276

    
4277
    # Prepare data in requested order
4278
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4279
            if name in data]
4280

    
4281

    
4282
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the requested node(s).

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the node(s) and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


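# LUQuery and LUQueryFields below are thin wrappers: they look up the matching
# _QueryBase subclass via _GetQueryImplementation(self.op.what). LUQuery
# delegates name expansion, locking and execution to that class, while
# LUQueryFields only needs its FIELDS definition.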
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for available fields of resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the given node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


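# LUNodeAdd does all validation (name resolution, IP/homing checks, candidate
# pool decisions) in CheckArguments/CheckPrereq; Exec then only performs the
# side effects: version handshake, /etc/hosts update, node verification and
# finally registering the node (or re-add) plus ancillary file distribution.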
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


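# Example of the role mapping used by LUNodeSetParams below: a node with
# (master_candidate=True, drained=False, offline=False) maps through _F2R to
# _ROLE_CANDIDATE, and _R2F is used in Exec() to turn the computed new role
# back into the (master_candidate, drained, offline) flag tuple.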
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5627
                           ignore_size=False):
5628
  """Prepare the block devices for an instance.
5629

5630
  This sets up the block devices on all nodes.
5631

5632
  @type lu: L{LogicalUnit}
5633
  @param lu: the logical unit on whose behalf we execute
5634
  @type instance: L{objects.Instance}
5635
  @param instance: the instance for whose disks we assemble
5636
  @type disks: list of L{objects.Disk} or None
5637
  @param disks: which disks to assemble (or all, if None)
5638
  @type ignore_secondaries: boolean
5639
  @param ignore_secondaries: if true, errors on secondary nodes
5640
      won't result in an error return from the function
5641
  @type ignore_size: boolean
5642
  @param ignore_size: if true, the current known size of the disk
5643
      will not be used during the disk activation, useful for cases
5644
      when the size is wrong
5645
  @return: False if the operation failed, otherwise a list of
5646
      (host, instance_visible_name, node_visible_name)
5647
      with the mapping from node devices to instance devices
5648

5649
  """
5650
  device_info = []
5651
  disks_ok = True
5652
  iname = instance.name
5653
  disks = _ExpandCheckDisks(instance, disks)
5654

    
5655
  # With the two passes mechanism we try to reduce the window of
5656
  # opportunity for the race condition of switching DRBD to primary
5657
  # before handshaking occured, but we do not eliminate it
5658

    
5659
  # The proper fix would be to wait (with some limits) until the
5660
  # connection has been made and drbd transitions from WFConnection
5661
  # into any other network-connected state (Connected, SyncTarget,
5662
  # SyncSource, etc.)
5663

    
5664
  # 1st pass, assemble on all nodes in secondary mode
5665
  for idx, inst_disk in enumerate(disks):
5666
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5667
      if ignore_size:
5668
        node_disk = node_disk.Copy()
5669
        node_disk.UnsetSize()
5670
      lu.cfg.SetDiskID(node_disk, node)
5671
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5672
      msg = result.fail_msg
5673
      if msg:
5674
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5675
                           " (is_primary=False, pass=1): %s",
5676
                           inst_disk.iv_name, node, msg)
5677
        if not ignore_secondaries:
5678
          disks_ok = False
5679

    
5680
  # FIXME: race condition on drbd migration to primary
5681

    
5682
  # 2nd pass, do only the primary node
5683
  for idx, inst_disk in enumerate(disks):
5684
    dev_path = None
5685

    
5686
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5687
      if node != instance.primary_node:
5688
        continue
5689
      if ignore_size:
5690
        node_disk = node_disk.Copy()
5691
        node_disk.UnsetSize()
5692
      lu.cfg.SetDiskID(node_disk, node)
5693
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5694
      msg = result.fail_msg
5695
      if msg:
5696
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5697
                           " (is_primary=True, pass=2): %s",
5698
                           inst_disk.iv_name, node, msg)
5699
        disks_ok = False
5700
      else:
5701
        dev_path = result.payload
5702

    
5703
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5704

    
5705
  # leave the disks configured for the primary node
5706
  # this is a workaround that would be fixed better by
5707
  # improving the logical/physical id handling
5708
  for disk in disks:
5709
    lu.cfg.SetDiskID(disk, instance.primary_node)
5710

    
5711
  return disks_ok, device_info
5712

    
5713

    
5714
def _StartInstanceDisks(lu, instance, force):
5715
  """Start the disks of an instance.
5716

5717
  """
5718
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5719
                                           ignore_secondaries=force)
5720
  if not disks_ok:
5721
    _ShutdownInstanceDisks(lu, instance)
5722
    if force is not None and not force:
5723
      lu.proc.LogWarning("", hint="If the message above refers to a"
5724
                         " secondary node,"
5725
                         " you can retry the operation using '--force'.")
5726
    raise errors.OpExecError("Disk consistency error")
5727

    
5728

    
5729
class LUInstanceDeactivateDisks(NoHooksLU):
5730
  """Shutdown an instance's disks.
5731

5732
  """
5733
  REQ_BGL = False
5734

    
5735
  def ExpandNames(self):
5736
    self._ExpandAndLockInstance()
5737
    self.needed_locks[locking.LEVEL_NODE] = []
5738
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5739

    
5740
  def DeclareLocks(self, level):
5741
    if level == locking.LEVEL_NODE:
5742
      self._LockInstancesNodes()
5743

    
5744
  def CheckPrereq(self):
5745
    """Check prerequisites.
5746

5747
    This checks that the instance is in the cluster.
5748

5749
    """
5750
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5751
    assert self.instance is not None, \
5752
      "Cannot retrieve locked instance %s" % self.op.instance_name
5753

    
5754
  def Exec(self, feedback_fn):
5755
    """Deactivate the disks
5756

5757
    """
5758
    instance = self.instance
5759
    if self.op.force:
5760
      _ShutdownInstanceDisks(self, instance)
5761
    else:
5762
      _SafeShutdownInstanceDisks(self, instance)
5763

    
5764

    
5765
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5766
  """Shutdown block devices of an instance.
5767

5768
  This function checks if an instance is running, before calling
5769
  _ShutdownInstanceDisks.
5770

5771
  """
5772
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5773
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5774

    
5775

    
5776
def _ExpandCheckDisks(instance, disks):
5777
  """Return the instance disks selected by the disks list
5778

5779
  @type disks: list of L{objects.Disk} or None
5780
  @param disks: selected disks
5781
  @rtype: list of L{objects.Disk}
5782
  @return: selected instance disks to act on
5783

5784
  """
5785
  if disks is None:
5786
    return instance.disks
5787
  else:
5788
    if not set(disks).issubset(instance.disks):
5789
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5790
                                   " target instance")
5791
    return disks
5792

    
5793

    
5794
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5795
  """Shutdown block devices of an instance.
5796

5797
  This does the shutdown on all nodes of the instance.
5798

5799
  If the ignore_primary is false, errors on the primary node are
5800
  ignored.
5801

5802
  """
5803
  all_result = True
5804
  disks = _ExpandCheckDisks(instance, disks)
5805

    
5806
  for disk in disks:
5807
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5808
      lu.cfg.SetDiskID(top_disk, node)
5809
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5810
      msg = result.fail_msg
5811
      if msg:
5812
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5813
                      disk.iv_name, node, msg)
5814
        if ((node == instance.primary_node and not ignore_primary) or
5815
            (node != instance.primary_node and not result.offline)):
5816
          all_result = False
5817
  return all_result
5818

    
5819

    
5820
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5821
  """Checks if a node has enough free memory.
5822

5823
  This function check if a given node has the needed amount of free
5824
  memory. In case the node has less memory or we cannot get the
5825
  information from the node, this function raise an OpPrereqError
5826
  exception.
5827

5828
  @type lu: C{LogicalUnit}
5829
  @param lu: a logical unit from which we get configuration data
5830
  @type node: C{str}
5831
  @param node: the node to check
5832
  @type reason: C{str}
5833
  @param reason: string to use in the error message
5834
  @type requested: C{int}
5835
  @param requested: the amount of memory in MiB to check for
5836
  @type hypervisor_name: C{str}
5837
  @param hypervisor_name: the hypervisor to ask for memory stats
5838
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5839
      we cannot check the node
5840

5841
  """
5842
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5843
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5844
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5845
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5846
  if not isinstance(free_mem, int):
5847
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5848
                               " was '%s'" % (node, free_mem),
5849
                               errors.ECODE_ENVIRON)
5850
  if requested > free_mem:
5851
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5852
                               " needed %s MiB, available %s MiB" %
5853
                               (node, reason, requested, free_mem),
5854
                               errors.ECODE_NORES)
5855

    
5856

    
5857
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5858
  """Checks if nodes have enough free disk space in the all VGs.
5859

5860
  This function check if all given nodes have the needed amount of
5861
  free disk. In case any node has less disk or we cannot get the
5862
  information from the node, this function raise an OpPrereqError
5863
  exception.
5864

5865
  @type lu: C{LogicalUnit}
5866
  @param lu: a logical unit from which we get configuration data
5867
  @type nodenames: C{list}
5868
  @param nodenames: the list of node names to check
5869
  @type req_sizes: C{dict}
5870
  @param req_sizes: the hash of vg and corresponding amount of disk in
5871
      MiB to check for
5872
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5873
      or we cannot check the node
5874

5875
  """
5876
  for vg, req_size in req_sizes.items():
5877
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5878

    
5879

    
5880
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5881
  """Checks if nodes have enough free disk space in the specified VG.
5882

5883
  This function check if all given nodes have the needed amount of
5884
  free disk. In case any node has less disk or we cannot get the
5885
  information from the node, this function raise an OpPrereqError
5886
  exception.
5887

5888
  @type lu: C{LogicalUnit}
5889
  @param lu: a logical unit from which we get configuration data
5890
  @type nodenames: C{list}
5891
  @param nodenames: the list of node names to check
5892
  @type vg: C{str}
5893
  @param vg: the volume group to check
5894
  @type requested: C{int}
5895
  @param requested: the amount of disk in MiB to check for
5896
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5897
      or we cannot check the node
5898

5899
  """
5900
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5901
  for node in nodenames:
5902
    info = nodeinfo[node]
5903
    info.Raise("Cannot get current information from node %s" % node,
5904
               prereq=True, ecode=errors.ECODE_ENVIRON)
5905
    vg_free = info.payload.get("vg_free", None)
5906
    if not isinstance(vg_free, int):
5907
      raise errors.OpPrereqError("Can't compute free disk space on node"
5908
                                 " %s for vg %s, result was '%s'" %
5909
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5910
    if requested > vg_free:
5911
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5912
                                 " vg %s: required %d MiB, available %d MiB" %
5913
                                 (node, vg, requested, vg_free),
5914
                                 errors.ECODE_NORES)
5915

    
5916

    
5917
class LUInstanceStartup(LogicalUnit):
5918
  """Starts an instance.
5919

5920
  """
5921
  HPATH = "instance-start"
5922
  HTYPE = constants.HTYPE_INSTANCE
5923
  REQ_BGL = False
5924

    
5925
  def CheckArguments(self):
5926
    # extra beparams
5927
    if self.op.beparams:
5928
      # fill the beparams dict
5929
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5930

    
5931
  def ExpandNames(self):
5932
    self._ExpandAndLockInstance()
5933

    
5934
  def BuildHooksEnv(self):
5935
    """Build hooks env.
5936

5937
    This runs on master, primary and secondary nodes of the instance.
5938

5939
    """
5940
    env = {
5941
      "FORCE": self.op.force,
5942
      }
5943

    
5944
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5945

    
5946
    return env
5947

    
5948
  def BuildHooksNodes(self):
5949
    """Build hooks nodes.
5950

5951
    """
5952
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5953
    return (nl, nl)
5954

    
5955
  def CheckPrereq(self):
5956
    """Check prerequisites.
5957

5958
    This checks that the instance is in the cluster.
5959

5960
    """
5961
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5962
    assert self.instance is not None, \
5963
      "Cannot retrieve locked instance %s" % self.op.instance_name
5964

    
5965
    # extra hvparams
5966
    if self.op.hvparams:
5967
      # check hypervisor parameter syntax (locally)
5968
      cluster = self.cfg.GetClusterInfo()
5969
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5970
      filled_hvp = cluster.FillHV(instance)
5971
      filled_hvp.update(self.op.hvparams)
5972
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5973
      hv_type.CheckParameterSyntax(filled_hvp)
5974
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5975

    
5976
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5977

    
5978
    if self.primary_offline and self.op.ignore_offline_nodes:
5979
      self.proc.LogWarning("Ignoring offline primary node")
5980

    
5981
      if self.op.hvparams or self.op.beparams:
5982
        self.proc.LogWarning("Overridden parameters are ignored")
5983
    else:
5984
      _CheckNodeOnline(self, instance.primary_node)
5985

    
5986
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5987

    
5988
      # check bridges existence
5989
      _CheckInstanceBridgesExist(self, instance)
5990

    
5991
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5992
                                                instance.name,
5993
                                                instance.hypervisor)
5994
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5995
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5996
      if not remote_info.payload: # not running already
5997
        _CheckNodeFreeMemory(self, instance.primary_node,
5998
                             "starting instance %s" % instance.name,
5999
                             bep[constants.BE_MEMORY], instance.hypervisor)
6000

    
6001
  def Exec(self, feedback_fn):
6002
    """Start the instance.
6003

6004
    """
6005
    instance = self.instance
6006
    force = self.op.force
6007

    
6008
    if not self.op.no_remember:
6009
      self.cfg.MarkInstanceUp(instance.name)
6010

    
6011
    if self.primary_offline:
6012
      assert self.op.ignore_offline_nodes
6013
      self.proc.LogInfo("Primary node offline, marked instance as started")
6014
    else:
6015
      node_current = instance.primary_node
6016

    
6017
      _StartInstanceDisks(self, instance, force)
6018

    
6019
      result = self.rpc.call_instance_start(node_current, instance,
6020
                                            self.op.hvparams, self.op.beparams,
6021
                                            self.op.startup_paused)
6022
      msg = result.fail_msg
6023
      if msg:
6024
        _ShutdownInstanceDisks(self, instance)
6025
        raise errors.OpExecError("Could not start instance: %s" % msg)
6026

    
6027

    
6028
class LUInstanceReboot(LogicalUnit):
6029
  """Reboot an instance.
6030

6031
  """
6032
  HPATH = "instance-reboot"
6033
  HTYPE = constants.HTYPE_INSTANCE
6034
  REQ_BGL = False
6035

    
6036
  def ExpandNames(self):
6037
    self._ExpandAndLockInstance()
6038

    
6039
  def BuildHooksEnv(self):
6040
    """Build hooks env.
6041

6042
    This runs on master, primary and secondary nodes of the instance.
6043

6044
    """
6045
    env = {
6046
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6047
      "REBOOT_TYPE": self.op.reboot_type,
6048
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6049
      }
6050

    
6051
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6052

    
6053
    return env
6054

    
6055
  def BuildHooksNodes(self):
6056
    """Build hooks nodes.
6057

6058
    """
6059
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6060
    return (nl, nl)
6061

    
6062
  def CheckPrereq(self):
6063
    """Check prerequisites.
6064

6065
    This checks that the instance is in the cluster.
6066

6067
    """
6068
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6069
    assert self.instance is not None, \
6070
      "Cannot retrieve locked instance %s" % self.op.instance_name
6071

    
6072
    _CheckNodeOnline(self, instance.primary_node)
6073

    
6074
    # check bridges existence
6075
    _CheckInstanceBridgesExist(self, instance)
6076

    
6077
  def Exec(self, feedback_fn):
6078
    """Reboot the instance.
6079

6080
    """
6081
    instance = self.instance
6082
    ignore_secondaries = self.op.ignore_secondaries
6083
    reboot_type = self.op.reboot_type
6084

    
6085
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6086
                                              instance.name,
6087
                                              instance.hypervisor)
6088
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6089
    instance_running = bool(remote_info.payload)
6090

    
6091
    node_current = instance.primary_node
6092

    
6093
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6094
                                            constants.INSTANCE_REBOOT_HARD]:
6095
      for disk in instance.disks:
6096
        self.cfg.SetDiskID(disk, node_current)
6097
      result = self.rpc.call_instance_reboot(node_current, instance,
6098
                                             reboot_type,
6099
                                             self.op.shutdown_timeout)
6100
      result.Raise("Could not reboot instance")
6101
    else:
6102
      if instance_running:
6103
        result = self.rpc.call_instance_shutdown(node_current, instance,
6104
                                                 self.op.shutdown_timeout)
6105
        result.Raise("Could not shutdown instance for full reboot")
6106
        _ShutdownInstanceDisks(self, instance)
6107
      else:
6108
        self.LogInfo("Instance %s was already stopped, starting now",
6109
                     instance.name)
6110
      _StartInstanceDisks(self, instance, ignore_secondaries)
6111
      result = self.rpc.call_instance_start(node_current, instance,
6112
                                            None, None, False)
6113
      msg = result.fail_msg
6114
      if msg:
6115
        _ShutdownInstanceDisks(self, instance)
6116
        raise errors.OpExecError("Could not start instance for"
6117
                                 " full reboot: %s" % msg)
6118

    
6119
    self.cfg.MarkInstanceUp(instance.name)
6120

    
6121

    
6122
class LUInstanceShutdown(LogicalUnit):
6123
  """Shutdown an instance.
6124

6125
  """
6126
  HPATH = "instance-stop"
6127
  HTYPE = constants.HTYPE_INSTANCE
6128
  REQ_BGL = False
6129

    
6130
  def ExpandNames(self):
6131
    self._ExpandAndLockInstance()
6132

    
6133
  def BuildHooksEnv(self):
6134
    """Build hooks env.
6135

6136
    This runs on master, primary and secondary nodes of the instance.
6137

6138
    """
6139
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6140
    env["TIMEOUT"] = self.op.timeout
6141
    return env
6142

    
6143
  def BuildHooksNodes(self):
6144
    """Build hooks nodes.
6145

6146
    """
6147
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6148
    return (nl, nl)
6149

    
6150
  def CheckPrereq(self):
6151
    """Check prerequisites.
6152

6153
    This checks that the instance is in the cluster.
6154

6155
    """
6156
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6157
    assert self.instance is not None, \
6158
      "Cannot retrieve locked instance %s" % self.op.instance_name
6159

    
6160
    self.primary_offline = \
6161
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6162

    
6163
    if self.primary_offline and self.op.ignore_offline_nodes:
6164
      self.proc.LogWarning("Ignoring offline primary node")
6165
    else:
6166
      _CheckNodeOnline(self, self.instance.primary_node)
6167

    
6168
  def Exec(self, feedback_fn):
6169
    """Shutdown the instance.
6170

6171
    """
6172
    instance = self.instance
6173
    node_current = instance.primary_node
6174
    timeout = self.op.timeout
6175

    
6176
    if not self.op.no_remember:
6177
      self.cfg.MarkInstanceDown(instance.name)
6178

    
6179
    if self.primary_offline:
6180
      assert self.op.ignore_offline_nodes
6181
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6182
    else:
6183
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6184
      msg = result.fail_msg
6185
      if msg:
6186
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6187

    
6188
      _ShutdownInstanceDisks(self, instance)
6189

    
6190

    
6191
class LUInstanceReinstall(LogicalUnit):
6192
  """Reinstall an instance.
6193

6194
  """
6195
  HPATH = "instance-reinstall"
6196
  HTYPE = constants.HTYPE_INSTANCE
6197
  REQ_BGL = False
6198

    
6199
  def ExpandNames(self):
6200
    self._ExpandAndLockInstance()
6201

    
6202
  def BuildHooksEnv(self):
6203
    """Build hooks env.
6204

6205
    This runs on master, primary and secondary nodes of the instance.
6206

6207
    """
6208
    return _BuildInstanceHookEnvByObject(self, self.instance)
6209

    
6210
  def BuildHooksNodes(self):
6211
    """Build hooks nodes.
6212

6213
    """
6214
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6215
    return (nl, nl)
6216

    
6217
  def CheckPrereq(self):
6218
    """Check prerequisites.
6219

6220
    This checks that the instance is in the cluster and is not running.
6221

6222
    """
6223
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6224
    assert instance is not None, \
6225
      "Cannot retrieve locked instance %s" % self.op.instance_name
6226
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6227
                     " offline, cannot reinstall")
6228
    for node in instance.secondary_nodes:
6229
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6230
                       " cannot reinstall")
6231

    
6232
    if instance.disk_template == constants.DT_DISKLESS:
6233
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6234
                                 self.op.instance_name,
6235
                                 errors.ECODE_INVAL)
6236
    _CheckInstanceDown(self, instance, "cannot reinstall")
6237

    
6238
    if self.op.os_type is not None:
6239
      # OS verification
6240
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6241
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6242
      instance_os = self.op.os_type
6243
    else:
6244
      instance_os = instance.os
6245

    
6246
    nodelist = list(instance.all_nodes)
6247

    
6248
    if self.op.osparams:
6249
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6250
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6251
      self.os_inst = i_osdict # the new dict (without defaults)
6252
    else:
6253
      self.os_inst = None
6254

    
6255
    self.instance = instance
6256

    
6257
  def Exec(self, feedback_fn):
6258
    """Reinstall the instance.
6259

6260
    """
6261
    inst = self.instance
6262

    
6263
    if self.op.os_type is not None:
6264
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6265
      inst.os = self.op.os_type
6266
      # Write to configuration
6267
      self.cfg.Update(inst, feedback_fn)
6268

    
6269
    _StartInstanceDisks(self, inst, None)
6270
    try:
6271
      feedback_fn("Running the instance OS create scripts...")
6272
      # FIXME: pass debug option from opcode to backend
6273
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6274
                                             self.op.debug_level,
6275
                                             osparams=self.os_inst)
6276
      result.Raise("Could not install OS for instance %s on node %s" %
6277
                   (inst.name, inst.primary_node))
6278
    finally:
6279
      _ShutdownInstanceDisks(self, inst)
6280

    
6281

    
6282
class LUInstanceRecreateDisks(LogicalUnit):
6283
  """Recreate an instance's missing disks.
6284

6285
  """
6286
  HPATH = "instance-recreate-disks"
6287
  HTYPE = constants.HTYPE_INSTANCE
6288
  REQ_BGL = False
6289

    
6290
  def CheckArguments(self):
6291
    # normalise the disk list
6292
    self.op.disks = sorted(frozenset(self.op.disks))
6293

    
6294
  def ExpandNames(self):
6295
    self._ExpandAndLockInstance()
6296
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6297
    if self.op.nodes:
6298
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6299
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6300
    else:
6301
      self.needed_locks[locking.LEVEL_NODE] = []
6302

    
6303
  def DeclareLocks(self, level):
6304
    if level == locking.LEVEL_NODE:
6305
      # if we replace the nodes, we only need to lock the old primary,
6306
      # otherwise we need to lock all nodes for disk re-creation
6307
      primary_only = bool(self.op.nodes)
6308
      self._LockInstancesNodes(primary_only=primary_only)
6309

    
6310
  def BuildHooksEnv(self):
6311
    """Build hooks env.
6312

6313
    This runs on master, primary and secondary nodes of the instance.
6314

6315
    """
6316
    return _BuildInstanceHookEnvByObject(self, self.instance)
6317

    
6318
  def BuildHooksNodes(self):
6319
    """Build hooks nodes.
6320

6321
    """
6322
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6323
    return (nl, nl)
6324

    
6325
  def CheckPrereq(self):
6326
    """Check prerequisites.
6327

6328
    This checks that the instance is in the cluster and is not running.
6329

6330
    """
6331
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6332
    assert instance is not None, \
6333
      "Cannot retrieve locked instance %s" % self.op.instance_name
6334
    if self.op.nodes:
6335
      if len(self.op.nodes) != len(instance.all_nodes):
6336
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6337
                                   " %d replacement nodes were specified" %
6338
                                   (instance.name, len(instance.all_nodes),
6339
                                    len(self.op.nodes)),
6340
                                   errors.ECODE_INVAL)
6341
      assert instance.disk_template != constants.DT_DRBD8 or \
6342
          len(self.op.nodes) == 2
6343
      assert instance.disk_template != constants.DT_PLAIN or \
6344
          len(self.op.nodes) == 1
6345
      primary_node = self.op.nodes[0]
6346
    else:
6347
      primary_node = instance.primary_node
6348
    _CheckNodeOnline(self, primary_node)
6349

    
6350
    if instance.disk_template == constants.DT_DISKLESS:
6351
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6352
                                 self.op.instance_name, errors.ECODE_INVAL)
6353
    # if we replace nodes *and* the old primary is offline, we don't
6354
    # check
6355
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6356
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6357
    if not (self.op.nodes and old_pnode.offline):
6358
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6359

    
6360
    if not self.op.disks:
6361
      self.op.disks = range(len(instance.disks))
6362
    else:
6363
      for idx in self.op.disks:
6364
        if idx >= len(instance.disks):
6365
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6366
                                     errors.ECODE_INVAL)
6367
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6368
      raise errors.OpPrereqError("Can't recreate disks partially and"
6369
                                 " change the nodes at the same time",
6370
                                 errors.ECODE_INVAL)
6371
    self.instance = instance
6372

    
6373
  def Exec(self, feedback_fn):
6374
    """Recreate the disks.
6375

6376
    """
6377
    instance = self.instance
6378

    
6379
    to_skip = []
6380
    mods = [] # keeps track of needed logical_id changes
6381

    
6382
    for idx, disk in enumerate(instance.disks):
6383
      if idx not in self.op.disks: # disk idx has not been passed in
6384
        to_skip.append(idx)
6385
        continue
6386
      # update secondaries for disks, if needed
6387
      if self.op.nodes:
6388
        if disk.dev_type == constants.LD_DRBD8:
6389
          # need to update the nodes and minors
6390
          assert len(self.op.nodes) == 2
6391
          assert len(disk.logical_id) == 6 # otherwise disk internals
6392
                                           # have changed
6393
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6394
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6395
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6396
                    new_minors[0], new_minors[1], old_secret)
6397
          assert len(disk.logical_id) == len(new_id)
6398
          mods.append((idx, new_id))
6399

    
6400
    # now that we have passed all asserts above, we can apply the mods
6401
    # in a single run (to avoid partial changes)
6402
    for idx, new_id in mods:
6403
      instance.disks[idx].logical_id = new_id
6404

    
6405
    # change primary node, if needed
6406
    if self.op.nodes:
6407
      instance.primary_node = self.op.nodes[0]
6408
      self.LogWarning("Changing the instance's nodes, you will have to"
6409
                      " remove any disks left on the older nodes manually")
6410

    
6411
    if self.op.nodes:
6412
      self.cfg.Update(instance, feedback_fn)
6413

    
6414
    _CreateDisks(self, instance, to_skip=to_skip)
6415

    
6416

    
6417
class LUInstanceRename(LogicalUnit):
6418
  """Rename an instance.
6419

6420
  """
6421
  HPATH = "instance-rename"
6422
  HTYPE = constants.HTYPE_INSTANCE
6423

    
6424
  def CheckArguments(self):
6425
    """Check arguments.
6426

6427
    """
6428
    if self.op.ip_check and not self.op.name_check:
6429
      # TODO: make the ip check more flexible and not depend on the name check
6430
      raise errors.OpPrereqError("IP address check requires a name check",
6431
                                 errors.ECODE_INVAL)
6432

    
6433
  def BuildHooksEnv(self):
6434
    """Build hooks env.
6435

6436
    This runs on master, primary and secondary nodes of the instance.
6437

6438
    """
6439
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6440
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6441
    return env
6442

    
6443
  def BuildHooksNodes(self):
6444
    """Build hooks nodes.
6445

6446
    """
6447
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6448
    return (nl, nl)
6449

    
6450
  def CheckPrereq(self):
6451
    """Check prerequisites.
6452

6453
    This checks that the instance is in the cluster and is not running.
6454

6455
    """
6456
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6457
                                                self.op.instance_name)
6458
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6459
    assert instance is not None
6460
    _CheckNodeOnline(self, instance.primary_node)
6461
    _CheckInstanceDown(self, instance, "cannot rename")
6462
    self.instance = instance
6463

    
6464
    new_name = self.op.new_name
6465
    if self.op.name_check:
6466
      hostname = netutils.GetHostname(name=new_name)
6467
      if hostname != new_name:
6468
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6469
                     hostname.name)
6470
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6471
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6472
                                    " same as given hostname '%s'") %
6473
                                    (hostname.name, self.op.new_name),
6474
                                    errors.ECODE_INVAL)
6475
      new_name = self.op.new_name = hostname.name
6476
      if (self.op.ip_check and
6477
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6478
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6479
                                   (hostname.ip, new_name),
6480
                                   errors.ECODE_NOTUNIQUE)
6481

    
6482
    instance_list = self.cfg.GetInstanceList()
6483
    if new_name in instance_list and new_name != instance.name:
6484
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6485
                                 new_name, errors.ECODE_EXISTS)
6486

    
6487
  def Exec(self, feedback_fn):
6488
    """Rename the instance.
6489

6490
    """
6491
    inst = self.instance
6492
    old_name = inst.name
6493

    
6494
    rename_file_storage = False
6495
    if (inst.disk_template in constants.DTS_FILEBASED and
6496
        self.op.new_name != inst.name):
6497
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6498
      rename_file_storage = True
6499

    
6500
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6501
    # Change the instance lock. This is definitely safe while we hold the BGL.
6502
    # Otherwise the new lock would have to be added in acquired mode.
6503
    assert self.REQ_BGL
6504
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6505
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6506

    
6507
    # re-read the instance from the configuration after rename
6508
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6509

    
6510
    if rename_file_storage:
6511
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6512
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6513
                                                     old_file_storage_dir,
6514
                                                     new_file_storage_dir)
6515
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6516
                   " (but the instance has been renamed in Ganeti)" %
6517
                   (inst.primary_node, old_file_storage_dir,
6518
                    new_file_storage_dir))
6519

    
6520
    _StartInstanceDisks(self, inst, None)
6521
    try:
6522
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6523
                                                 old_name, self.op.debug_level)
6524
      msg = result.fail_msg
6525
      if msg:
6526
        msg = ("Could not run OS rename script for instance %s on node %s"
6527
               " (but the instance has been renamed in Ganeti): %s" %
6528
               (inst.name, inst.primary_node, msg))
6529
        self.proc.LogWarning(msg)
6530
    finally:
6531
      _ShutdownInstanceDisks(self, inst)
6532

    
6533
    return inst.name
6534

    
6535

    
6536
class LUInstanceRemove(LogicalUnit):
6537
  """Remove an instance.
6538

6539
  """
6540
  HPATH = "instance-remove"
6541
  HTYPE = constants.HTYPE_INSTANCE
6542
  REQ_BGL = False
6543

    
6544
  def ExpandNames(self):
6545
    self._ExpandAndLockInstance()
6546
    self.needed_locks[locking.LEVEL_NODE] = []
6547
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6548

    
6549
  def DeclareLocks(self, level):
6550
    if level == locking.LEVEL_NODE:
6551
      self._LockInstancesNodes()
6552

    
6553
  def BuildHooksEnv(self):
6554
    """Build hooks env.
6555

6556
    This runs on master, primary and secondary nodes of the instance.
6557

6558
    """
6559
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6560
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6561
    return env
6562

    
6563
  def BuildHooksNodes(self):
6564
    """Build hooks nodes.
6565

6566
    """
6567
    nl = [self.cfg.GetMasterNode()]
6568
    nl_post = list(self.instance.all_nodes) + nl
6569
    return (nl, nl_post)
6570

    
6571
  def CheckPrereq(self):
6572
    """Check prerequisites.
6573

6574
    This checks that the instance is in the cluster.
6575

6576
    """
6577
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6578
    assert self.instance is not None, \
6579
      "Cannot retrieve locked instance %s" % self.op.instance_name
6580

    
6581
  def Exec(self, feedback_fn):
6582
    """Remove the instance.
6583

6584
    """
6585
    instance = self.instance
6586
    logging.info("Shutting down instance %s on node %s",
6587
                 instance.name, instance.primary_node)
6588

    
6589
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6590
                                             self.op.shutdown_timeout)
6591
    msg = result.fail_msg
6592
    if msg:
6593
      if self.op.ignore_failures:
6594
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6595
      else:
6596
        raise errors.OpExecError("Could not shutdown instance %s on"
6597
                                 " node %s: %s" %
6598
                                 (instance.name, instance.primary_node, msg))
6599

    
6600
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6601

    
6602

    
6603
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6604
  """Utility function to remove an instance.
6605

6606
  """
6607
  logging.info("Removing block devices for instance %s", instance.name)
6608

    
6609
  if not _RemoveDisks(lu, instance):
6610
    if not ignore_failures:
6611
      raise errors.OpExecError("Can't remove instance's disks")
6612
    feedback_fn("Warning: can't remove instance's disks")
6613

    
6614
  logging.info("Removing instance %s out of cluster config", instance.name)
6615

    
6616
  lu.cfg.RemoveInstance(instance.name)
6617

    
6618
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6619
    "Instance lock removal conflict"
6620

    
6621
  # Remove lock for the instance
6622
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6623

    
6624

    
6625
class LUInstanceQuery(NoHooksLU):
6626
  """Logical unit for querying instances.
6627

6628
  """
6629
  # pylint: disable=W0142
6630
  REQ_BGL = False
6631

    
6632
  def CheckArguments(self):
6633
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6634
                             self.op.output_fields, self.op.use_locking)
6635

    
6636
  def ExpandNames(self):
6637
    self.iq.ExpandNames(self)
6638

    
6639
  def DeclareLocks(self, level):
6640
    self.iq.DeclareLocks(self, level)
6641

    
6642
  def Exec(self, feedback_fn):
6643
    return self.iq.OldStyleQuery(self)
6644

    
6645

    
6646
class LUInstanceFailover(LogicalUnit):
6647
  """Failover an instance.
6648

6649
  """
6650
  HPATH = "instance-failover"
6651
  HTYPE = constants.HTYPE_INSTANCE
6652
  REQ_BGL = False
6653

    
6654
  def CheckArguments(self):
6655
    """Check the arguments.
6656

6657
    """
6658
    self.iallocator = getattr(self.op, "iallocator", None)
6659
    self.target_node = getattr(self.op, "target_node", None)
6660

    
6661
  def ExpandNames(self):
6662
    self._ExpandAndLockInstance()
6663

    
6664
    if self.op.target_node is not None:
6665
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6666

    
6667
    self.needed_locks[locking.LEVEL_NODE] = []
6668
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6669

    
6670
    ignore_consistency = self.op.ignore_consistency
6671
    shutdown_timeout = self.op.shutdown_timeout
6672
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6673
                                       cleanup=False,
6674
                                       failover=True,
6675
                                       ignore_consistency=ignore_consistency,
6676
                                       shutdown_timeout=shutdown_timeout)
6677
    self.tasklets = [self._migrater]
6678

    
6679
  def DeclareLocks(self, level):
6680
    if level == locking.LEVEL_NODE:
6681
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6682
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6683
        if self.op.target_node is None:
6684
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6685
        else:
6686
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6687
                                                   self.op.target_node]
6688
        del self.recalculate_locks[locking.LEVEL_NODE]
6689
      else:
6690
        self._LockInstancesNodes()
6691

    
6692
  def BuildHooksEnv(self):
6693
    """Build hooks env.
6694

6695
    This runs on master, primary and secondary nodes of the instance.
6696

6697
    """
6698
    instance = self._migrater.instance
6699
    source_node = instance.primary_node
6700
    target_node = self.op.target_node
6701
    env = {
6702
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6703
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6704
      "OLD_PRIMARY": source_node,
6705
      "NEW_PRIMARY": target_node,
6706
      }
6707

    
6708
    if instance.disk_template in constants.DTS_INT_MIRROR:
6709
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6710
      env["NEW_SECONDARY"] = source_node
6711
    else:
6712
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6713

    
6714
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6715

    
6716
    return env
6717

    
6718
  def BuildHooksNodes(self):
6719
    """Build hooks nodes.
6720

6721
    """
6722
    instance = self._migrater.instance
6723
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6724
    return (nl, nl + [instance.primary_node])
6725

    
6726

    
6727
class LUInstanceMigrate(LogicalUnit):
6728
  """Migrate an instance.
6729

6730
  This is migration without shutting down, compared to the failover,
6731
  which is done with shutdown.
6732

6733
  """
6734
  HPATH = "instance-migrate"
6735
  HTYPE = constants.HTYPE_INSTANCE
6736
  REQ_BGL = False
6737

    
6738
  def ExpandNames(self):
6739
    self._ExpandAndLockInstance()
6740

    
6741
    if self.op.target_node is not None:
6742
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6743

    
6744
    self.needed_locks[locking.LEVEL_NODE] = []
6745
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6746

    
6747
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6748
                                       cleanup=self.op.cleanup,
6749
                                       failover=False,
6750
                                       fallback=self.op.allow_failover)
6751
    self.tasklets = [self._migrater]
6752

    
6753
  def DeclareLocks(self, level):
6754
    if level == locking.LEVEL_NODE:
6755
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6756
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6757
        if self.op.target_node is None:
6758
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6759
        else:
6760
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6761
                                                   self.op.target_node]
6762
        del self.recalculate_locks[locking.LEVEL_NODE]
6763
      else:
6764
        self._LockInstancesNodes()
6765

    
6766
  def BuildHooksEnv(self):
6767
    """Build hooks env.
6768

6769
    This runs on master, primary and secondary nodes of the instance.
6770

6771
    """
6772
    instance = self._migrater.instance
6773
    source_node = instance.primary_node
6774
    target_node = self.op.target_node
6775
    env = _BuildInstanceHookEnvByObject(self, instance)
6776
    env.update({
6777
      "MIGRATE_LIVE": self._migrater.live,
6778
      "MIGRATE_CLEANUP": self.op.cleanup,
6779
      "OLD_PRIMARY": source_node,
6780
      "NEW_PRIMARY": target_node,
6781
      })
6782

    
6783
    if instance.disk_template in constants.DTS_INT_MIRROR:
6784
      env["OLD_SECONDARY"] = target_node
6785
      env["NEW_SECONDARY"] = source_node
6786
    else:
6787
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6788

    
6789
    return env
6790

    
6791
  def BuildHooksNodes(self):
6792
    """Build hooks nodes.
6793

6794
    """
6795
    instance = self._migrater.instance
6796
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6797
    return (nl, nl + [instance.primary_node])
6798

    
6799

    
6800
class LUInstanceMove(LogicalUnit):
6801
  """Move an instance by data-copying.
6802

6803
  """
6804
  HPATH = "instance-move"
6805
  HTYPE = constants.HTYPE_INSTANCE
6806
  REQ_BGL = False
6807

    
6808
  def ExpandNames(self):
6809
    self._ExpandAndLockInstance()
6810
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6811
    self.op.target_node = target_node
6812
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6813
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6814

    
6815
  def DeclareLocks(self, level):
6816
    if level == locking.LEVEL_NODE:
6817
      self._LockInstancesNodes(primary_only=True)
6818

    
6819
  def BuildHooksEnv(self):
6820
    """Build hooks env.
6821

6822
    This runs on master, primary and secondary nodes of the instance.
6823

6824
    """
6825
    env = {
6826
      "TARGET_NODE": self.op.target_node,
6827
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6828
      }
6829
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6830
    return env
6831

    
6832
  def BuildHooksNodes(self):
6833
    """Build hooks nodes.
6834

6835
    """
6836
    nl = [
6837
      self.cfg.GetMasterNode(),
6838
      self.instance.primary_node,
6839
      self.op.target_node,
6840
      ]
6841
    return (nl, nl)
6842

    
6843
  def CheckPrereq(self):
6844
    """Check prerequisites.
6845

6846
    This checks that the instance is in the cluster.
6847

6848
    """
6849
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6850
    assert self.instance is not None, \
6851
      "Cannot retrieve locked instance %s" % self.op.instance_name
6852

    
6853
    node = self.cfg.GetNodeInfo(self.op.target_node)
6854
    assert node is not None, \
6855
      "Cannot retrieve locked node %s" % self.op.target_node
6856

    
6857
    self.target_node = target_node = node.name
6858

    
6859
    if target_node == instance.primary_node:
6860
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6861
                                 (instance.name, target_node),
6862
                                 errors.ECODE_STATE)
6863

    
6864
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6865

    
6866
    for idx, dsk in enumerate(instance.disks):
6867
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6868
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6869
                                   " cannot copy" % idx, errors.ECODE_STATE)
6870

    
6871
    _CheckNodeOnline(self, target_node)
6872
    _CheckNodeNotDrained(self, target_node)
6873
    _CheckNodeVmCapable(self, target_node)
6874

    
6875
    if instance.admin_up:
6876
      # check memory requirements on the secondary node
6877
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6878
                           instance.name, bep[constants.BE_MEMORY],
6879
                           instance.hypervisor)
6880
    else:
6881
      self.LogInfo("Not checking memory on the secondary node as"
6882
                   " instance will not be started")
6883

    
6884
    # check bridge existance
6885
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6886

    
6887
  def Exec(self, feedback_fn):
6888
    """Move an instance.
6889

6890
    The move is done by shutting it down on its present node, copying
6891
    the data over (slow) and starting it on the new node.
6892

6893
    """
6894
    instance = self.instance
6895

    
6896
    source_node = instance.primary_node
6897
    target_node = self.target_node
6898

    
6899
    self.LogInfo("Shutting down instance %s on source node %s",
6900
                 instance.name, source_node)
6901

    
6902
    result = self.rpc.call_instance_shutdown(source_node, instance,
6903
                                             self.op.shutdown_timeout)
6904
    msg = result.fail_msg
6905
    if msg:
6906
      if self.op.ignore_consistency:
6907
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6908
                             " Proceeding anyway. Please make sure node"
6909
                             " %s is down. Error details: %s",
6910
                             instance.name, source_node, source_node, msg)
6911
      else:
6912
        raise errors.OpExecError("Could not shutdown instance %s on"
6913
                                 " node %s: %s" %
6914
                                 (instance.name, source_node, msg))
6915

    
6916
    # create the target disks
6917
    try:
6918
      _CreateDisks(self, instance, target_node=target_node)
6919
    except errors.OpExecError:
6920
      self.LogWarning("Device creation failed, reverting...")
6921
      try:
6922
        _RemoveDisks(self, instance, target_node=target_node)
6923
      finally:
6924
        self.cfg.ReleaseDRBDMinors(instance.name)
6925
        raise
6926

    
6927
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6928

    
6929
    errs = []
6930
    # activate, get path, copy the data over
6931
    for idx, disk in enumerate(instance.disks):
6932
      self.LogInfo("Copying data for disk %d", idx)
6933
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6934
                                               instance.name, True, idx)
6935
      if result.fail_msg:
6936
        self.LogWarning("Can't assemble newly created disk %d: %s",
6937
                        idx, result.fail_msg)
6938
        errs.append(result.fail_msg)
6939
        break
6940
      dev_path = result.payload
6941
      result = self.rpc.call_blockdev_export(source_node, disk,
6942
                                             target_node, dev_path,
6943
                                             cluster_name)
6944
      if result.fail_msg:
6945
        self.LogWarning("Can't copy data over for disk %d: %s",
6946
                        idx, result.fail_msg)
6947
        errs.append(result.fail_msg)
6948
        break
6949

    
6950
    if errs:
6951
      self.LogWarning("Some disks failed to copy, aborting")
6952
      try:
6953
        _RemoveDisks(self, instance, target_node=target_node)
6954
      finally:
6955
        self.cfg.ReleaseDRBDMinors(instance.name)
6956
        raise errors.OpExecError("Errors during disk copy: %s" %
6957
                                 (",".join(errs),))
6958

    
6959
    instance.primary_node = target_node
6960
    self.cfg.Update(instance, feedback_fn)
6961

    
6962
    self.LogInfo("Removing the disks on the original node")
6963
    _RemoveDisks(self, instance, target_node=source_node)
6964

    
6965
    # Only start the instance if it's marked as up
6966
    if instance.admin_up:
6967
      self.LogInfo("Starting instance %s on node %s",
6968
                   instance.name, target_node)
6969

    
6970
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6971
                                           ignore_secondaries=True)
6972
      if not disks_ok:
6973
        _ShutdownInstanceDisks(self, instance)
6974
        raise errors.OpExecError("Can't activate the instance's disks")
6975

    
6976
      result = self.rpc.call_instance_start(target_node, instance,
6977
                                            None, None, False)
6978
      msg = result.fail_msg
6979
      if msg:
6980
        _ShutdownInstanceDisks(self, instance)
6981
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6982
                                 (instance.name, target_node, msg))
6983

    
6984

    
6985
class LUNodeMigrate(LogicalUnit):
6986
  """Migrate all instances from a node.
6987

6988
  """
6989
  HPATH = "node-migrate"
6990
  HTYPE = constants.HTYPE_NODE
6991
  REQ_BGL = False
6992

    
6993
  def CheckArguments(self):
6994
    pass
6995

    
6996
  def ExpandNames(self):
6997
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6998

    
6999
    self.share_locks = _ShareAll()
7000
    self.needed_locks = {
7001
      locking.LEVEL_NODE: [self.op.node_name],
7002
      }
7003

    
7004
  def BuildHooksEnv(self):
7005
    """Build hooks env.
7006

7007
    This runs on the master, the primary and all the secondaries.
7008

7009
    """
7010
    return {
7011
      "NODE_NAME": self.op.node_name,
7012
      }
7013

    
7014
  def BuildHooksNodes(self):
7015
    """Build hooks nodes.
7016

7017
    """
7018
    nl = [self.cfg.GetMasterNode()]
7019
    return (nl, nl)
7020

    
7021
  def CheckPrereq(self):
7022
    pass
7023

    
7024
  def Exec(self, feedback_fn):
7025
    # Prepare jobs for migration instances
7026
    jobs = [
7027
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7028
                                 mode=self.op.mode,
7029
                                 live=self.op.live,
7030
                                 iallocator=self.op.iallocator,
7031
                                 target_node=self.op.target_node)]
7032
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7033
      ]
7034

    
7035
    # TODO: Run iallocator in this opcode and pass correct placement options to
7036
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7037
    # running the iallocator and the actual migration, a good consistency model
7038
    # will have to be found.
7039

    
7040
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7041
            frozenset([self.op.node_name]))
7042

    
7043
    return ResultWithJobs(jobs)
7044

    
7045

    
7046
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is
                  not possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout to use for the
                          instance shutdown

  """
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
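
    # Illustrative sketch (assumed usage, not taken from this file): an
    # instance-migration LU would typically wrap this tasklet roughly as
    #   self.tasklets = [TLMigrateInstance(self, self.op.instance_name,
    #                                      cleanup=self.op.cleanup,
    #                                      fallback=self.op.allow_failover)]
    # and then let the tasklet machinery drive CheckPrereq() and Exec();
    # the opcode field names above are assumptions for illustration only.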
7087

    
7088
  def CheckPrereq(self):
7089
    """Check prerequisites.
7090

7091
    This checks that the instance is in the cluster.
7092

7093
    """
7094
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7095
    instance = self.cfg.GetInstanceInfo(instance_name)
7096
    assert instance is not None
7097
    self.instance = instance
7098

    
7099
    if (not self.cleanup and not instance.admin_up and not self.failover and
7100
        self.fallback):
7101
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7102
                      " to failover")
7103
      self.failover = True
7104

    
7105
    if instance.disk_template not in constants.DTS_MIRRORED:
7106
      if self.failover:
7107
        text = "failovers"
7108
      else:
7109
        text = "migrations"
7110
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7111
                                 " %s" % (instance.disk_template, text),
7112
                                 errors.ECODE_STATE)
7113

    
7114
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7115
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7116

    
7117
      if self.lu.op.iallocator:
7118
        self._RunAllocator()
7119
      else:
        # We set self.target_node, as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node
7123

    
7124
      # self.target_node is already populated, either directly or by the
7125
      # iallocator run
7126
      target_node = self.target_node
7127
      if self.target_node == instance.primary_node:
7128
        raise errors.OpPrereqError("Cannot migrate instance %s"
7129
                                   " to its primary (%s)" %
7130
                                   (instance.name, instance.primary_node))
7131

    
7132
      if len(self.lu.tasklets) == 1:
7133
        # It is safe to release locks only when we're the only tasklet
7134
        # in the LU
7135
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7136
                      keep=[instance.primary_node, self.target_node])
7137

    
7138
    else:
7139
      secondary_nodes = instance.secondary_nodes
7140
      if not secondary_nodes:
7141
        raise errors.ConfigurationError("No secondary node but using"
7142
                                        " %s disk template" %
7143
                                        instance.disk_template)
7144
      target_node = secondary_nodes[0]
7145
      if self.lu.op.iallocator or (self.lu.op.target_node and
7146
                                   self.lu.op.target_node != target_node):
7147
        if self.failover:
7148
          text = "failed over"
7149
        else:
7150
          text = "migrated"
7151
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7152
                                   " be %s to arbitrary nodes"
7153
                                   " (neither an iallocator nor a target"
7154
                                   " node can be passed)" %
7155
                                   (instance.disk_template, text),
7156
                                   errors.ECODE_INVAL)
7157

    
7158
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7159

    
7160
    # check memory requirements on the secondary node
7161
    if not self.failover or instance.admin_up:
7162
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7163
                           instance.name, i_be[constants.BE_MEMORY],
7164
                           instance.hypervisor)
7165
    else:
7166
      self.lu.LogInfo("Not checking memory on the secondary node as"
7167
                      " instance will not be started")
7168

    
7169
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7171

    
7172
    if not self.cleanup:
7173
      _CheckNodeNotDrained(self.lu, target_node)
7174
      if not self.failover:
7175
        result = self.rpc.call_instance_migratable(instance.primary_node,
7176
                                                   instance)
7177
        if result.fail_msg and self.fallback:
7178
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7179
                          " failover")
7180
          self.failover = True
7181
        else:
7182
          result.Raise("Can't migrate, please use failover",
7183
                       prereq=True, ecode=errors.ECODE_STATE)
7184

    
7185
    assert not (self.failover and self.cleanup)
7186

    
7187
    if not self.failover:
7188
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7189
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7190
                                   " parameters are accepted",
7191
                                   errors.ECODE_INVAL)
7192
      if self.lu.op.live is not None:
7193
        if self.lu.op.live:
7194
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7195
        else:
7196
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7197
        # reset the 'live' parameter to None so that repeated
7198
        # invocations of CheckPrereq do not raise an exception
7199
        self.lu.op.live = None
7200
      elif self.lu.op.mode is None:
7201
        # read the default value from the hypervisor
7202
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7203
                                                skip_globals=False)
7204
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7205

    
7206
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7207
    else:
7208
      # Failover is never live
7209
      self.live = False
7210

    
7211
  def _RunAllocator(self):
7212
    """Run the allocator based on input opcode.
7213

7214
    """
7215
    ial = IAllocator(self.cfg, self.rpc,
7216
                     mode=constants.IALLOCATOR_MODE_RELOC,
7217
                     name=self.instance_name,
7218
                     # TODO See why hail breaks with a single node below
7219
                     relocate_from=[self.instance.primary_node,
7220
                                    self.instance.primary_node],
7221
                     )
7222

    
7223
    ial.Run(self.lu.op.iallocator)
7224

    
7225
    if not ial.success:
7226
      raise errors.OpPrereqError("Can't compute nodes using"
7227
                                 " iallocator '%s': %s" %
7228
                                 (self.lu.op.iallocator, ial.info),
7229
                                 errors.ECODE_NORES)
7230
    if len(ial.result) != ial.required_nodes:
7231
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7232
                                 " of nodes (%s), required %s" %
7233
                                 (self.lu.op.iallocator, len(ial.result),
7234
                                  ial.required_nodes), errors.ECODE_FAULT)
7235
    self.target_node = ial.result[0]
7236
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7237
                 self.instance_name, self.lu.op.iallocator,
7238
                 utils.CommaJoin(ial.result))
7239

    
7240
  def _WaitUntilSync(self):
7241
    """Poll with custom rpc for disk sync.
7242

7243
    This uses our own step-based rpc call.
7244

7245
    """
7246
    self.feedback_fn("* wait until resync is done")
7247
    all_done = False
7248
    while not all_done:
7249
      all_done = True
7250
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7251
                                            self.nodes_ip,
7252
                                            self.instance.disks)
7253
      min_percent = 100
7254
      for node, nres in result.items():
7255
        nres.Raise("Cannot resync disks on node %s" % node)
7256
        node_done, node_percent = nres.payload
7257
        all_done = all_done and node_done
7258
        if node_percent is not None:
7259
          min_percent = min(min_percent, node_percent)
7260
      if not all_done:
7261
        if min_percent < 100:
7262
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7263
        time.sleep(2)
7264

    
7265
  def _EnsureSecondary(self, node):
7266
    """Demote a node to secondary.
7267

7268
    """
7269
    self.feedback_fn("* switching node %s to secondary mode" % node)
7270

    
7271
    for dev in self.instance.disks:
7272
      self.cfg.SetDiskID(dev, node)
7273

    
7274
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7275
                                          self.instance.disks)
7276
    result.Raise("Cannot change disk to secondary on node %s" % node)
7277

    
7278
  def _GoStandalone(self):
7279
    """Disconnect from the network.
7280

7281
    """
7282
    self.feedback_fn("* changing into standalone mode")
7283
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7284
                                               self.instance.disks)
7285
    for node, nres in result.items():
7286
      nres.Raise("Cannot disconnect disks node %s" % node)
7287

    
7288
  def _GoReconnect(self, multimaster):
7289
    """Reconnect to the network.
7290

7291
    """
7292
    if multimaster:
7293
      msg = "dual-master"
7294
    else:
7295
      msg = "single-master"
7296
    self.feedback_fn("* changing disks into %s mode" % msg)
7297
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7298
                                           self.instance.disks,
7299
                                           self.instance.name, multimaster)
7300
    for node, nres in result.items():
7301
      nres.Raise("Cannot change disks config on node %s" % node)
7302

    
7303
  def _ExecCleanup(self):
7304
    """Try to cleanup after a failed migration.
7305

7306
    The cleanup is done by:
7307
      - check that the instance is running only on one node
7308
        (and update the config if needed)
7309
      - change disks on its secondary node to secondary
7310
      - wait until disks are fully synchronized
7311
      - disconnect from the network
7312
      - change disks into single-master mode
7313
      - wait again until disks are fully synchronized
7314

7315
    """
7316
    instance = self.instance
7317
    target_node = self.target_node
7318
    source_node = self.source_node
7319

    
7320
    # check running on only one node
7321
    self.feedback_fn("* checking where the instance actually runs"
7322
                     " (if this hangs, the hypervisor might be in"
7323
                     " a bad state)")
7324
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7325
    for node, result in ins_l.items():
7326
      result.Raise("Can't contact node %s" % node)
7327

    
7328
    runningon_source = instance.name in ins_l[source_node].payload
7329
    runningon_target = instance.name in ins_l[target_node].payload
7330

    
7331
    if runningon_source and runningon_target:
7332
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7333
                               " or the hypervisor is confused; you will have"
7334
                               " to ensure manually that it runs only on one"
7335
                               " and restart this operation")
7336

    
7337
    if not (runningon_source or runningon_target):
7338
      raise errors.OpExecError("Instance does not seem to be running at all;"
7339
                               " in this case it's safer to repair by"
7340
                               " running 'gnt-instance stop' to ensure disk"
7341
                               " shutdown, and then restarting it")
7342

    
7343
    if runningon_target:
7344
      # the migration has actually succeeded, we need to update the config
7345
      self.feedback_fn("* instance running on secondary node (%s),"
7346
                       " updating config" % target_node)
7347
      instance.primary_node = target_node
7348
      self.cfg.Update(instance, self.feedback_fn)
7349
      demoted_node = source_node
7350
    else:
7351
      self.feedback_fn("* instance confirmed to be running on its"
7352
                       " primary node (%s)" % source_node)
7353
      demoted_node = target_node
7354

    
7355
    if instance.disk_template in constants.DTS_INT_MIRROR:
7356
      self._EnsureSecondary(demoted_node)
7357
      try:
7358
        self._WaitUntilSync()
7359
      except errors.OpExecError:
7360
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
7362
        pass
7363
      self._GoStandalone()
7364
      self._GoReconnect(False)
7365
      self._WaitUntilSync()
7366

    
7367
    self.feedback_fn("* done")
7368

    
7369
  def _RevertDiskStatus(self):
7370
    """Try to revert the disk status after a failed migration.
7371

7372
    """
7373
    target_node = self.target_node
7374
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7375
      return
7376

    
7377
    try:
7378
      self._EnsureSecondary(target_node)
7379
      self._GoStandalone()
7380
      self._GoReconnect(False)
7381
      self._WaitUntilSync()
7382
    except errors.OpExecError, err:
7383
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7384
                         " please try to recover the instance manually;"
7385
                         " error '%s'" % str(err))
7386

    
7387
  def _AbortMigration(self):
7388
    """Call the hypervisor code to abort a started migration.
7389

7390
    """
7391
    instance = self.instance
7392
    target_node = self.target_node
7393
    migration_info = self.migration_info
7394

    
7395
    abort_result = self.rpc.call_finalize_migration(target_node,
7396
                                                    instance,
7397
                                                    migration_info,
7398
                                                    False)
7399
    abort_msg = abort_result.fail_msg
7400
    if abort_msg:
7401
      logging.error("Aborting migration failed on target node %s: %s",
7402
                    target_node, abort_msg)
7403
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7405

    
7406
  def _ExecMigration(self):
7407
    """Migrate an instance.
7408

7409
    The migrate is done by:
7410
      - change the disks into dual-master mode
7411
      - wait until disks are fully synchronized again
7412
      - migrate the instance
7413
      - change disks on the new secondary node (the old primary) to secondary
7414
      - wait until disks are fully synchronized
7415
      - change disks into single-master mode
7416

7417
    """
7418
    instance = self.instance
7419
    target_node = self.target_node
7420
    source_node = self.source_node
7421

    
7422
    # Check for hypervisor version mismatch and warn the user.
7423
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7424
                                       None, self.instance.hypervisor)
7425
    src_info = nodeinfo[source_node]
7426
    dst_info = nodeinfo[target_node]
7427

    
7428
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7429
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7430
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7431
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7432
      if src_version != dst_version:
7433
        self.feedback_fn("* warning: hypervisor version mismatch between"
7434
                         " source (%s) and target (%s) node" %
7435
                         (src_version, dst_version))
7436

    
7437
    self.feedback_fn("* checking disk consistency between source and target")
7438
    for dev in instance.disks:
7439
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7440
        raise errors.OpExecError("Disk %s is degraded or not fully"
7441
                                 " synchronized on target node,"
7442
                                 " aborting migration" % dev.iv_name)
7443

    
7444
    # First get the migration information from the remote node
7445
    result = self.rpc.call_migration_info(source_node, instance)
7446
    msg = result.fail_msg
7447
    if msg:
7448
      log_err = ("Failed fetching source migration information from %s: %s" %
7449
                 (source_node, msg))
7450
      logging.error(log_err)
7451
      raise errors.OpExecError(log_err)
7452

    
7453
    self.migration_info = migration_info = result.payload
7454

    
7455
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7456
      # Then switch the disks to master/master mode
7457
      self._EnsureSecondary(target_node)
7458
      self._GoStandalone()
7459
      self._GoReconnect(True)
7460
      self._WaitUntilSync()
7461

    
7462
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7463
    result = self.rpc.call_accept_instance(target_node,
7464
                                           instance,
7465
                                           migration_info,
7466
                                           self.nodes_ip[target_node])
7467

    
7468
    msg = result.fail_msg
7469
    if msg:
7470
      logging.error("Instance pre-migration failed, trying to revert"
7471
                    " disk status: %s", msg)
7472
      self.feedback_fn("Pre-migration failed, aborting")
7473
      self._AbortMigration()
7474
      self._RevertDiskStatus()
7475
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7476
                               (instance.name, msg))
7477

    
7478
    self.feedback_fn("* migrating instance to %s" % target_node)
7479
    result = self.rpc.call_instance_migrate(source_node, instance,
7480
                                            self.nodes_ip[target_node],
7481
                                            self.live)
7482
    msg = result.fail_msg
7483
    if msg:
7484
      logging.error("Instance migration failed, trying to revert"
7485
                    " disk status: %s", msg)
7486
      self.feedback_fn("Migration failed, aborting")
7487
      self._AbortMigration()
7488
      self._RevertDiskStatus()
7489
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7490
                               (instance.name, msg))
7491

    
7492
    instance.primary_node = target_node
7493
    # distribute new instance config to the other nodes
7494
    self.cfg.Update(instance, self.feedback_fn)
7495

    
7496
    result = self.rpc.call_finalize_migration(target_node,
7497
                                              instance,
7498
                                              migration_info,
7499
                                              True)
7500
    msg = result.fail_msg
7501
    if msg:
7502
      logging.error("Instance migration succeeded, but finalization failed:"
7503
                    " %s", msg)
7504
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7505
                               msg)
7506

    
7507
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7508
      self._EnsureSecondary(source_node)
7509
      self._WaitUntilSync()
7510
      self._GoStandalone()
7511
      self._GoReconnect(False)
7512
      self._WaitUntilSync()
7513

    
7514
    self.feedback_fn("* done")
7515

    
7516
  def _ExecFailover(self):
7517
    """Failover an instance.
7518

7519
    The failover is done by shutting it down on its present node and
7520
    starting it on the secondary.
7521

7522
    """
7523
    instance = self.instance
7524
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7525

    
7526
    source_node = instance.primary_node
7527
    target_node = self.target_node
7528

    
7529
    if instance.admin_up:
7530
      self.feedback_fn("* checking disk consistency between source and target")
7531
      for dev in instance.disks:
7532
        # for drbd, these are drbd over lvm
7533
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7534
          if primary_node.offline:
7535
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7536
                             " target node %s" %
7537
                             (primary_node.name, dev.iv_name, target_node))
7538
          elif not self.ignore_consistency:
7539
            raise errors.OpExecError("Disk %s is degraded on target node,"
7540
                                     " aborting failover" % dev.iv_name)
7541
    else:
7542
      self.feedback_fn("* not checking disk consistency as instance is not"
7543
                       " running")
7544

    
7545
    self.feedback_fn("* shutting down instance on source node")
7546
    logging.info("Shutting down instance %s on node %s",
7547
                 instance.name, source_node)
7548

    
7549
    result = self.rpc.call_instance_shutdown(source_node, instance,
7550
                                             self.shutdown_timeout)
7551
    msg = result.fail_msg
7552
    if msg:
7553
      if self.ignore_consistency or primary_node.offline:
7554
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7555
                           " proceeding anyway; please make sure node"
7556
                           " %s is down; error details: %s",
7557
                           instance.name, source_node, source_node, msg)
7558
      else:
7559
        raise errors.OpExecError("Could not shutdown instance %s on"
7560
                                 " node %s: %s" %
7561
                                 (instance.name, source_node, msg))
7562

    
7563
    self.feedback_fn("* deactivating the instance's disks on source node")
7564
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7565
      raise errors.OpExecError("Can't shut down the instance's disks")
7566

    
7567
    instance.primary_node = target_node
7568
    # distribute new instance config to the other nodes
7569
    self.cfg.Update(instance, self.feedback_fn)
7570

    
7571
    # Only start the instance if it's marked as up
7572
    if instance.admin_up:
7573
      self.feedback_fn("* activating the instance's disks on target node %s" %
7574
                       target_node)
7575
      logging.info("Starting instance %s on node %s",
7576
                   instance.name, target_node)
7577

    
7578
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7579
                                           ignore_secondaries=True)
7580
      if not disks_ok:
7581
        _ShutdownInstanceDisks(self.lu, instance)
7582
        raise errors.OpExecError("Can't activate the instance's disks")
7583

    
7584
      self.feedback_fn("* starting the instance on the target node %s" %
7585
                       target_node)
7586
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7587
                                            False)
7588
      msg = result.fail_msg
7589
      if msg:
7590
        _ShutdownInstanceDisks(self.lu, instance)
7591
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7592
                                 (instance.name, target_node, msg))
7593

    
7594
  def Exec(self, feedback_fn):
7595
    """Perform the migration.
7596

7597
    """
7598
    self.feedback_fn = feedback_fn
7599
    self.source_node = self.instance.primary_node
7600

    
7601
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7602
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7603
      self.target_node = self.instance.secondary_nodes[0]
7604
      # Otherwise self.target_node has been populated either
7605
      # directly, or through an iallocator.
7606

    
7607
    self.all_nodes = [self.source_node, self.target_node]
7608
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7609
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7610

    
7611
    if self.failover:
7612
      feedback_fn("Failover instance %s" % self.instance.name)
7613
      self._ExecFailover()
7614
    else:
7615
      feedback_fn("Migrating instance %s" % self.instance.name)
7616

    
7617
      if self.cleanup:
7618
        return self._ExecCleanup()
7619
      else:
7620
        return self._ExecMigration()
7621

    
7622

    
7623
def _CreateBlockDev(lu, node, instance, device, force_create,
7624
                    info, force_open):
7625
  """Create a tree of block devices on a given node.
7626

7627
  If this device type has to be created on secondaries, create it and
7628
  all its children.
7629

7630
  If not, just recurse to children keeping the same 'force' value.
7631

7632
  @param lu: the lu on whose behalf we execute
7633
  @param node: the node on which to create the device
7634
  @type instance: L{objects.Instance}
7635
  @param instance: the instance which owns the device
7636
  @type device: L{objects.Disk}
7637
  @param device: the device to create
7638
  @type force_create: boolean
7639
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7649

7650
  """
7651
  if device.CreateOnSecondary():
7652
    force_create = True
7653

    
7654
  if device.children:
7655
    for child in device.children:
7656
      _CreateBlockDev(lu, node, instance, child, force_create,
7657
                      info, force_open)
7658

    
7659
  if not force_create:
7660
    return
7661

    
7662
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
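
  # Illustrative note (derived from the recursion above, not original text):
  # for a DRBD8 disk the effective call order on a node is
  #   _CreateBlockDev(drbd)          # CreateOnSecondary() -> force_create=True
  #     _CreateSingleBlockDev(data LV)   # child, created first
  #     _CreateSingleBlockDev(meta LV)   # child, created first
  #   _CreateSingleBlockDev(drbd)        # the DRBD device itself, created last
  # so children always exist before their parent device is created.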
7663

    
7664

    
7665
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7666
  """Create a single block device on a given node.
7667

7668
  This will not recurse over children of the device, so they must be
7669
  created in advance.
7670

7671
  @param lu: the lu on whose behalf we execute
7672
  @param node: the node on which to create the device
7673
  @type instance: L{objects.Instance}
7674
  @param instance: the instance which owns the device
7675
  @type device: L{objects.Disk}
7676
  @param device: the device to create
7677
  @param info: the extra 'metadata' we should attach to the device
7678
      (this will be represented as a LVM tag)
7679
  @type force_open: boolean
7680
  @param force_open: this parameter will be passes to the
7681
      L{backend.BlockdevCreate} function where it specifies
7682
      whether we run on primary or not, and it affects both
7683
      the child assembly and the device own Open() execution
7684

7685
  """
7686
  lu.cfg.SetDiskID(device, node)
7687
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7688
                                       instance.name, force_open, info)
7689
  result.Raise("Can't create block device %s on"
7690
               " node %s for instance %s" % (device, node, instance.name))
7691
  if device.physical_id is None:
7692
    device.physical_id = result.payload
7693

    
7694

    
7695
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
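
# Illustrative sketch (not part of the original code): for a 10240 MiB disk
# this helper returns a device tree roughly like
#   DRBD8(size=10240, minors=(p_minor, s_minor), port=<allocated>)
#     |- LV data (size=10240, logical_id=(vgnames[0], names[0]))
#     |- LV meta (size=128,   logical_id=(vgnames[1], names[1]))
# i.e. the 128 MiB metadata volume is the per-disk overhead that the DRBD
# sizing helpers further below also account for.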
7727

    
7728

    
7729
def _GenerateDiskTemplate(lu, template_name,
7730
                          instance_name, primary_node,
7731
                          secondary_nodes, disk_info,
7732
                          file_storage_dir, file_driver,
7733
                          base_index, feedback_fn):
7734
  """Generate the entire disk layout for a given template type.
7735

7736
  """
7737
  #TODO: compute space requirements
7738

    
7739
  vgname = lu.cfg.GetVGName()
7740
  disk_count = len(disk_info)
7741
  disks = []
7742
  if template_name == constants.DT_DISKLESS:
7743
    pass
7744
  elif template_name == constants.DT_PLAIN:
7745
    if len(secondary_nodes) != 0:
7746
      raise errors.ProgrammerError("Wrong template configuration")
7747

    
7748
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7749
                                      for i in range(disk_count)])
7750
    for idx, disk in enumerate(disk_info):
7751
      disk_index = idx + base_index
7752
      vg = disk.get(constants.IDISK_VG, vgname)
7753
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7754
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7755
                              size=disk[constants.IDISK_SIZE],
7756
                              logical_id=(vg, names[idx]),
7757
                              iv_name="disk/%d" % disk_index,
7758
                              mode=disk[constants.IDISK_MODE])
7759
      disks.append(disk_dev)
7760
  elif template_name == constants.DT_DRBD8:
7761
    if len(secondary_nodes) != 1:
7762
      raise errors.ProgrammerError("Wrong template configuration")
7763
    remote_node = secondary_nodes[0]
7764
    minors = lu.cfg.AllocateDRBDMinor(
7765
      [primary_node, remote_node] * len(disk_info), instance_name)
7766

    
7767
    names = []
7768
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7769
                                               for i in range(disk_count)]):
7770
      names.append(lv_prefix + "_data")
7771
      names.append(lv_prefix + "_meta")
7772
    for idx, disk in enumerate(disk_info):
7773
      disk_index = idx + base_index
7774
      data_vg = disk.get(constants.IDISK_VG, vgname)
7775
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7776
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7777
                                      disk[constants.IDISK_SIZE],
7778
                                      [data_vg, meta_vg],
7779
                                      names[idx * 2:idx * 2 + 2],
7780
                                      "disk/%d" % disk_index,
7781
                                      minors[idx * 2], minors[idx * 2 + 1])
7782
      disk_dev.mode = disk[constants.IDISK_MODE]
7783
      disks.append(disk_dev)
7784
  elif template_name == constants.DT_FILE:
7785
    if len(secondary_nodes) != 0:
7786
      raise errors.ProgrammerError("Wrong template configuration")
7787

    
7788
    opcodes.RequireFileStorage()
7789

    
7790
    for idx, disk in enumerate(disk_info):
7791
      disk_index = idx + base_index
7792
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7793
                              size=disk[constants.IDISK_SIZE],
7794
                              iv_name="disk/%d" % disk_index,
7795
                              logical_id=(file_driver,
7796
                                          "%s/disk%d" % (file_storage_dir,
7797
                                                         disk_index)),
7798
                              mode=disk[constants.IDISK_MODE])
7799
      disks.append(disk_dev)
7800
  elif template_name == constants.DT_SHARED_FILE:
7801
    if len(secondary_nodes) != 0:
7802
      raise errors.ProgrammerError("Wrong template configuration")
7803

    
7804
    opcodes.RequireSharedFileStorage()
7805

    
7806
    for idx, disk in enumerate(disk_info):
7807
      disk_index = idx + base_index
7808
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7809
                              size=disk[constants.IDISK_SIZE],
7810
                              iv_name="disk/%d" % disk_index,
7811
                              logical_id=(file_driver,
7812
                                          "%s/disk%d" % (file_storage_dir,
7813
                                                         disk_index)),
7814
                              mode=disk[constants.IDISK_MODE])
7815
      disks.append(disk_dev)
7816
  elif template_name == constants.DT_BLOCK:
7817
    if len(secondary_nodes) != 0:
7818
      raise errors.ProgrammerError("Wrong template configuration")
7819

    
7820
    for idx, disk in enumerate(disk_info):
7821
      disk_index = idx + base_index
7822
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7823
                              size=disk[constants.IDISK_SIZE],
7824
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7825
                                          disk[constants.IDISK_ADOPT]),
7826
                              iv_name="disk/%d" % disk_index,
7827
                              mode=disk[constants.IDISK_MODE])
7828
      disks.append(disk_dev)
7829

    
7830
  else:
7831
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7832
  return disks
7833

    
7834

    
7835
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
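
# Worked example (illustrative): if 512 MiB out of 2048 MiB have been
# written in 60 seconds, avg_time is 60 / 512, about 0.117 s/MiB, and the
# ETA is (2048 - 512) * 0.117, i.e. about 180 seconds at the observed rate.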
7853

    
7854

    
7855
def _WipeDisks(lu, instance):
7856
  """Wipes instance disks.
7857

7858
  @type lu: L{LogicalUnit}
7859
  @param lu: the logical unit on whose behalf we execute
7860
  @type instance: L{objects.Instance}
7861
  @param instance: the instance whose disks we should wipe
7862
  @return: the success of the wipe
7863

7864
  """
7865
  node = instance.primary_node
7866

    
7867
  for device in instance.disks:
7868
    lu.cfg.SetDiskID(device, node)
7869

    
7870
  logging.info("Pause sync of instance %s disks", instance.name)
7871
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7872

    
7873
  for idx, success in enumerate(result.payload):
7874
    if not success:
7875
      logging.warn("pause-sync of instance %s for disks %d failed",
7876
                   instance.name, idx)
7877

    
7878
  try:
7879
    for idx, device in enumerate(instance.disks):
7880
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7881
      # MAX_WIPE_CHUNK at max
7882
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7883
                            constants.MIN_WIPE_CHUNK_PERCENT)
7884
      # we _must_ make this an int, otherwise rounding errors will
7885
      # occur
7886
      wipe_chunk_size = int(wipe_chunk_size)
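
      # Worked example (assuming the usual values of constants.MAX_WIPE_CHUNK
      # = 1024 MiB and constants.MIN_WIPE_CHUNK_PERCENT = 10): a 20 GiB disk
      # gives min(1024, 20480 / 100.0 * 10) = min(1024, 2048) = 1024 MiB
      # chunks, while a 5 GiB disk gives min(1024, 512) = 512 MiB chunks.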
7887

    
7888
      lu.LogInfo("* Wiping disk %d", idx)
7889
      logging.info("Wiping disk %d for instance %s, node %s using"
7890
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7891

    
7892
      offset = 0
7893
      size = device.size
7894
      last_output = 0
7895
      start_time = time.time()
7896

    
7897
      while offset < size:
7898
        wipe_size = min(wipe_chunk_size, size - offset)
7899
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7900
                      idx, offset, wipe_size)
7901
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7902
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7903
                     (idx, offset, wipe_size))
7904
        now = time.time()
7905
        offset += wipe_size
7906
        if now - last_output >= 60:
7907
          eta = _CalcEta(now - start_time, offset, size)
7908
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7909
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7910
          last_output = now
7911
  finally:
7912
    logging.info("Resume sync of instance %s disks", instance.name)
7913

    
7914
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7915

    
7916
    for idx, success in enumerate(result.payload):
7917
      if not success:
7918
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7919
                      " look at the status and troubleshoot the issue", idx)
7920
        logging.warn("resume-sync of instance %s for disks %d failed",
7921
                     instance.name, idx)
7922

    
7923

    
7924
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7925
  """Create all disks for an instance.
7926

7927
  This abstracts away some work from AddInstance.
7928

7929
  @type lu: L{LogicalUnit}
7930
  @param lu: the logical unit on whose behalf we execute
7931
  @type instance: L{objects.Instance}
7932
  @param instance: the instance whose disks we should create
7933
  @type to_skip: list
7934
  @param to_skip: list of indices to skip
7935
  @type target_node: string
7936
  @param target_node: if passed, overrides the target node for creation
7937
  @rtype: boolean
7938
  @return: the success of the creation
7939

7940
  """
7941
  info = _GetInstanceInfoText(instance)
7942
  if target_node is None:
7943
    pnode = instance.primary_node
7944
    all_nodes = instance.all_nodes
7945
  else:
7946
    pnode = target_node
7947
    all_nodes = [pnode]
7948

    
7949
  if instance.disk_template in constants.DTS_FILEBASED:
7950
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7951
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7952

    
7953
    result.Raise("Failed to create directory '%s' on"
7954
                 " node %s" % (file_storage_dir, pnode))
7955

    
7956
  # Note: this needs to be kept in sync with adding of disks in
7957
  # LUInstanceSetParams
7958
  for idx, device in enumerate(instance.disks):
7959
    if to_skip and idx in to_skip:
7960
      continue
7961
    logging.info("Creating volume %s for instance %s",
7962
                 device.iv_name, instance.name)
7963
    #HARDCODE
7964
    for node in all_nodes:
7965
      f_create = node == pnode
7966
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7967

    
7968

    
7969
def _RemoveDisks(lu, instance, target_node=None):
7970
  """Remove all disks for an instance.
7971

7972
  This abstracts away some work from `AddInstance()` and
7973
  `RemoveInstance()`. Note that in case some of the devices couldn't
7974
  be removed, the removal will continue with the other ones (compare
7975
  with `_CreateDisks()`).
7976

7977
  @type lu: L{LogicalUnit}
7978
  @param lu: the logical unit on whose behalf we execute
7979
  @type instance: L{objects.Instance}
7980
  @param instance: the instance whose disks we should remove
7981
  @type target_node: string
7982
  @param target_node: used to override the node on which to remove the disks
7983
  @rtype: boolean
7984
  @return: the success of the removal
7985

7986
  """
7987
  logging.info("Removing block devices for instance %s", instance.name)
7988

    
7989
  all_result = True
7990
  for device in instance.disks:
7991
    if target_node:
7992
      edata = [(target_node, device)]
7993
    else:
7994
      edata = device.ComputeNodeTree(instance.primary_node)
7995
    for node, disk in edata:
7996
      lu.cfg.SetDiskID(disk, node)
7997
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7998
      if msg:
7999
        lu.LogWarning("Could not remove block device %s on node %s,"
8000
                      " continuing anyway: %s", device.iv_name, node, msg)
8001
        all_result = False
8002

    
8003
  if instance.disk_template == constants.DT_FILE:
8004
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8005
    if target_node:
8006
      tgt = target_node
8007
    else:
8008
      tgt = instance.primary_node
8009
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8010
    if result.fail_msg:
8011
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8012
                    file_storage_dir, instance.primary_node, result.fail_msg)
8013
      all_result = False
8014

    
8015
  return all_result
8016

    
8017

    
8018
def _ComputeDiskSizePerVG(disk_template, disks):
8019
  """Compute disk size requirements in the volume group
8020

8021
  """
8022
  def _compute(disks, payload):
8023
    """Universal algorithm.
8024

8025
    """
8026
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs
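
    # Illustrative example (with the per-VG accumulation fixed above): for
    #   disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
    # and a per-disk payload of 128, _compute returns
    #   {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}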
8032

    
8033
  # Required free disk space as a function of disk and swap space
8034
  req_size_dict = {
8035
    constants.DT_DISKLESS: {},
8036
    constants.DT_PLAIN: _compute(disks, 0),
8037
    # 128 MB are added for drbd metadata for each disk
8038
    constants.DT_DRBD8: _compute(disks, 128),
8039
    constants.DT_FILE: {},
8040
    constants.DT_SHARED_FILE: {},
8041
  }
8042

    
8043
  if disk_template not in req_size_dict:
8044
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8045
                                 " is unknown" % disk_template)
8046

    
8047
  return req_size_dict[disk_template]
8048

    
8049

    
8050
def _ComputeDiskSize(disk_template, disks):
8051
  """Compute disk size requirements in the volume group
8052

8053
  """
8054
  # Required free disk space as a function of disk and swap space
8055
  req_size_dict = {
8056
    constants.DT_DISKLESS: None,
8057
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8058
    # 128 MB are added for drbd metadata for each disk
8059
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8060
    constants.DT_FILE: None,
8061
    constants.DT_SHARED_FILE: 0,
8062
    constants.DT_BLOCK: 0,
8063
  }
8064

    
8065
  if disk_template not in req_size_dict:
8066
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8067
                                 " is unknown" % disk_template)
8068

    
8069
  return req_size_dict[disk_template]
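
  # Worked example (illustrative): two DRBD8 disks of 1024 MiB and 2048 MiB
  # need (1024 + 128) + (2048 + 128) = 3328 MiB of free space, the extra
  # 128 MiB per disk being the DRBD metadata volume.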
8070

    
8071

    
8072
def _FilterVmNodes(lu, nodenames):
8073
  """Filters out non-vm_capable nodes from a list.
8074

8075
  @type lu: L{LogicalUnit}
8076
  @param lu: the logical unit for which we check
8077
  @type nodenames: list
8078
  @param nodenames: the list of nodes on which we should check
8079
  @rtype: list
8080
  @return: the list of vm-capable nodes
8081

8082
  """
8083
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
8085

    
8086

    
8087
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8088
  """Hypervisor parameter validation.
8089

8090
  This function abstract the hypervisor parameter validation to be
8091
  used in both instance create and instance modify.
8092

8093
  @type lu: L{LogicalUnit}
8094
  @param lu: the logical unit for which we check
8095
  @type nodenames: list
8096
  @param nodenames: the list of nodes on which we should check
8097
  @type hvname: string
8098
  @param hvname: the name of the hypervisor we should use
8099
  @type hvparams: dict
8100
  @param hvparams: the parameters which we need to check
8101
  @raise errors.OpPrereqError: if the parameters are not valid
8102

8103
  """
8104
  nodenames = _FilterVmNodes(lu, nodenames)
8105
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8106
                                                  hvname,
8107
                                                  hvparams)
8108
  for node in nodenames:
8109
    info = hvinfo[node]
8110
    if info.offline:
8111
      continue
8112
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8113

    
8114

    
8115
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8116
  """OS parameters validation.
8117

8118
  @type lu: L{LogicalUnit}
8119
  @param lu: the logical unit for which we check
8120
  @type required: boolean
8121
  @param required: whether the validation should fail if the OS is not
8122
      found
8123
  @type nodenames: list
8124
  @param nodenames: the list of nodes on which we should check
8125
  @type osname: string
8126
  @param osname: the name of the hypervisor we should use
8127
  @type osparams: dict
8128
  @param osparams: the parameters which we need to check
8129
  @raise errors.OpPrereqError: if the parameters are not valid
8130

8131
  """
8132
  nodenames = _FilterVmNodes(lu, nodenames)
8133
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8134
                                   [constants.OS_VALIDATE_PARAMETERS],
8135
                                   osparams)
8136
  for node, nres in result.items():
8137
    # we don't check for offline cases since this should be run only
8138
    # against the master node and/or an instance's nodes
8139
    nres.Raise("OS Parameters validation failed on node %s" % node)
8140
    if not nres.payload:
8141
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8142
                 osname, node)
8143

    
8144

    
8145
class LUInstanceCreate(LogicalUnit):
8146
  """Create an instance.
8147

8148
  """
8149
  HPATH = "instance-add"
8150
  HTYPE = constants.HTYPE_INSTANCE
8151
  REQ_BGL = False
8152

    
8153
  def CheckArguments(self):
8154
    """Check arguments.
8155

8156
    """
8157
    # do not require name_check to ease forward/backward compatibility
8158
    # for tools
8159
    if self.op.no_install and self.op.start:
8160
      self.LogInfo("No-installation mode selected, disabling startup")
8161
      self.op.start = False
8162
    # validate/normalize the instance name
8163
    self.op.instance_name = \
8164
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8165

    
8166
    if self.op.ip_check and not self.op.name_check:
8167
      # TODO: make the ip check more flexible and not depend on the name check
8168
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8169
                                 " check", errors.ECODE_INVAL)
8170

    
8171
    # check nics' parameter names
8172
    for nic in self.op.nics:
8173
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8174

    
8175
    # check disks. parameter names and consistent adopt/no-adopt strategy
8176
    has_adopt = has_no_adopt = False
8177
    for disk in self.op.disks:
8178
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8179
      if constants.IDISK_ADOPT in disk:
8180
        has_adopt = True
8181
      else:
8182
        has_no_adopt = True
8183
    if has_adopt and has_no_adopt:
8184
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8185
                                 errors.ECODE_INVAL)
8186
    if has_adopt:
8187
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8188
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8189
                                   " '%s' disk template" %
8190
                                   self.op.disk_template,
8191
                                   errors.ECODE_INVAL)
8192
      if self.op.iallocator is not None:
8193
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8194
                                   " iallocator script", errors.ECODE_INVAL)
8195
      if self.op.mode == constants.INSTANCE_IMPORT:
8196
        raise errors.OpPrereqError("Disk adoption not allowed for"
8197
                                   " instance import", errors.ECODE_INVAL)
8198
    else:
8199
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8200
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8201
                                   " but no 'adopt' parameter given" %
8202
                                   self.op.disk_template,
8203
                                   errors.ECODE_INVAL)
8204

    
8205
    self.adopt_disks = has_adopt
8206

    
8207
    # instance name verification
8208
    if self.op.name_check:
8209
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8210
      self.op.instance_name = self.hostname1.name
8211
      # used in CheckPrereq for ip ping check
8212
      self.check_ip = self.hostname1.ip
8213
    else:
8214
      self.check_ip = None
8215

    
8216
    # file storage checks
8217
    if (self.op.file_driver and
8218
        not self.op.file_driver in constants.FILE_DRIVER):
8219
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8220
                                 self.op.file_driver, errors.ECODE_INVAL)
8221

    
8222
    if self.op.disk_template == constants.DT_FILE:
8223
      opcodes.RequireFileStorage()
8224
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8225
      opcodes.RequireSharedFileStorage()
8226

    
8227
    ### Node/iallocator related checks
8228
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8229

    
8230
    if self.op.pnode is not None:
8231
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8232
        if self.op.snode is None:
8233
          raise errors.OpPrereqError("The networked disk templates need"
8234
                                     " a mirror node", errors.ECODE_INVAL)
8235
      elif self.op.snode:
8236
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8237
                        " template")
8238
        self.op.snode = None
8239

    
8240
    self._cds = _GetClusterDomainSecret()
8241

    
8242
    if self.op.mode == constants.INSTANCE_IMPORT:
8243
      # On import force_variant must be True, because if we forced it at
8244
      # initial install, our only chance when importing it back is that it
8245
      # works again!
8246
      self.op.force_variant = True
8247

    
8248
      if self.op.no_install:
8249
        self.LogInfo("No-installation mode has no effect during import")
8250

    
8251
    elif self.op.mode == constants.INSTANCE_CREATE:
8252
      if self.op.os_type is None:
8253
        raise errors.OpPrereqError("No guest OS specified",
8254
                                   errors.ECODE_INVAL)
8255
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8256
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8257
                                   " installation" % self.op.os_type,
8258
                                   errors.ECODE_STATE)
8259
      if self.op.disk_template is None:
8260
        raise errors.OpPrereqError("No disk template specified",
8261
                                   errors.ECODE_INVAL)
8262

    
8263
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8264
      # Check handshake to ensure both clusters have the same domain secret
8265
      src_handshake = self.op.source_handshake
8266
      if not src_handshake:
8267
        raise errors.OpPrereqError("Missing source handshake",
8268
                                   errors.ECODE_INVAL)
8269

    
8270
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8271
                                                           src_handshake)
8272
      if errmsg:
8273
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8274
                                   errors.ECODE_INVAL)
8275

    
8276
      # Load and check source CA
8277
      self.source_x509_ca_pem = self.op.source_x509_ca
8278
      if not self.source_x509_ca_pem:
8279
        raise errors.OpPrereqError("Missing source X509 CA",
8280
                                   errors.ECODE_INVAL)
8281

    
8282
      try:
8283
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8284
                                                    self._cds)
8285
      except OpenSSL.crypto.Error, err:
8286
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8287
                                   (err, ), errors.ECODE_INVAL)
8288

    
8289
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8290
      if errcode is not None:
8291
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8292
                                   errors.ECODE_INVAL)
8293

    
8294
      self.source_x509_ca = cert
8295

    
8296
      src_instance_name = self.op.source_instance_name
8297
      if not src_instance_name:
8298
        raise errors.OpPrereqError("Missing source instance name",
8299
                                   errors.ECODE_INVAL)
8300

    
8301
      self.source_instance_name = \
8302
          netutils.GetHostname(name=src_instance_name).name
8303

    
8304
    else:
8305
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8306
                                 self.op.mode, errors.ECODE_INVAL)
8307

    
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

9592
  def Exec(self, feedback_fn):
9593
    """Execute disk replacement.
9594

9595
    This dispatches the disk replacement to the appropriate handler.
9596

9597
    """
9598
    if self.delay_iallocator:
9599
      self._CheckPrereq2()
9600

    
9601
    if __debug__:
9602
      # Verify owned locks before starting operation
9603
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9604
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9605
          ("Incorrect node locks, owning %s, expected %s" %
9606
           (owned_nodes, self.node_secondary_ip.keys()))
9607

    
9608
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9609
      assert list(owned_instances) == [self.instance_name], \
9610
          "Instance '%s' not locked" % self.instance_name
9611

    
9612
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9613
          "Should not own any node group lock at this point"
9614

    
9615
    if not self.disks:
9616
      feedback_fn("No disks need replacement")
9617
      return
9618

    
9619
    feedback_fn("Replacing disk(s) %s for %s" %
9620
                (utils.CommaJoin(self.disks), self.instance.name))
9621

    
9622
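    # If the instance is administratively down its disks are normally not
    # assembled, so remember that we have to activate them for the duration
    # of the replacement and shut them down again afterwards.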
    activate_disks = (not self.instance.admin_up)
9623

    
9624
    # Activate the instance disks if we're replacing them on a down instance
9625
    if activate_disks:
9626
      _StartInstanceDisks(self.lu, self.instance, True)
9627

    
9628
    try:
9629
      # Should we replace the secondary node?
9630
      if self.new_node is not None:
9631
        fn = self._ExecDrbd8Secondary
9632
      else:
9633
        fn = self._ExecDrbd8DiskOnly
9634

    
9635
      result = fn(feedback_fn)
9636
    finally:
9637
      # Deactivate the instance disks if we're replacing them on a
9638
      # down instance
9639
      if activate_disks:
9640
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9641

    
9642
    if __debug__:
9643
      # Verify owned locks
9644
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9645
      nodes = frozenset(self.node_secondary_ip)
9646
      assert ((self.early_release and not owned_nodes) or
9647
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9648
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9649
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9650

    
9651
    return result
9652

    
9653
  def _CheckVolumeGroup(self, nodes):
9654
    self.lu.LogInfo("Checking volume groups")
9655

    
9656
    vgname = self.cfg.GetVGName()
9657

    
9658
    # Make sure volume group exists on all involved nodes
9659
    results = self.rpc.call_vg_list(nodes)
9660
    if not results:
9661
      raise errors.OpExecError("Can't list volume groups on the nodes")
9662

    
9663
    for node in nodes:
9664
      res = results[node]
9665
      res.Raise("Error checking node %s" % node)
9666
      if vgname not in res.payload:
9667
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9668
                                 (vgname, node))
9669

    
9670
  def _CheckDisksExistence(self, nodes):
9671
    # Check disk existence
9672
    for idx, dev in enumerate(self.instance.disks):
9673
      if idx not in self.disks:
9674
        continue
9675

    
9676
      for node in nodes:
9677
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9678
        self.cfg.SetDiskID(dev, node)
9679

    
9680
        result = self.rpc.call_blockdev_find(node, dev)
9681

    
9682
        msg = result.fail_msg
9683
        if msg or not result.payload:
9684
          if not msg:
9685
            msg = "disk not found"
9686
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9687
                                   (idx, node, msg))
9688

    
9689
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9690
    for idx, dev in enumerate(self.instance.disks):
9691
      if idx not in self.disks:
9692
        continue
9693

    
9694
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9695
                      (idx, node_name))
9696

    
9697
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9698
                                   ldisk=ldisk):
9699
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9700
                                 " replace disks for instance %s" %
9701
                                 (node_name, self.instance.name))
9702

    
9703
  def _CreateNewStorage(self, node_name):
9704
    """Create new storage on the primary or secondary node.
9705

9706
    This is only used for same-node replaces, not for changing the
9707
    secondary node, hence we don't want to modify the existing disk.
9708

9709
    """
9710
    iv_names = {}
9711

    
9712
    for idx, dev in enumerate(self.instance.disks):
9713
      if idx not in self.disks:
9714
        continue
9715

    
9716
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9717

    
9718
      self.cfg.SetDiskID(dev, node_name)
9719

    
9720
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9721
      names = _GenerateUniqueNames(self.lu, lv_names)
9722

    
9723
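      # Each DRBD disk has two LV children: data (index 0) and metadata
      # (index 1); the replacement LVs are created in the same volume groups
      # as the ones they replace.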
      vg_data = dev.children[0].logical_id[0]
9724
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9725
                             logical_id=(vg_data, names[0]))
9726
      vg_meta = dev.children[1].logical_id[0]
9727
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9728
                             logical_id=(vg_meta, names[1]))
9729

    
9730
      new_lvs = [lv_data, lv_meta]
9731
      old_lvs = [child.Copy() for child in dev.children]
9732
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9733

    
9734
      # we pass force_create=True to force the LVM creation
9735
      for new_lv in new_lvs:
9736
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9737
                        _GetInstanceInfoText(self.instance), False)
9738

    
9739
    return iv_names
9740

    
9741
  def _CheckDevices(self, node_name, iv_names):
9742
    for name, (dev, _, _) in iv_names.iteritems():
9743
      self.cfg.SetDiskID(dev, node_name)
9744

    
9745
      result = self.rpc.call_blockdev_find(node_name, dev)
9746

    
9747
      msg = result.fail_msg
9748
      if msg or not result.payload:
9749
        if not msg:
9750
          msg = "disk not found"
9751
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9752
                                 (name, msg))
9753

    
9754
      if result.payload.is_degraded:
9755
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9756

    
9757
  def _RemoveOldStorage(self, node_name, iv_names):
9758
    for name, (_, old_lvs, _) in iv_names.iteritems():
9759
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9760

    
9761
      for lv in old_lvs:
9762
        self.cfg.SetDiskID(lv, node_name)
9763

    
9764
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9765
        if msg:
9766
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9767
                             hint="remove unused LVs manually")
9768

    
9769
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9770
    """Replace a disk on the primary or secondary for DRBD 8.
9771

9772
    The algorithm for replace is quite complicated:
9773

9774
      1. for each disk to be replaced:
9775

9776
        1. create new LVs on the target node with unique names
9777
        1. detach old LVs from the drbd device
9778
        1. rename old LVs to <name>_replaced-<time_t>
9779
        1. rename new LVs to old LVs
9780
        1. attach the new LVs (with the old names now) to the drbd device
9781

9782
      1. wait for sync across all devices
9783

9784
      1. for each modified disk:
9785

9786
        1. remove old LVs (which have the name <name>_replaced-<time_t>)
9787

9788
    Failures are not very well handled.
9789

9790
    """
9791
    steps_total = 6
9792

    
9793
    # Step: check device activation
9794
    self.lu.LogStep(1, steps_total, "Check device existence")
9795
    self._CheckDisksExistence([self.other_node, self.target_node])
9796
    self._CheckVolumeGroup([self.target_node, self.other_node])
9797

    
9798
    # Step: check other node consistency
9799
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9800
    self._CheckDisksConsistency(self.other_node,
9801
                                self.other_node == self.instance.primary_node,
9802
                                False)
9803

    
9804
    # Step: create new storage
9805
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9806
    iv_names = self._CreateNewStorage(self.target_node)
9807

    
9808
    # Step: for each lv, detach+rename*2+attach
9809
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9810
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9811
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9812

    
9813
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9814
                                                     old_lvs)
9815
      result.Raise("Can't detach drbd from local storage on node"
9816
                   " %s for device %s" % (self.target_node, dev.iv_name))
9817
      #dev.children = []
9818
      #cfg.Update(instance)
9819

    
9820
      # ok, we created the new LVs, so now we know we have the needed
9821
      # storage; as such, we proceed on the target node to rename
9822
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9823
      # using the assumption that logical_id == physical_id (which in
9824
      # turn is the unique_id on that node)
9825

    
9826
      # FIXME(iustin): use a better name for the replaced LVs
9827
      temp_suffix = int(time.time())
9828
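      # ren_fn keeps the volume group part of the physical id and appends
      # "_replaced-<timestamp>" to the LV name, i.e. the temporary name the
      # old LV is parked under until it is removed.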
      ren_fn = lambda d, suff: (d.physical_id[0],
9829
                                d.physical_id[1] + "_replaced-%s" % suff)
9830

    
9831
      # Build the rename list based on what LVs exist on the node
9832
      rename_old_to_new = []
9833
      for to_ren in old_lvs:
9834
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9835
        if not result.fail_msg and result.payload:
9836
          # device exists
9837
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9838

    
9839
      self.lu.LogInfo("Renaming the old LVs on the target node")
9840
      result = self.rpc.call_blockdev_rename(self.target_node,
9841
                                             rename_old_to_new)
9842
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9843

    
9844
      # Now we rename the new LVs to the old LVs
9845
      self.lu.LogInfo("Renaming the new LVs on the target node")
9846
      rename_new_to_old = [(new, old.physical_id)
9847
                           for old, new in zip(old_lvs, new_lvs)]
9848
      result = self.rpc.call_blockdev_rename(self.target_node,
9849
                                             rename_new_to_old)
9850
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9851

    
9852
      # Intermediate steps of in memory modifications
9853
      for old, new in zip(old_lvs, new_lvs):
9854
        new.logical_id = old.logical_id
9855
        self.cfg.SetDiskID(new, self.target_node)
9856

    
9857
      # We need to modify old_lvs so that removal later removes the
9858
      # right LVs, not the newly added ones; note that old_lvs is a
9859
      # copy here
9860
      for disk in old_lvs:
9861
        disk.logical_id = ren_fn(disk, temp_suffix)
9862
        self.cfg.SetDiskID(disk, self.target_node)
9863

    
9864
      # Now that the new lvs have the old name, we can add them to the device
9865
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9866
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9867
                                                  new_lvs)
9868
      msg = result.fail_msg
9869
      if msg:
9870
        for new_lv in new_lvs:
9871
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9872
                                               new_lv).fail_msg
9873
          if msg2:
9874
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9875
                               hint=("cleanup manually the unused logical"
9876
                                     "volumes"))
9877
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9878

    
9879
    cstep = 5
9880
    if self.early_release:
9881
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9882
      cstep += 1
9883
      self._RemoveOldStorage(self.target_node, iv_names)
9884
      # WARNING: we release both node locks here, do not do other RPCs
9885
      # than WaitForSync to the primary node
9886
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9887
                    names=[self.target_node, self.other_node])
9888

    
9889
    # Wait for sync
9890
    # This can fail as the old devices are degraded and _WaitForSync
9891
    # does a combined result over all disks, so we don't check its return value
9892
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9893
    cstep += 1
9894
    _WaitForSync(self.lu, self.instance)
9895

    
9896
    # Check all devices manually
9897
    self._CheckDevices(self.instance.primary_node, iv_names)
9898

    
9899
    # Step: remove old storage
9900
    if not self.early_release:
9901
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9902
      cstep += 1
9903
      self._RemoveOldStorage(self.target_node, iv_names)
9904

    
9905
  def _ExecDrbd8Secondary(self, feedback_fn):
9906
    """Replace the secondary node for DRBD 8.
9907

9908
    The algorithm for replace is quite complicated:
9909
      - for all disks of the instance:
9910
        - create new LVs on the new node with same names
9911
        - shutdown the drbd device on the old secondary
9912
        - disconnect the drbd network on the primary
9913
        - create the drbd device on the new secondary
9914
        - network attach the drbd on the primary, using an artifice:
9915
          the drbd code for Attach() will connect to the network if it
9916
          finds a device which is connected to the correct local disks but
9917
          not network enabled
9918
      - wait for sync across all devices
9919
      - remove all disks from the old secondary
9920

9921
    Failures are not very well handled.
9922

9923
    """
9924
    steps_total = 6
9925

    
9926
    pnode = self.instance.primary_node
9927

    
9928
    # Step: check device activation
9929
    self.lu.LogStep(1, steps_total, "Check device existence")
9930
    self._CheckDisksExistence([self.instance.primary_node])
9931
    self._CheckVolumeGroup([self.instance.primary_node])
9932

    
9933
    # Step: check other node consistency
9934
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9935
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9936

    
9937
    # Step: create new storage
9938
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9939
    for idx, dev in enumerate(self.instance.disks):
9940
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9941
                      (self.new_node, idx))
9942
      # we pass force_create=True to force LVM creation
9943
      for new_lv in dev.children:
9944
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9945
                        _GetInstanceInfoText(self.instance), False)
9946

    
9947
    # Step 4: drbd minors and drbd setup changes
9948
    # after this, we must manually remove the drbd minors on both the
9949
    # error and the success paths
9950
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9951
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9952
                                         for dev in self.instance.disks],
9953
                                        self.instance.name)
9954
    logging.debug("Allocated minors %r", minors)
9955

    
9956
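    # iv_names maps the disk index to (drbd disk, its LV children, new
    # network-aware logical id); it is used later for the device checks and
    # for removing the old storage.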
    iv_names = {}
9957
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9958
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9959
                      (self.new_node, idx))
9960
      # create new devices on new_node; note that we create two IDs:
9961
      # one without port, so the drbd will be activated without
9962
      # networking information on the new node at this stage, and one
9963
      # with network, for the later activation in step 4
9964
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9965
      if self.instance.primary_node == o_node1:
9966
        p_minor = o_minor1
9967
      else:
9968
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9969
        p_minor = o_minor2
9970

    
9971
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9972
                      p_minor, new_minor, o_secret)
9973
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9974
                    p_minor, new_minor, o_secret)
9975

    
9976
      iv_names[idx] = (dev, dev.children, new_net_id)
9977
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9978
                    new_net_id)
9979
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9980
                              logical_id=new_alone_id,
9981
                              children=dev.children,
9982
                              size=dev.size)
9983
      try:
9984
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9985
                              _GetInstanceInfoText(self.instance), False)
9986
      except errors.GenericError:
9987
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9988
        raise
9989

    
9990
    # We have new devices, shutdown the drbd on the old secondary
9991
    for idx, dev in enumerate(self.instance.disks):
9992
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9993
      self.cfg.SetDiskID(dev, self.target_node)
9994
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9995
      if msg:
9996
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9997
                           "node: %s" % (idx, msg),
9998
                           hint=("Please cleanup this device manually as"
9999
                                 " soon as possible"))
10000

    
10001
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10002
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10003
                                               self.instance.disks)[pnode]
10004

    
10005
    msg = result.fail_msg
10006
    if msg:
10007
      # detaches didn't succeed (unlikely)
10008
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10009
      raise errors.OpExecError("Can't detach the disks from the network on"
10010
                               " old node: %s" % (msg,))
10011

    
10012
    # if we managed to detach at least one, we update all the disks of
10013
    # the instance to point to the new secondary
10014
    self.lu.LogInfo("Updating instance configuration")
10015
    for dev, _, new_logical_id in iv_names.itervalues():
10016
      dev.logical_id = new_logical_id
10017
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10018

    
10019
    self.cfg.Update(self.instance, feedback_fn)
10020

    
10021
    # and now perform the drbd attach
10022
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10023
                    " (standalone => connected)")
10024
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10025
                                            self.new_node],
10026
                                           self.node_secondary_ip,
10027
                                           self.instance.disks,
10028
                                           self.instance.name,
10029
                                           False)
10030
    for to_node, to_result in result.items():
10031
      msg = to_result.fail_msg
10032
      if msg:
10033
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10034
                           to_node, msg,
10035
                           hint=("please do a gnt-instance info to see the"
10036
                                 " status of disks"))
10037
    cstep = 5
10038
    if self.early_release:
10039
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10040
      cstep += 1
10041
      self._RemoveOldStorage(self.target_node, iv_names)
10042
      # WARNING: we release all node locks here, do not do other RPCs
10043
      # than WaitForSync to the primary node
10044
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10045
                    names=[self.instance.primary_node,
10046
                           self.target_node,
10047
                           self.new_node])
10048

    
10049
    # Wait for sync
10050
    # This can fail as the old devices are degraded and _WaitForSync
10051
    # does a combined result over all disks, so we don't check its return value
10052
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10053
    cstep += 1
10054
    _WaitForSync(self.lu, self.instance)
10055

    
10056
    # Check all devices manually
10057
    self._CheckDevices(self.instance.primary_node, iv_names)
10058

    
10059
    # Step: remove old storage
10060
    if not self.early_release:
10061
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10062
      self._RemoveOldStorage(self.target_node, iv_names)
10063

    
10064

    
10065
class LURepairNodeStorage(NoHooksLU):
10066
  """Repairs the volume group on a node.
10067

10068
  """
10069
  REQ_BGL = False
10070

    
10071
  def CheckArguments(self):
10072
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10073

    
10074
    storage_type = self.op.storage_type
10075

    
10076
    if (constants.SO_FIX_CONSISTENCY not in
10077
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10078
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10079
                                 " repaired" % storage_type,
10080
                                 errors.ECODE_INVAL)
10081

    
10082
  def ExpandNames(self):
10083
    self.needed_locks = {
10084
      locking.LEVEL_NODE: [self.op.node_name],
10085
      }
10086

    
10087
  def _CheckFaultyDisks(self, instance, node_name):
10088
    """Ensure faulty disks abort the opcode or at least warn."""
10089
    try:
10090
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10091
                                  node_name, True):
10092
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10093
                                   " node '%s'" % (instance.name, node_name),
10094
                                   errors.ECODE_STATE)
10095
    except errors.OpPrereqError, err:
10096
      if self.op.ignore_consistency:
10097
        self.proc.LogWarning(str(err.args[0]))
10098
      else:
10099
        raise
10100

    
10101
  def CheckPrereq(self):
10102
    """Check prerequisites.
10103

10104
    """
10105
    # Check whether any instance on this node has faulty disks
10106
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10107
      if not inst.admin_up:
10108
        continue
10109
      check_nodes = set(inst.all_nodes)
10110
      check_nodes.discard(self.op.node_name)
10111
      for inst_node_name in check_nodes:
10112
        self._CheckFaultyDisks(inst, inst_node_name)
10113

    
10114
  def Exec(self, feedback_fn):
10115
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10116
                (self.op.name, self.op.node_name))
10117

    
10118
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10119
    result = self.rpc.call_storage_execute(self.op.node_name,
10120
                                           self.op.storage_type, st_args,
10121
                                           self.op.name,
10122
                                           constants.SO_FIX_CONSISTENCY)
10123
    result.Raise("Failed to repair storage unit '%s' on %s" %
10124
                 (self.op.name, self.op.node_name))
10125

    
10126

    
10127
class LUNodeEvacuate(NoHooksLU):
10128
  """Evacuates instances off a list of nodes.
10129

10130
  """
10131
  REQ_BGL = False
10132

    
10133
  def CheckArguments(self):
10134
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10135

    
10136
  def ExpandNames(self):
10137
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10138

    
10139
    if self.op.remote_node is not None:
10140
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10141
      assert self.op.remote_node
10142

    
10143
      if self.op.remote_node == self.op.node_name:
10144
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10145
                                   " secondary node", errors.ECODE_INVAL)
10146

    
10147
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10148
        raise errors.OpPrereqError("Without the use of an iallocator only"
10149
                                   " secondary instances can be evacuated",
10150
                                   errors.ECODE_INVAL)
10151

    
10152
    # Declare locks
10153
    self.share_locks = _ShareAll()
10154
    self.needed_locks = {
10155
      locking.LEVEL_INSTANCE: [],
10156
      locking.LEVEL_NODEGROUP: [],
10157
      locking.LEVEL_NODE: [],
10158
      }
10159

    
10160
    if self.op.remote_node is None:
10161
      # Iallocator will choose any node(s) in the same group
10162
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10163
    else:
10164
      group_nodes = frozenset([self.op.remote_node])
10165

    
10166
    # Determine nodes to be locked
10167
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10168

    
10169
  def _DetermineInstances(self):
10170
    """Builds list of instances to operate on.
10171

10172
    """
10173
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10174

    
10175
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10176
      # Primary instances only
10177
      inst_fn = _GetNodePrimaryInstances
10178
      assert self.op.remote_node is None, \
10179
        "Evacuating primary instances requires iallocator"
10180
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10181
      # Secondary instances only
10182
      inst_fn = _GetNodeSecondaryInstances
10183
    else:
10184
      # All instances
10185
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10186
      inst_fn = _GetNodeInstances
10187

    
10188
    return inst_fn(self.cfg, self.op.node_name)
10189

    
10190
  def DeclareLocks(self, level):
10191
    if level == locking.LEVEL_INSTANCE:
10192
      # Lock instances optimistically, needs verification once node and group
10193
      # locks have been acquired
10194
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10195
        set(i.name for i in self._DetermineInstances())
10196

    
10197
    elif level == locking.LEVEL_NODEGROUP:
10198
      # Lock node groups optimistically, needs verification once nodes have
10199
      # been acquired
10200
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10201
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10202

    
10203
    elif level == locking.LEVEL_NODE:
10204
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10205

    
10206
  def CheckPrereq(self):
10207
    # Verify locks
10208
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10209
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10210
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10211

    
10212
    assert owned_nodes == self.lock_nodes
10213

    
10214
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10215
    if owned_groups != wanted_groups:
10216
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10217
                               " current groups are '%s', used to be '%s'" %
10218
                               (utils.CommaJoin(wanted_groups),
10219
                                utils.CommaJoin(owned_groups)))
10220

    
10221
    # Determine affected instances
10222
    self.instances = self._DetermineInstances()
10223
    self.instance_names = [i.name for i in self.instances]
10224

    
10225
    if set(self.instance_names) != owned_instances:
10226
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10227
                               " were acquired, current instances are '%s',"
10228
                               " used to be '%s'" %
10229
                               (self.op.node_name,
10230
                                utils.CommaJoin(self.instance_names),
10231
                                utils.CommaJoin(owned_instances)))
10232

    
10233
    if self.instance_names:
10234
      self.LogInfo("Evacuating instances from node '%s': %s",
10235
                   self.op.node_name,
10236
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10237
    else:
10238
      self.LogInfo("No instances to evacuate from node '%s'",
10239
                   self.op.node_name)
10240

    
10241
    if self.op.remote_node is not None:
10242
      for i in self.instances:
10243
        if i.primary_node == self.op.remote_node:
10244
          raise errors.OpPrereqError("Node %s is the primary node of"
10245
                                     " instance %s, cannot use it as"
10246
                                     " secondary" %
10247
                                     (self.op.remote_node, i.name),
10248
                                     errors.ECODE_INVAL)
10249

    
10250
  def Exec(self, feedback_fn):
10251
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10252

    
10253
    if not self.instance_names:
10254
      # No instances to evacuate
10255
      jobs = []
10256

    
10257
    elif self.op.iallocator is not None:
10258
      # TODO: Implement relocation to other group
10259
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10260
                       evac_mode=self.op.mode,
10261
                       instances=list(self.instance_names))
10262

    
10263
      ial.Run(self.op.iallocator)
10264

    
10265
      if not ial.success:
10266
        raise errors.OpPrereqError("Can't compute node evacuation using"
10267
                                   " iallocator '%s': %s" %
10268
                                   (self.op.iallocator, ial.info),
10269
                                   errors.ECODE_NORES)
10270

    
10271
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10272

    
10273
    elif self.op.remote_node is not None:
10274
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10275
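      # Without an iallocator, evacuation is done by submitting one
      # replace-disks job per instance, moving its secondary to the
      # requested node.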
      jobs = [
10276
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10277
                                        remote_node=self.op.remote_node,
10278
                                        disks=[],
10279
                                        mode=constants.REPLACE_DISK_CHG,
10280
                                        early_release=self.op.early_release)]
10281
        for instance_name in self.instance_names
10282
        ]
10283

    
10284
    else:
10285
      raise errors.ProgrammerError("No iallocator or remote node")
10286

    
10287
    return ResultWithJobs(jobs)
10288

    
10289

    
10290
def _SetOpEarlyRelease(early_release, op):
10291
  """Sets C{early_release} flag on opcodes if available.
10292

10293
  """
10294
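  # Not every opcode type has an early_release parameter; for those that
  # don't, the assignment raises AttributeError and the opcode is returned
  # unchanged.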
  try:
10295
    op.early_release = early_release
10296
  except AttributeError:
10297
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10298

    
10299
  return op
10300

    
10301

    
10302
def _NodeEvacDest(use_nodes, group, nodes):
10303
  """Returns group or nodes depending on caller's choice.
10304

10305
  """
10306
  if use_nodes:
10307
    return utils.CommaJoin(nodes)
10308
  else:
10309
    return group
10310

    
10311

    
10312
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10313
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10314

10315
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10316
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10317

10318
  @type lu: L{LogicalUnit}
10319
  @param lu: Logical unit instance
10320
  @type alloc_result: tuple/list
10321
  @param alloc_result: Result from iallocator
10322
  @type early_release: bool
10323
  @param early_release: Whether to release locks early if possible
10324
  @type use_nodes: bool
10325
  @param use_nodes: Whether to display node names instead of groups
10326

10327
  """
10328
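  # The iallocator result is a triple: instances that were planned for a
  # move (with their destination), instances that could not be evacuated
  # (with a reason), and the list of jobs as serialized opcodes.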
  (moved, failed, jobs) = alloc_result
10329

    
10330
  if failed:
10331
    lu.LogWarning("Unable to evacuate instances %s",
10332
                  utils.CommaJoin("%s (%s)" % (name, reason)
10333
                                  for (name, reason) in failed))
10334

    
10335
  if moved:
10336
    lu.LogInfo("Instances to be moved: %s",
10337
               utils.CommaJoin("%s (to %s)" %
10338
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10339
                               for (name, group, nodes) in moved))
10340

    
10341
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10342
              map(opcodes.OpCode.LoadOpCode, ops))
10343
          for ops in jobs]
10344

    
10345

    
10346
class LUInstanceGrowDisk(LogicalUnit):
10347
  """Grow a disk of an instance.
10348

10349
  """
10350
  HPATH = "disk-grow"
10351
  HTYPE = constants.HTYPE_INSTANCE
10352
  REQ_BGL = False
10353

    
10354
  def ExpandNames(self):
10355
    self._ExpandAndLockInstance()
10356
    self.needed_locks[locking.LEVEL_NODE] = []
10357
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10358

    
10359
  def DeclareLocks(self, level):
10360
    if level == locking.LEVEL_NODE:
10361
      self._LockInstancesNodes()
10362

    
10363
  def BuildHooksEnv(self):
10364
    """Build hooks env.
10365

10366
    This runs on the master, the primary and all the secondaries.
10367

10368
    """
10369
    env = {
10370
      "DISK": self.op.disk,
10371
      "AMOUNT": self.op.amount,
10372
      }
10373
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10374
    return env
10375

    
10376
  def BuildHooksNodes(self):
10377
    """Build hooks nodes.
10378

10379
    """
10380
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10381
    return (nl, nl)
10382

    
10383
  def CheckPrereq(self):
10384
    """Check prerequisites.
10385

10386
    This checks that the instance is in the cluster.
10387

10388
    """
10389
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10390
    assert instance is not None, \
10391
      "Cannot retrieve locked instance %s" % self.op.instance_name
10392
    nodenames = list(instance.all_nodes)
10393
    for node in nodenames:
10394
      _CheckNodeOnline(self, node)
10395

    
10396
    self.instance = instance
10397

    
10398
    if instance.disk_template not in constants.DTS_GROWABLE:
10399
      raise errors.OpPrereqError("Instance's disk layout does not support"
10400
                                 " growing", errors.ECODE_INVAL)
10401

    
10402
    self.disk = instance.FindDisk(self.op.disk)
10403

    
10404
    if instance.disk_template not in (constants.DT_FILE,
10405
                                      constants.DT_SHARED_FILE):
10406
      # TODO: check the free disk space for file, when that feature will be
10407
      # supported
10408
      _CheckNodesFreeDiskPerVG(self, nodenames,
10409
                               self.disk.ComputeGrowth(self.op.amount))
10410

    
10411
  def Exec(self, feedback_fn):
10412
    """Execute disk grow.
10413

10414
    """
10415
    instance = self.instance
10416
    disk = self.disk
10417

    
10418
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10419
    if not disks_ok:
10420
      raise errors.OpExecError("Cannot activate block device to grow")
10421

    
10422
    # First run all grow ops in dry-run mode
10423
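    # (a failure in the dry-run pass aborts the operation before any real
    # resize has happened, so the disk cannot end up with different sizes
    # on different nodes)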
    for node in instance.all_nodes:
10424
      self.cfg.SetDiskID(disk, node)
10425
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10426
      result.Raise("Grow request failed to node %s" % node)
10427

    
10428
    # We know that (as far as we can test) operations across different
10429
    # nodes will succeed, time to run it for real
10430
    for node in instance.all_nodes:
10431
      self.cfg.SetDiskID(disk, node)
10432
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10433
      result.Raise("Grow request failed to node %s" % node)
10434

    
10435
      # TODO: Rewrite code to work properly
10436
      # DRBD goes into sync mode for a short amount of time after executing the
10437
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10438
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10439
      # time is a work-around.
10440
      time.sleep(5)
10441

    
10442
    disk.RecordGrow(self.op.amount)
10443
    self.cfg.Update(instance, feedback_fn)
10444
    if self.op.wait_for_sync:
10445
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10446
      if disk_abort:
10447
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10448
                             " status; please check the instance")
10449
      if not instance.admin_up:
10450
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10451
    elif not instance.admin_up:
10452
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10453
                           " not supposed to be running because no wait for"
10454
                           " sync mode was requested")
10455

    
10456

    
10457
class LUInstanceQueryData(NoHooksLU):
10458
  """Query runtime instance data.
10459

10460
  """
10461
  REQ_BGL = False
10462

    
10463
  def ExpandNames(self):
10464
    self.needed_locks = {}
10465

    
10466
    # Use locking if requested or when non-static information is wanted
10467
    if not (self.op.static or self.op.use_locking):
10468
      self.LogWarning("Non-static data requested, locks need to be acquired")
10469
      self.op.use_locking = True
10470

    
10471
    if self.op.instances or not self.op.use_locking:
10472
      # Expand instance names right here
10473
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10474
    else:
10475
      # Will use acquired locks
10476
      self.wanted_names = None
10477

    
10478
    if self.op.use_locking:
10479
      self.share_locks = _ShareAll()
10480

    
10481
      if self.wanted_names is None:
10482
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10483
      else:
10484
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10485

    
10486
      self.needed_locks[locking.LEVEL_NODE] = []
10487
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10488

    
10489
  def DeclareLocks(self, level):
10490
    if self.op.use_locking and level == locking.LEVEL_NODE:
10491
      self._LockInstancesNodes()
10492

    
10493
  def CheckPrereq(self):
10494
    """Check prerequisites.
10495

10496
    This only checks the optional instance list against the existing names.
10497

10498
    """
10499
    if self.wanted_names is None:
10500
      assert self.op.use_locking, "Locking was not used"
10501
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10502

    
10503
    self.wanted_instances = \
10504
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10505

    
10506
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10507
    """Returns the status of a block device
10508

10509
    """
10510
    if self.op.static or not node:
10511
      return None
10512

    
10513
    self.cfg.SetDiskID(dev, node)
10514

    
10515
    result = self.rpc.call_blockdev_find(node, dev)
10516
    if result.offline:
10517
      return None
10518

    
10519
    result.Raise("Can't compute disk status for %s" % instance_name)
10520

    
10521
    status = result.payload
10522
    if status is None:
10523
      return None
10524

    
10525
    return (status.dev_path, status.major, status.minor,
10526
            status.sync_percent, status.estimated_time,
10527
            status.is_degraded, status.ldisk_status)
10528

    
10529
  def _ComputeDiskStatus(self, instance, snode, dev):
10530
    """Compute block device status.
10531

10532
    """
10533
    if dev.dev_type in constants.LDS_DRBD:
10534
      # we change the snode then (otherwise we use the one passed in)
10535
      if dev.logical_id[0] == instance.primary_node:
10536
        snode = dev.logical_id[1]
10537
      else:
10538
        snode = dev.logical_id[0]
10539

    
10540
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10541
                                              instance.name, dev)
10542
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10543

    
10544
    if dev.children:
10545
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10546
                                        instance, snode),
10547
                         dev.children)
10548
    else:
10549
      dev_children = []
10550

    
10551
    return {
10552
      "iv_name": dev.iv_name,
10553
      "dev_type": dev.dev_type,
10554
      "logical_id": dev.logical_id,
10555
      "physical_id": dev.physical_id,
10556
      "pstatus": dev_pstatus,
10557
      "sstatus": dev_sstatus,
10558
      "children": dev_children,
10559
      "mode": dev.mode,
10560
      "size": dev.size,
10561
      }
10562

    
10563
  def Exec(self, feedback_fn):
10564
    """Gather and return data"""
10565
    result = {}
10566

    
10567
    cluster = self.cfg.GetClusterInfo()
10568

    
10569
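    # Fetch the primary node object of every wanted instance in one go; the
    # node's offline flag decides below whether live (remote) state can be
    # queried at all.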
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10570
                                          for i in self.wanted_instances)
10571
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10572
      if self.op.static or pnode.offline:
10573
        remote_state = None
10574
        if pnode.offline:
10575
          self.LogWarning("Primary node %s is marked offline, returning static"
10576
                          " information only for instance %s" %
10577
                          (pnode.name, instance.name))
10578
      else:
10579
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10580
                                                  instance.name,
10581
                                                  instance.hypervisor)
10582
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10583
        remote_info = remote_info.payload
10584
        if remote_info and "state" in remote_info:
10585
          remote_state = "up"
10586
        else:
10587
          remote_state = "down"
10588

    
10589
      if instance.admin_up:
10590
        config_state = "up"
10591
      else:
10592
        config_state = "down"
10593

    
10594
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10595
                  instance.disks)
10596

    
10597
      result[instance.name] = {
10598
        "name": instance.name,
10599
        "config_state": config_state,
10600
        "run_state": remote_state,
10601
        "pnode": instance.primary_node,
10602
        "snodes": instance.secondary_nodes,
10603
        "os": instance.os,
10604
        # this happens to be the same format used for hooks
10605
        "nics": _NICListToTuple(self, instance.nics),
10606
        "disk_template": instance.disk_template,
10607
        "disks": disks,
10608
        "hypervisor": instance.hypervisor,
10609
        "network_port": instance.network_port,
10610
        "hv_instance": instance.hvparams,
10611
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10612
        "be_instance": instance.beparams,
10613
        "be_actual": cluster.FillBE(instance),
10614
        "os_instance": instance.osparams,
10615
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10616
        "serial_no": instance.serial_no,
10617
        "mtime": instance.mtime,
10618
        "ctime": instance.ctime,
10619
        "uuid": instance.uuid,
10620
        }
10621

    
10622
    return result
10623

    
10624

    
10625
class LUInstanceSetParams(LogicalUnit):
10626
  """Modifies an instances's parameters.
10627

10628
  """
10629
  HPATH = "instance-modify"
10630
  HTYPE = constants.HTYPE_INSTANCE
10631
  REQ_BGL = False
10632

    
10633
  def CheckArguments(self):
10634
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10635
            self.op.hvparams or self.op.beparams or self.op.os_name):
10636
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10637

    
10638
    if self.op.hvparams:
10639
      _CheckGlobalHvParams(self.op.hvparams)
10640

    
10641
    # Disk validation
10642
    disk_addremove = 0
10643
    for disk_op, disk_dict in self.op.disks:
10644
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10645
      if disk_op == constants.DDM_REMOVE:
10646
        disk_addremove += 1
10647
        continue
10648
      elif disk_op == constants.DDM_ADD:
10649
        disk_addremove += 1
10650
      else:
10651
        if not isinstance(disk_op, int):
10652
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10653
        if not isinstance(disk_dict, dict):
10654
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10655
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10656

    
10657
      if disk_op == constants.DDM_ADD:
10658
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10659
        if mode not in constants.DISK_ACCESS_SET:
10660
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10661
                                     errors.ECODE_INVAL)
10662
        size = disk_dict.get(constants.IDISK_SIZE, None)
10663
        if size is None:
10664
          raise errors.OpPrereqError("Required disk parameter size missing",
10665
                                     errors.ECODE_INVAL)
10666
        try:
10667
          size = int(size)
10668
        except (TypeError, ValueError), err:
10669
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10670
                                     str(err), errors.ECODE_INVAL)
10671
        disk_dict[constants.IDISK_SIZE] = size
10672
      else:
10673
        # modification of disk
10674
        if constants.IDISK_SIZE in disk_dict:
10675
          raise errors.OpPrereqError("Disk size change not possible, use"
10676
                                     " grow-disk", errors.ECODE_INVAL)
10677

    
10678
    if disk_addremove > 1:
10679
      raise errors.OpPrereqError("Only one disk add or remove operation"
10680
                                 " supported at a time", errors.ECODE_INVAL)
10681

    
10682
    if self.op.disks and self.op.disk_template is not None:
10683
      raise errors.OpPrereqError("Disk template conversion and other disk"
10684
                                 " changes not supported at the same time",
10685
                                 errors.ECODE_INVAL)
10686

    
10687
    if (self.op.disk_template and
10688
        self.op.disk_template in constants.DTS_INT_MIRROR and
10689
        self.op.remote_node is None):
10690
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10691
                                 " one requires specifying a secondary node",
10692
                                 errors.ECODE_INVAL)
10693

    
10694
    # NIC validation
10695
    nic_addremove = 0
10696
    for nic_op, nic_dict in self.op.nics:
10697
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10698
      if nic_op == constants.DDM_REMOVE:
10699
        nic_addremove += 1
10700
        continue
10701
      elif nic_op == constants.DDM_ADD:
10702
        nic_addremove += 1
10703
      else:
10704
        if not isinstance(nic_op, int):
10705
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10706
        if not isinstance(nic_dict, dict):
10707
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10708
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10709

    
10710
      # nic_dict should be a dict
10711
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10712
      if nic_ip is not None:
10713
        if nic_ip.lower() == constants.VALUE_NONE:
10714
          nic_dict[constants.INIC_IP] = None
10715
        else:
10716
          if not netutils.IPAddress.IsValid(nic_ip):
10717
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10718
                                       errors.ECODE_INVAL)
10719

    
10720
      nic_bridge = nic_dict.get("bridge", None)
10721
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10722
      if nic_bridge and nic_link:
10723
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10724
                                   " at the same time", errors.ECODE_INVAL)
10725
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10726
        nic_dict["bridge"] = None
10727
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10728
        nic_dict[constants.INIC_LINK] = None
10729

    
10730
      if nic_op == constants.DDM_ADD:
10731
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10732
        if nic_mac is None:
10733
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10734

    
10735
      if constants.INIC_MAC in nic_dict:
10736
        nic_mac = nic_dict[constants.INIC_MAC]
10737
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10738
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10739

    
10740
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10741
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10742
                                     " modifying an existing nic",
10743
                                     errors.ECODE_INVAL)
10744

    
10745
    if nic_addremove > 1:
10746
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10747
                                 " supported at a time", errors.ECODE_INVAL)
10748

    
10749
  def ExpandNames(self):
10750
    self._ExpandAndLockInstance()
10751
    self.needed_locks[locking.LEVEL_NODE] = []
10752
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10753

    
10754
  def DeclareLocks(self, level):
10755
    if level == locking.LEVEL_NODE:
10756
      self._LockInstancesNodes()
10757
      if self.op.disk_template and self.op.remote_node:
10758
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10759
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10760

    
10761
  def BuildHooksEnv(self):
10762
    """Build hooks env.
10763

10764
    This runs on the master, primary and secondaries.
10765

10766
    """
10767
    args = dict()
10768
    if constants.BE_MEMORY in self.be_new:
10769
      args["memory"] = self.be_new[constants.BE_MEMORY]
10770
    if constants.BE_VCPUS in self.be_new:
10771
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10772
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10773
    # information at all.
10774
    if self.op.nics:
10775
      args["nics"] = []
10776
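      # self.op.nics is a list of (index or add/remove marker, parameters)
      # pairs; turn it into a dict so that per-index overrides can be looked
      # up while walking the instance's existing NICs.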
      nic_override = dict(self.op.nics)
10777
      for idx, nic in enumerate(self.instance.nics):
10778
        if idx in nic_override:
10779
          this_nic_override = nic_override[idx]
10780
        else:
10781
          this_nic_override = {}
10782
        if constants.INIC_IP in this_nic_override:
10783
          ip = this_nic_override[constants.INIC_IP]
10784
        else:
10785
          ip = nic.ip
10786
        if constants.INIC_MAC in this_nic_override:
10787
          mac = this_nic_override[constants.INIC_MAC]
10788
        else:
10789
          mac = nic.mac
10790
        if idx in self.nic_pnew:
10791
          nicparams = self.nic_pnew[idx]
10792
        else:
10793
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10794
        mode = nicparams[constants.NIC_MODE]
10795
        link = nicparams[constants.NIC_LINK]
10796
        args["nics"].append((ip, mac, mode, link))
10797
      if constants.DDM_ADD in nic_override:
10798
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10799
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10800
        nicparams = self.nic_pnew[constants.DDM_ADD]
10801
        mode = nicparams[constants.NIC_MODE]
10802
        link = nicparams[constants.NIC_LINK]
10803
        args["nics"].append((ip, mac, mode, link))
10804
      elif constants.DDM_REMOVE in nic_override:
10805
        del args["nics"][-1]
10806

    
10807
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10808
    if self.op.disk_template:
10809
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10810

    
10811
    return env
10812

    
10813
  def BuildHooksNodes(self):
10814
    """Build hooks nodes.
10815

10816
    """
10817
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10818
    return (nl, nl)
10819

    
10820
  def CheckPrereq(self):
10821
    """Check prerequisites.
10822

10823
    This only checks the instance list against the existing names.
10824

10825
    """
10826
    # checking the new params on the primary/secondary nodes
10827

    
10828
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10829
    cluster = self.cluster = self.cfg.GetClusterInfo()
10830
    assert self.instance is not None, \
10831
      "Cannot retrieve locked instance %s" % self.op.instance_name
10832
    pnode = instance.primary_node
10833
    nodelist = list(instance.all_nodes)
10834

    
10835
    # OS change
10836
    if self.op.os_name and not self.op.force:
10837
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10838
                      self.op.force_variant)
10839
      instance_os = self.op.os_name
10840
    else:
10841
      instance_os = instance.os
10842

    
10843
    if self.op.disk_template:
10844
      if instance.disk_template == self.op.disk_template:
10845
        raise errors.OpPrereqError("Instance already has disk template %s" %
10846
                                   instance.disk_template, errors.ECODE_INVAL)
10847

    
10848
      if (instance.disk_template,
10849
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10850
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10851
                                   " %s to %s" % (instance.disk_template,
10852
                                                  self.op.disk_template),
10853
                                   errors.ECODE_INVAL)
10854
      _CheckInstanceDown(self, instance, "cannot change disk template")
10855
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10856
        if self.op.remote_node == pnode:
10857
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10858
                                     " as the primary node of the instance" %
10859
                                     self.op.remote_node, errors.ECODE_STATE)
10860
        _CheckNodeOnline(self, self.op.remote_node)
10861
        _CheckNodeNotDrained(self, self.op.remote_node)
10862
        # FIXME: here we assume that the old instance type is DT_PLAIN
10863
        assert instance.disk_template == constants.DT_PLAIN
10864
        disks = [{constants.IDISK_SIZE: d.size,
10865
                  constants.IDISK_VG: d.logical_id[0]}
10866
                 for d in instance.disks]
10867
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10868
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10869

    
10870
    # hvparams processing
10871
    if self.op.hvparams:
10872
      hv_type = instance.hypervisor
10873
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10874
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10875
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10876

    
10877
      # local check
10878
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10879
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10880
      self.hv_new = hv_new # the new actual values
10881
      self.hv_inst = i_hvdict # the new dict (without defaults)
10882
    else:
10883
      self.hv_new = self.hv_inst = {}
10884

    
10885
    # beparams processing
10886
    if self.op.beparams:
10887
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10888
                                   use_none=True)
10889
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10890
      be_new = cluster.SimpleFillBE(i_bedict)
10891
      self.be_new = be_new # the new actual values
10892
      self.be_inst = i_bedict # the new dict (without defaults)
10893
    else:
10894
      self.be_new = self.be_inst = {}
10895
    be_old = cluster.FillBE(instance)
10896

    
10897
    # osparams processing
10898
    if self.op.osparams:
10899
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10900
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10901
      self.os_inst = i_osdict # the new dict (without defaults)
10902
    else:
10903
      self.os_inst = {}
10904

    
10905
    self.warn = []
10906

    
10907
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10908
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10909
      mem_check_list = [pnode]
10910
      if be_new[constants.BE_AUTO_BALANCE]:
10911
        # either we changed auto_balance to yes or it was from before
10912
        mem_check_list.extend(instance.secondary_nodes)
10913
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10914
                                                  instance.hypervisor)
10915
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10916
                                         instance.hypervisor)
10917
      pninfo = nodeinfo[pnode]
10918
      msg = pninfo.fail_msg
10919
      if msg:
10920
        # Assume the primary node is unreachable and go ahead
10921
        self.warn.append("Can't get info from primary node %s: %s" %
10922
                         (pnode, msg))
10923
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10924
        self.warn.append("Node data from primary node %s doesn't contain"
10925
                         " free memory information" % pnode)
10926
      elif instance_info.fail_msg:
10927
        self.warn.append("Can't get instance runtime information: %s" %
10928
                        instance_info.fail_msg)
10929
      else:
10930
        if instance_info.payload:
10931
          current_mem = int(instance_info.payload["memory"])
10932
        else:
10933
          # Assume instance not running
10934
          # (there is a slight race condition here, but it's not very probable,
10935
          # and we have no other way to check)
10936
          current_mem = 0
10937
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10938
                    pninfo.payload["memory_free"])
10939
        if miss_mem > 0:
10940
          raise errors.OpPrereqError("This change will prevent the instance"
10941
                                     " from starting, due to %d MB of memory"
10942
                                     " missing on its primary node" % miss_mem,
10943
                                     errors.ECODE_NORES)
10944

    
10945
      if be_new[constants.BE_AUTO_BALANCE]:
10946
        for node, nres in nodeinfo.items():
10947
          if node not in instance.secondary_nodes:
10948
            continue
10949
          nres.Raise("Can't get info from secondary node %s" % node,
10950
                     prereq=True, ecode=errors.ECODE_STATE)
10951
          if not isinstance(nres.payload.get("memory_free", None), int):
10952
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10953
                                       " memory information" % node,
10954
                                       errors.ECODE_STATE)
10955
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10956
            raise errors.OpPrereqError("This change will prevent the instance"
10957
                                       " from failover to its secondary node"
10958
                                       " %s, due to not enough memory" % node,
10959
                                       errors.ECODE_STATE)
10960

    
10961
    # NIC processing
10962
    self.nic_pnew = {}
10963
    self.nic_pinst = {}
10964
    for nic_op, nic_dict in self.op.nics:
10965
      if nic_op == constants.DDM_REMOVE:
10966
        if not instance.nics:
10967
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10968
                                     errors.ECODE_INVAL)
10969
        continue
10970
      if nic_op != constants.DDM_ADD:
10971
        # an existing nic
10972
        if not instance.nics:
10973
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10974
                                     " no NICs" % nic_op,
10975
                                     errors.ECODE_INVAL)
10976
        if nic_op < 0 or nic_op >= len(instance.nics):
10977
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10978
                                     " are 0 to %d" %
10979
                                     (nic_op, len(instance.nics) - 1),
10980
                                     errors.ECODE_INVAL)
10981
        old_nic_params = instance.nics[nic_op].nicparams
10982
        old_nic_ip = instance.nics[nic_op].ip
10983
      else:
10984
        old_nic_params = {}
10985
        old_nic_ip = None
10986

    
10987
      update_params_dict = dict([(key, nic_dict[key])
10988
                                 for key in constants.NICS_PARAMETERS
10989
                                 if key in nic_dict])
10990

    
10991
      if "bridge" in nic_dict:
10992
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10993

    
10994
      new_nic_params = _GetUpdatedParams(old_nic_params,
10995
                                         update_params_dict)
10996
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10997
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10998
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10999
      self.nic_pinst[nic_op] = new_nic_params
11000
      self.nic_pnew[nic_op] = new_filled_nic_params
11001
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11002

    
11003
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11004
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11005
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11006
        if msg:
11007
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11008
          if self.op.force:
11009
            self.warn.append(msg)
11010
          else:
11011
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11012
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11013
        if constants.INIC_IP in nic_dict:
11014
          nic_ip = nic_dict[constants.INIC_IP]
11015
        else:
11016
          nic_ip = old_nic_ip
11017
        if nic_ip is None:
11018
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11019
                                     " on a routed nic", errors.ECODE_INVAL)
11020
      if constants.INIC_MAC in nic_dict:
11021
        nic_mac = nic_dict[constants.INIC_MAC]
11022
        if nic_mac is None:
11023
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11024
                                     errors.ECODE_INVAL)
11025
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11026
          # otherwise generate the mac
11027
          nic_dict[constants.INIC_MAC] = \
11028
            self.cfg.GenerateMAC(self.proc.GetECId())
11029
        else:
11030
          # or validate/reserve the current one
11031
          try:
11032
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11033
          except errors.ReservationError:
11034
            raise errors.OpPrereqError("MAC address %s already in use"
11035
                                       " in cluster" % nic_mac,
11036
                                       errors.ECODE_NOTUNIQUE)
11037

    
11038
    # DISK processing
11039
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11040
      raise errors.OpPrereqError("Disk operations not supported for"
11041
                                 " diskless instances",
11042
                                 errors.ECODE_INVAL)
11043
    for disk_op, _ in self.op.disks:
11044
      if disk_op == constants.DDM_REMOVE:
11045
        if len(instance.disks) == 1:
11046
          raise errors.OpPrereqError("Cannot remove the last disk of"
11047
                                     " an instance", errors.ECODE_INVAL)
11048
        _CheckInstanceDown(self, instance, "cannot remove disks")
11049

    
11050
      if (disk_op == constants.DDM_ADD and
11051
          len(instance.disks) >= constants.MAX_DISKS):
11052
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11053
                                   " add more" % constants.MAX_DISKS,
11054
                                   errors.ECODE_STATE)
11055
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11056
        # an existing disk
11057
        if disk_op < 0 or disk_op >= len(instance.disks):
11058
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11059
                                     " are 0 to %d" %
11060
                                     (disk_op, len(instance.disks)),
11061
                                     errors.ECODE_INVAL)
11062

    
11063
    return
11064

    
11065
  def _ConvertPlainToDrbd(self, feedback_fn):
11066
    """Converts an instance from plain to drbd.
11067

11068
    """
11069
    feedback_fn("Converting template to drbd")
11070
    instance = self.instance
11071
    pnode = instance.primary_node
11072
    snode = self.op.remote_node
11073

    
11074
    # create a fake disk info for _GenerateDiskTemplate
11075
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11076
                  constants.IDISK_VG: d.logical_id[0]}
11077
                 for d in instance.disks]
11078
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11079
                                      instance.name, pnode, [snode],
11080
                                      disk_info, None, None, 0, feedback_fn)
11081
    info = _GetInstanceInfoText(instance)
11082
    feedback_fn("Creating aditional volumes...")
11083
    # first, create the missing data and meta devices
11084
    for disk in new_disks:
11085
      # unfortunately this is... not too nice
11086
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11087
                            info, True)
11088
      for child in disk.children:
11089
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11090
    # at this stage, all new LVs have been created, we can rename the
11091
    # old ones
11092
    feedback_fn("Renaming original volumes...")
11093
    rename_list = [(o, n.children[0].logical_id)
11094
                   for (o, n) in zip(instance.disks, new_disks)]
11095
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11096
    result.Raise("Failed to rename original LVs")
11097

    
11098
    feedback_fn("Initializing DRBD devices...")
11099
    # all child devices are in place, we can now create the DRBD devices
11100
    for disk in new_disks:
11101
      for node in [pnode, snode]:
11102
        f_create = node == pnode
11103
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11104

    
11105
    # at this point, the instance has been modified
11106
    instance.disk_template = constants.DT_DRBD8
11107
    instance.disks = new_disks
11108
    self.cfg.Update(instance, feedback_fn)
11109

    
11110
    # disks are created, waiting for sync
11111
    disk_abort = not _WaitForSync(self, instance,
11112
                                  oneshot=not self.op.wait_for_sync)
11113
    if disk_abort:
11114
      raise errors.OpExecError("There are some degraded disks for"
11115
                               " this instance, please cleanup manually")
11116

    
11117
  def _ConvertDrbdToPlain(self, feedback_fn):
11118
    """Converts an instance from drbd to plain.
11119

11120
    """
11121
    instance = self.instance
11122
    assert len(instance.secondary_nodes) == 1
11123
    pnode = instance.primary_node
11124
    snode = instance.secondary_nodes[0]
11125
    feedback_fn("Converting template to plain")
11126

    
11127
    old_disks = instance.disks
11128
    new_disks = [d.children[0] for d in old_disks]
11129

    
11130
    # copy over size and mode
11131
    for parent, child in zip(old_disks, new_disks):
11132
      child.size = parent.size
11133
      child.mode = parent.mode
11134

    
11135
    # update instance structure
11136
    instance.disks = new_disks
11137
    instance.disk_template = constants.DT_PLAIN
11138
    self.cfg.Update(instance, feedback_fn)
11139

    
11140
    feedback_fn("Removing volumes on the secondary node...")
11141
    for disk in old_disks:
11142
      self.cfg.SetDiskID(disk, snode)
11143
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11144
      if msg:
11145
        self.LogWarning("Could not remove block device %s on node %s,"
11146
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11147

    
11148
    feedback_fn("Removing unneeded volumes on the primary node...")
11149
    for idx, disk in enumerate(old_disks):
11150
      meta = disk.children[1]
11151
      self.cfg.SetDiskID(meta, pnode)
11152
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11153
      if msg:
11154
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11155
                        " continuing anyway: %s", idx, pnode, msg)
11156

    
11157
  def Exec(self, feedback_fn):
11158
    """Modifies an instance.
11159

11160
    All parameters take effect only at the next restart of the instance.
11161

11162
    """
11163
    # Process here the warnings from CheckPrereq, as we don't have a
11164
    # feedback_fn there.
11165
    for warn in self.warn:
11166
      feedback_fn("WARNING: %s" % warn)
11167

    
11168
    result = []
11169
    instance = self.instance
11170
    # disk changes
11171
    for disk_op, disk_dict in self.op.disks:
11172
      if disk_op == constants.DDM_REMOVE:
11173
        # remove the last disk
11174
        device = instance.disks.pop()
11175
        device_idx = len(instance.disks)
11176
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11177
          self.cfg.SetDiskID(disk, node)
11178
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11179
          if msg:
11180
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11181
                            " continuing anyway", device_idx, node, msg)
11182
        result.append(("disk/%d" % device_idx, "remove"))
11183
      elif disk_op == constants.DDM_ADD:
11184
        # add a new disk
11185
        if instance.disk_template in (constants.DT_FILE,
11186
                                        constants.DT_SHARED_FILE):
11187
          file_driver, file_path = instance.disks[0].logical_id
11188
          file_path = os.path.dirname(file_path)
11189
        else:
11190
          file_driver = file_path = None
11191
        disk_idx_base = len(instance.disks)
11192
        new_disk = _GenerateDiskTemplate(self,
11193
                                         instance.disk_template,
11194
                                         instance.name, instance.primary_node,
11195
                                         instance.secondary_nodes,
11196
                                         [disk_dict],
11197
                                         file_path,
11198
                                         file_driver,
11199
                                         disk_idx_base, feedback_fn)[0]
11200
        instance.disks.append(new_disk)
11201
        info = _GetInstanceInfoText(instance)
11202

    
11203
        logging.info("Creating volume %s for instance %s",
11204
                     new_disk.iv_name, instance.name)
11205
        # Note: this needs to be kept in sync with _CreateDisks
11206
        #HARDCODE
11207
        for node in instance.all_nodes:
11208
          f_create = node == instance.primary_node
11209
          try:
11210
            _CreateBlockDev(self, node, instance, new_disk,
11211
                            f_create, info, f_create)
11212
          except errors.OpExecError, err:
11213
            self.LogWarning("Failed to create volume %s (%s) on"
11214
                            " node %s: %s",
11215
                            new_disk.iv_name, new_disk, node, err)
11216
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11217
                       (new_disk.size, new_disk.mode)))
11218
      else:
11219
        # change a given disk
11220
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11221
        result.append(("disk.mode/%d" % disk_op,
11222
                       disk_dict[constants.IDISK_MODE]))
11223

    
11224
    if self.op.disk_template:
11225
      r_shut = _ShutdownInstanceDisks(self, instance)
11226
      if not r_shut:
11227
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11228
                                 " proceed with disk template conversion")
11229
      mode = (instance.disk_template, self.op.disk_template)
11230
      try:
11231
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11232
      except:
11233
        self.cfg.ReleaseDRBDMinors(instance.name)
11234
        raise
11235
      result.append(("disk_template", self.op.disk_template))
11236

    
11237
    # NIC changes
11238
    for nic_op, nic_dict in self.op.nics:
11239
      if nic_op == constants.DDM_REMOVE:
11240
        # remove the last nic
11241
        del instance.nics[-1]
11242
        result.append(("nic.%d" % len(instance.nics), "remove"))
11243
      elif nic_op == constants.DDM_ADD:
11244
        # mac and bridge should be set, by now
11245
        mac = nic_dict[constants.INIC_MAC]
11246
        ip = nic_dict.get(constants.INIC_IP, None)
11247
        nicparams = self.nic_pinst[constants.DDM_ADD]
11248
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11249
        instance.nics.append(new_nic)
11250
        result.append(("nic.%d" % (len(instance.nics) - 1),
11251
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11252
                       (new_nic.mac, new_nic.ip,
11253
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11254
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11255
                       )))
11256
      else:
11257
        for key in (constants.INIC_MAC, constants.INIC_IP):
11258
          if key in nic_dict:
11259
            setattr(instance.nics[nic_op], key, nic_dict[key])
11260
        if nic_op in self.nic_pinst:
11261
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11262
        for key, val in nic_dict.iteritems():
11263
          result.append(("nic.%s/%d" % (key, nic_op), val))
11264

    
11265
    # hvparams changes
11266
    if self.op.hvparams:
11267
      instance.hvparams = self.hv_inst
11268
      for key, val in self.op.hvparams.iteritems():
11269
        result.append(("hv/%s" % key, val))
11270

    
11271
    # beparams changes
11272
    if self.op.beparams:
11273
      instance.beparams = self.be_inst
11274
      for key, val in self.op.beparams.iteritems():
11275
        result.append(("be/%s" % key, val))
11276

    
11277
    # OS change
11278
    if self.op.os_name:
11279
      instance.os = self.op.os_name
11280

    
11281
    # osparams changes
11282
    if self.op.osparams:
11283
      instance.osparams = self.os_inst
11284
      for key, val in self.op.osparams.iteritems():
11285
        result.append(("os/%s" % key, val))
11286

    
11287
    self.cfg.Update(instance, feedback_fn)
11288

    
11289
    return result
11290

    
11291
  _DISK_CONVERSIONS = {
11292
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11293
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11294
    }
11295

    
11296

    
11297
class LUInstanceChangeGroup(LogicalUnit):
11298
  HPATH = "instance-change-group"
11299
  HTYPE = constants.HTYPE_INSTANCE
11300
  REQ_BGL = False
11301

    
11302
  def ExpandNames(self):
11303
    self.share_locks = _ShareAll()
11304
    self.needed_locks = {
11305
      locking.LEVEL_NODEGROUP: [],
11306
      locking.LEVEL_NODE: [],
11307
      }
11308

    
11309
    self._ExpandAndLockInstance()
11310

    
11311
    if self.op.target_groups:
11312
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11313
                                  self.op.target_groups)
11314
    else:
11315
      self.req_target_uuids = None
11316

    
11317
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11318

    
11319
  def DeclareLocks(self, level):
11320
    if level == locking.LEVEL_NODEGROUP:
11321
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11322

    
11323
      if self.req_target_uuids:
11324
        lock_groups = set(self.req_target_uuids)
11325

    
11326
        # Lock all groups used by instance optimistically; this requires going
11327
        # via the node before it's locked, requiring verification later on
11328
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11329
        lock_groups.update(instance_groups)
11330
      else:
11331
        # No target groups, need to lock all of them
11332
        lock_groups = locking.ALL_SET
11333

    
11334
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11335

    
11336
    elif level == locking.LEVEL_NODE:
11337
      if self.req_target_uuids:
11338
        # Lock all nodes used by instances
11339
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11340
        self._LockInstancesNodes()
11341

    
11342
        # Lock all nodes in all potential target groups
11343
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11344
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11345
        member_nodes = [node_name
11346
                        for group in lock_groups
11347
                        for node_name in self.cfg.GetNodeGroup(group).members]
11348
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11349
      else:
11350
        # Lock all nodes as all groups are potential targets
11351
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11352

    
11353
  def CheckPrereq(self):
11354
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11355
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11356
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11357

    
11358
    assert (self.req_target_uuids is None or
11359
            owned_groups.issuperset(self.req_target_uuids))
11360
    assert owned_instances == set([self.op.instance_name])
11361

    
11362
    # Get instance information
11363
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11364

    
11365
    # Check if node groups for locked instance are still correct
11366
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11367
      ("Instance %s's nodes changed while we kept the lock" %
11368
       self.op.instance_name)
11369

    
11370
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11371
                                           owned_groups)
11372

    
11373
    if self.req_target_uuids:
11374
      # User requested specific target groups
11375
      self.target_uuids = self.req_target_uuids
11376
    else:
11377
      # All groups except those used by the instance are potential targets
11378
      self.target_uuids = owned_groups - inst_groups
11379

    
11380
    conflicting_groups = self.target_uuids & inst_groups
11381
    if conflicting_groups:
11382
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11383
                                 " used by the instance '%s'" %
11384
                                 (utils.CommaJoin(conflicting_groups),
11385
                                  self.op.instance_name),
11386
                                 errors.ECODE_INVAL)
11387

    
11388
    if not self.target_uuids:
11389
      raise errors.OpPrereqError("There are no possible target groups",
11390
                                 errors.ECODE_INVAL)
11391

    
11392
  def BuildHooksEnv(self):
11393
    """Build hooks env.
11394

11395
    """
11396
    assert self.target_uuids
11397

    
11398
    env = {
11399
      "TARGET_GROUPS": " ".join(self.target_uuids),
11400
      }
11401

    
11402
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11403

    
11404
    return env
11405

    
11406
  def BuildHooksNodes(self):
11407
    """Build hooks nodes.
11408

11409
    """
11410
    mn = self.cfg.GetMasterNode()
11411
    return ([mn], [mn])
11412

    
11413
  def Exec(self, feedback_fn):
11414
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11415

    
11416
    assert instances == [self.op.instance_name], "Instance not locked"
11417

    
11418
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11419
                     instances=instances, target_groups=list(self.target_uuids))
11420

    
11421
    ial.Run(self.op.iallocator)
11422

    
11423
    if not ial.success:
11424
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11425
                                 " instance '%s' using iallocator '%s': %s" %
11426
                                 (self.op.instance_name, self.op.iallocator,
11427
                                  ial.info),
11428
                                 errors.ECODE_NORES)
11429

    
11430
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11431

    
11432
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11433
                 " instance '%s'", len(jobs), self.op.instance_name)
11434

    
11435
    return ResultWithJobs(jobs)
11436

    
11437

    
11438
class LUBackupQuery(NoHooksLU):
11439
  """Query the exports list
11440

11441
  """
11442
  REQ_BGL = False
11443

    
11444
  def ExpandNames(self):
11445
    self.needed_locks = {}
11446
    self.share_locks[locking.LEVEL_NODE] = 1
11447
    if not self.op.nodes:
11448
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11449
    else:
11450
      self.needed_locks[locking.LEVEL_NODE] = \
11451
        _GetWantedNodes(self, self.op.nodes)
11452

    
11453
  def Exec(self, feedback_fn):
11454
    """Compute the list of all the exported system images.
11455

11456
    @rtype: dict
11457
    @return: a dictionary with the structure node->(export-list)
11458
        where export-list is a list of the instances exported on
11459
        that node.
11460

11461
    """
11462
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11463
    rpcresult = self.rpc.call_export_list(self.nodes)
11464
    result = {}
11465
    for node in rpcresult:
11466
      if rpcresult[node].fail_msg:
11467
        result[node] = False
11468
      else:
11469
        result[node] = rpcresult[node].payload
11470

    
11471
    return result
11472

    
11473

    
11474
class LUBackupPrepare(NoHooksLU):
11475
  """Prepares an instance for an export and returns useful information.
11476

11477
  """
11478
  REQ_BGL = False
11479

    
11480
  def ExpandNames(self):
11481
    self._ExpandAndLockInstance()
11482

    
11483
  def CheckPrereq(self):
11484
    """Check prerequisites.
11485

11486
    """
11487
    instance_name = self.op.instance_name
11488

    
11489
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11490
    assert self.instance is not None, \
11491
          "Cannot retrieve locked instance %s" % self.op.instance_name
11492
    _CheckNodeOnline(self, self.instance.primary_node)
11493

    
11494
    self._cds = _GetClusterDomainSecret()
11495

    
11496
  def Exec(self, feedback_fn):
11497
    """Prepares an instance for an export.
11498

11499
    """
11500
    instance = self.instance
11501

    
11502
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11503
      salt = utils.GenerateSecret(8)
11504

    
11505
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11506
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11507
                                              constants.RIE_CERT_VALIDITY)
11508
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11509

    
11510
      (name, cert_pem) = result.payload
11511

    
11512
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11513
                                             cert_pem)
11514

    
11515
      return {
11516
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11517
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11518
                          salt),
11519
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11520
        }
11521

    
11522
    return None
11523

    
11524

    
11525
class LUBackupExport(LogicalUnit):
11526
  """Export an instance to an image in the cluster.
11527

11528
  """
11529
  HPATH = "instance-export"
11530
  HTYPE = constants.HTYPE_INSTANCE
11531
  REQ_BGL = False
11532

    
11533
  def CheckArguments(self):
11534
    """Check the arguments.
11535

11536
    """
11537
    self.x509_key_name = self.op.x509_key_name
11538
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11539

    
11540
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11541
      if not self.x509_key_name:
11542
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11543
                                   errors.ECODE_INVAL)
11544

    
11545
      if not self.dest_x509_ca_pem:
11546
        raise errors.OpPrereqError("Missing destination X509 CA",
11547
                                   errors.ECODE_INVAL)
11548

    
11549
  def ExpandNames(self):
11550
    self._ExpandAndLockInstance()
11551

    
11552
    # Lock all nodes for local exports
11553
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11554
      # FIXME: lock only instance primary and destination node
11555
      #
11556
      # Sad but true, for now we have do lock all nodes, as we don't know where
11557
      # the previous export might be, and in this LU we search for it and
11558
      # remove it from its current node. In the future we could fix this by:
11559
      #  - making a tasklet to search (share-lock all), then create the
11560
      #    new one, then one to remove, after
11561
      #  - removing the removal operation altogether
11562
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11563

    
11564
  def DeclareLocks(self, level):
11565
    """Last minute lock declaration."""
11566
    # All nodes are locked anyway, so nothing to do here.
11567

    
11568
  def BuildHooksEnv(self):
11569
    """Build hooks env.
11570

11571
    This will run on the master, primary node and target node.
11572

11573
    """
11574
    env = {
11575
      "EXPORT_MODE": self.op.mode,
11576
      "EXPORT_NODE": self.op.target_node,
11577
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11578
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11579
      # TODO: Generic function for boolean env variables
11580
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11581
      }
11582

    
11583
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11584

    
11585
    return env
11586

    
11587
  def BuildHooksNodes(self):
11588
    """Build hooks nodes.
11589

11590
    """
11591
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11592

    
11593
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11594
      nl.append(self.op.target_node)
11595

    
11596
    return (nl, nl)
11597

    
11598
  def CheckPrereq(self):
11599
    """Check prerequisites.
11600

11601
    This checks that the instance and node names are valid.
11602

11603
    """
11604
    instance_name = self.op.instance_name
11605

    
11606
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11607
    assert self.instance is not None, \
11608
          "Cannot retrieve locked instance %s" % self.op.instance_name
11609
    _CheckNodeOnline(self, self.instance.primary_node)
11610

    
11611
    if (self.op.remove_instance and self.instance.admin_up and
11612
        not self.op.shutdown):
11613
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11614
                                 " down before")
11615

    
11616
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11617
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11618
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11619
      assert self.dst_node is not None
11620

    
11621
      _CheckNodeOnline(self, self.dst_node.name)
11622
      _CheckNodeNotDrained(self, self.dst_node.name)
11623

    
11624
      self._cds = None
11625
      self.dest_disk_info = None
11626
      self.dest_x509_ca = None
11627

    
11628
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11629
      self.dst_node = None
11630

    
11631
      if len(self.op.target_node) != len(self.instance.disks):
11632
        raise errors.OpPrereqError(("Received destination information for %s"
11633
                                    " disks, but instance %s has %s disks") %
11634
                                   (len(self.op.target_node), instance_name,
11635
                                    len(self.instance.disks)),
11636
                                   errors.ECODE_INVAL)
11637

    
11638
      cds = _GetClusterDomainSecret()
11639

    
11640
      # Check X509 key name
11641
      try:
11642
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11643
      except (TypeError, ValueError), err:
11644
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11645

    
11646
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11647
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11648
                                   errors.ECODE_INVAL)
11649

    
11650
      # Load and verify CA
11651
      try:
11652
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11653
      except OpenSSL.crypto.Error, err:
11654
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11655
                                   (err, ), errors.ECODE_INVAL)
11656

    
11657
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11658
      if errcode is not None:
11659
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11660
                                   (msg, ), errors.ECODE_INVAL)
11661

    
11662
      self.dest_x509_ca = cert
11663

    
11664
      # Verify target information
11665
      disk_info = []
11666
      for idx, disk_data in enumerate(self.op.target_node):
11667
        try:
11668
          (host, port, magic) = \
11669
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11670
        except errors.GenericError, err:
11671
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11672
                                     (idx, err), errors.ECODE_INVAL)
11673

    
11674
        disk_info.append((host, port, magic))
11675

    
11676
      assert len(disk_info) == len(self.op.target_node)
11677
      self.dest_disk_info = disk_info
11678

    
11679
    else:
11680
      raise errors.ProgrammerError("Unhandled export mode %r" %
11681
                                   self.op.mode)
11682

    
11683
    # instance disk type verification
11684
    # TODO: Implement export support for file-based disks
11685
    for disk in self.instance.disks:
11686
      if disk.dev_type == constants.LD_FILE:
11687
        raise errors.OpPrereqError("Export not supported for instances with"
11688
                                   " file-based disks", errors.ECODE_INVAL)
11689

    
11690
  def _CleanupExports(self, feedback_fn):
11691
    """Removes exports of current instance from all other nodes.
11692

11693
    If an instance in a cluster with nodes A..D was exported to node C, its
11694
    exports will be removed from the nodes A, B and D.
11695

11696
    """
11697
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11698

    
11699
    nodelist = self.cfg.GetNodeList()
11700
    nodelist.remove(self.dst_node.name)
11701

    
11702
    # on one-node clusters nodelist will be empty after the removal
11703
    # if we proceed the backup would be removed because OpBackupQuery
11704
    # substitutes an empty list with the full cluster node list.
11705
    iname = self.instance.name
11706
    if nodelist:
11707
      feedback_fn("Removing old exports for instance %s" % iname)
11708
      exportlist = self.rpc.call_export_list(nodelist)
11709
      for node in exportlist:
11710
        if exportlist[node].fail_msg:
11711
          continue
11712
        if iname in exportlist[node].payload:
11713
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11714
          if msg:
11715
            self.LogWarning("Could not remove older export for instance %s"
11716
                            " on node %s: %s", iname, node, msg)
11717

    
11718
  def Exec(self, feedback_fn):
11719
    """Export an instance to an image in the cluster.
11720

11721
    """
11722
    assert self.op.mode in constants.EXPORT_MODES
11723

    
11724
    instance = self.instance
11725
    src_node = instance.primary_node
11726

    
11727
    if self.op.shutdown:
11728
      # shutdown the instance, but not the disks
11729
      feedback_fn("Shutting down instance %s" % instance.name)
11730
      result = self.rpc.call_instance_shutdown(src_node, instance,
11731
                                               self.op.shutdown_timeout)
11732
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11733
      result.Raise("Could not shutdown instance %s on"
11734
                   " node %s" % (instance.name, src_node))
11735

    
11736
    # set the disks ID correctly since call_instance_start needs the
11737
    # correct drbd minor to create the symlinks
11738
    for disk in instance.disks:
11739
      self.cfg.SetDiskID(disk, src_node)
11740

    
11741
    activate_disks = (not instance.admin_up)
11742

    
11743
    if activate_disks:
11744
      # Activate the instance disks if we'exporting a stopped instance
11745
      feedback_fn("Activating disks for %s" % instance.name)
11746
      _StartInstanceDisks(self, instance, None)
11747

    
11748
    try:
11749
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11750
                                                     instance)
11751

    
11752
      helper.CreateSnapshots()
11753
      try:
11754
        if (self.op.shutdown and instance.admin_up and
11755
            not self.op.remove_instance):
11756
          assert not activate_disks
11757
          feedback_fn("Starting instance %s" % instance.name)
11758
          result = self.rpc.call_instance_start(src_node, instance,
11759
                                                None, None, False)
11760
          msg = result.fail_msg
11761
          if msg:
11762
            feedback_fn("Failed to start instance: %s" % msg)
11763
            _ShutdownInstanceDisks(self, instance)
11764
            raise errors.OpExecError("Could not start instance: %s" % msg)
11765

    
11766
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11767
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11768
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11769
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11770
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11771

    
11772
          (key_name, _, _) = self.x509_key_name
11773

    
11774
          dest_ca_pem = \
11775
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11776
                                            self.dest_x509_ca)
11777

    
11778
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11779
                                                     key_name, dest_ca_pem,
11780
                                                     timeouts)
11781
      finally:
11782
        helper.Cleanup()
11783

    
11784
      # Check for backwards compatibility
11785
      assert len(dresults) == len(instance.disks)
11786
      assert compat.all(isinstance(i, bool) for i in dresults), \
11787
             "Not all results are boolean: %r" % dresults
11788

    
11789
    finally:
11790
      if activate_disks:
11791
        feedback_fn("Deactivating disks for %s" % instance.name)
11792
        _ShutdownInstanceDisks(self, instance)
11793

    
11794
    if not (compat.all(dresults) and fin_resu):
11795
      failures = []
11796
      if not fin_resu:
11797
        failures.append("export finalization")
11798
      if not compat.all(dresults):
11799
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11800
                               if not dsk)
11801
        failures.append("disk export: disk(s) %s" % fdsk)
11802

    
11803
      raise errors.OpExecError("Export failed, errors in %s" %
11804
                               utils.CommaJoin(failures))
11805

    
11806
    # At this point, the export was successful, we can cleanup/finish
11807

    
11808
    # Remove instance if requested
11809
    if self.op.remove_instance:
11810
      feedback_fn("Removing instance %s" % instance.name)
11811
      _RemoveInstance(self, feedback_fn, instance,
11812
                      self.op.ignore_remove_failures)
11813

    
11814
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11815
      self._CleanupExports(feedback_fn)
11816

    
11817
    return fin_resu, dresults
11818

    
11819

    
11820
class LUBackupRemove(NoHooksLU):
11821
  """Remove exports related to the named instance.
11822

11823
  """
11824
  REQ_BGL = False
11825

    
11826
  def ExpandNames(self):
11827
    self.needed_locks = {}
11828
    # We need all nodes to be locked in order for RemoveExport to work, but we
11829
    # don't need to lock the instance itself, as nothing will happen to it (and
11830
    # we can remove exports also for a removed instance)
11831
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11832

    
11833
  def Exec(self, feedback_fn):
11834
    """Remove any export.
11835

11836
    """
11837
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11838
    # If the instance was not found we'll try with the name that was passed in.
11839
    # This will only work if it was an FQDN, though.
11840
    fqdn_warn = False
11841
    if not instance_name:
11842
      fqdn_warn = True
11843
      instance_name = self.op.instance_name
11844

    
11845
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11846
    exportlist = self.rpc.call_export_list(locked_nodes)
11847
    found = False
11848
    for node in exportlist:
11849
      msg = exportlist[node].fail_msg
11850
      if msg:
11851
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11852
        continue
11853
      if instance_name in exportlist[node].payload:
11854
        found = True
11855
        result = self.rpc.call_export_remove(node, instance_name)
11856
        msg = result.fail_msg
11857
        if msg:
11858
          logging.error("Could not remove export for instance %s"
11859
                        " on node %s: %s", instance_name, node, msg)
11860

    
11861
    if fqdn_warn and not found:
11862
      feedback_fn("Export not found. If trying to remove an export belonging"
11863
                  " to a deleted instance please use its Fully Qualified"
11864
                  " Domain Name.")
11865

    
11866

    
11867
class LUGroupAdd(LogicalUnit):
11868
  """Logical unit for creating node groups.
11869

11870
  """
11871
  HPATH = "group-add"
11872
  HTYPE = constants.HTYPE_GROUP
11873
  REQ_BGL = False
11874

    
11875
  def ExpandNames(self):
11876
    # We need the new group's UUID here so that we can create and acquire the
11877
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11878
    # that it should not check whether the UUID exists in the configuration.
11879
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11880
    self.needed_locks = {}
11881
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11882

    
11883
  def CheckPrereq(self):
11884
    """Check prerequisites.
11885

11886
    This checks that the given group name is not an existing node group
11887
    already.
11888

11889
    """
11890
    try:
11891
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11892
    except errors.OpPrereqError:
11893
      pass
11894
    else:
11895
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11896
                                 " node group (UUID: %s)" %
11897
                                 (self.op.group_name, existing_uuid),
11898
                                 errors.ECODE_EXISTS)
11899

    
11900
    if self.op.ndparams:
11901
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11902

    
11903
  def BuildHooksEnv(self):
11904
    """Build hooks env.
11905

11906
    """
11907
    return {
11908
      "GROUP_NAME": self.op.group_name,
11909
      }
11910

    
11911
  def BuildHooksNodes(self):
11912
    """Build hooks nodes.
11913

11914
    """
11915
    mn = self.cfg.GetMasterNode()
11916
    return ([mn], [mn])
11917

    
11918
  def Exec(self, feedback_fn):
11919
    """Add the node group to the cluster.
11920

11921
    """
11922
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11923
                                  uuid=self.group_uuid,
11924
                                  alloc_policy=self.op.alloc_policy,
11925
                                  ndparams=self.op.ndparams)
11926

    
11927
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11928
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11929

    
11930

    
11931
class LUGroupAssignNodes(NoHooksLU):
11932
  """Logical unit for assigning nodes to groups.
11933

11934
  """
11935
  REQ_BGL = False
11936

    
11937
  def ExpandNames(self):
11938
    # These raise errors.OpPrereqError on their own:
11939
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11940
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11941

    
11942
    # We want to lock all the affected nodes and groups. We have readily
11943
    # available the list of nodes, and the *destination* group. To gather the
11944
    # list of "source" groups, we need to fetch node information later on.
11945
    self.needed_locks = {
11946
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11947
      locking.LEVEL_NODE: self.op.nodes,
11948
      }
11949

    
11950
  def DeclareLocks(self, level):
11951
    if level == locking.LEVEL_NODEGROUP:
11952
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11953

    
11954
      # Try to get all affected nodes' groups without having the group or node
11955
      # lock yet. Needs verification later in the code flow.
11956
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11957

    
11958
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11959

    
11960
  def CheckPrereq(self):
11961
    """Check prerequisites.
11962

11963
    """
11964
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11965
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11966
            frozenset(self.op.nodes))
11967

    
11968
    expected_locks = (set([self.group_uuid]) |
11969
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11970
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11971
    if actual_locks != expected_locks:
11972
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11973
                               " current groups are '%s', used to be '%s'" %
11974
                               (utils.CommaJoin(expected_locks),
11975
                                utils.CommaJoin(actual_locks)))
11976

    
11977
    self.node_data = self.cfg.GetAllNodesInfo()
11978
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11979
    instance_data = self.cfg.GetAllInstancesInfo()
11980

    
11981
    if self.group is None:
11982
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11983
                               (self.op.group_name, self.group_uuid))
11984

    
11985
    (new_splits, previous_splits) = \
11986
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11987
                                             for node in self.op.nodes],
11988
                                            self.node_data, instance_data)
11989

    
11990
    if new_splits:
11991
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11992

    
11993
      if not self.op.force:
11994
        raise errors.OpExecError("The following instances get split by this"
11995
                                 " change and --force was not given: %s" %
11996
                                 fmt_new_splits)
11997
      else:
11998
        self.LogWarning("This operation will split the following instances: %s",
11999
                        fmt_new_splits)
12000

    
12001
        if previous_splits:
12002
          self.LogWarning("In addition, these already-split instances continue"
12003
                          " to be split across groups: %s",
12004
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12005

    
12006
  def Exec(self, feedback_fn):
12007
    """Assign nodes to a new group.
12008

12009
    """
12010
    for node in self.op.nodes:
12011
      self.node_data[node].group = self.group_uuid
12012

    
12013
    # FIXME: Depends on side-effects of modifying the result of
12014
    # C{cfg.GetAllNodesInfo}
12015

    
12016
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12017

    
12018
  @staticmethod
12019
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12020
    """Check for split instances after a node assignment.
12021

12022
    This method considers a series of node assignments as an atomic operation,
12023
    and returns information about split instances after applying the set of
12024
    changes.
12025

12026
    In particular, it returns information about newly split instances, and
12027
    instances that were already split, and remain so after the change.
12028

12029
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12030
    considered.
12031

12032
    @type changes: list of (node_name, new_group_uuid) pairs.
12033
    @param changes: list of node assignments to consider.
12034
    @param node_data: a dict with data for all nodes
12035
    @param instance_data: a dict with all instances to consider
12036
    @rtype: a two-tuple
12037
    @return: a list of instances that were previously okay and result split as a
12038
      consequence of this change, and a list of instances that were previously
12039
      split and this change does not fix.
12040

12041
    """
12042
    changed_nodes = dict((node, group) for node, group in changes
12043
                         if node_data[node].group != group)
12044

    
12045
    all_split_instances = set()
12046
    previously_split_instances = set()
12047

    
12048
    def InstanceNodes(instance):
12049
      return [instance.primary_node] + list(instance.secondary_nodes)
12050

    
12051
    for inst in instance_data.values():
12052
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12053
        continue
12054

    
12055
      instance_nodes = InstanceNodes(inst)
12056

    
12057
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12058
        previously_split_instances.add(inst.name)
12059

    
12060
      if len(set(changed_nodes.get(node, node_data[node].group)
12061
                 for node in instance_nodes)) > 1:
12062
        all_split_instances.add(inst.name)
12063

    
12064
    return (list(all_split_instances - previously_split_instances),
12065
            list(previously_split_instances & all_split_instances))
12066

    
12067

    
12068
class _GroupQuery(_QueryBase):
12069
  FIELDS = query.GROUP_FIELDS
12070

    
12071
  def ExpandNames(self, lu):
12072
    lu.needed_locks = {}
12073

    
12074
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12075
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12076

    
12077
    if not self.names:
12078
      self.wanted = [name_to_uuid[name]
12079
                     for name in utils.NiceSort(name_to_uuid.keys())]
12080
    else:
12081
      # Accept names to be either names or UUIDs.
12082
      missing = []
12083
      self.wanted = []
12084
      all_uuid = frozenset(self._all_groups.keys())
12085

    
12086
      for name in self.names:
12087
        if name in all_uuid:
12088
          self.wanted.append(name)
12089
        elif name in name_to_uuid:
12090
          self.wanted.append(name_to_uuid[name])
12091
        else:
12092
          missing.append(name)
12093

    
12094
      if missing:
12095
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12096
                                   utils.CommaJoin(missing),
12097
                                   errors.ECODE_NOENT)
12098

    
12099
  def DeclareLocks(self, lu, level):
12100
    pass
12101

    
12102
  def _GetQueryData(self, lu):
12103
    """Computes the list of node groups and their attributes.
12104

12105
    """
12106
    do_nodes = query.GQ_NODE in self.requested_data
12107
    do_instances = query.GQ_INST in self.requested_data
12108

    
12109
    group_to_nodes = None
12110
    group_to_instances = None
12111

    
12112
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12113
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12114
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12115
    # instance->node. Hence, we will need to process nodes even if we only need
12116
    # instance information.
12117
    if do_nodes or do_instances:
12118
      all_nodes = lu.cfg.GetAllNodesInfo()
12119
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12120
      node_to_group = {}
12121

    
12122
      for node in all_nodes.values():
12123
        if node.group in group_to_nodes:
12124
          group_to_nodes[node.group].append(node.name)
12125
          node_to_group[node.name] = node.group
12126

    
12127
      if do_instances:
12128
        all_instances = lu.cfg.GetAllInstancesInfo()
12129
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12130

    
12131
        for instance in all_instances.values():
12132
          node = instance.primary_node
12133
          if node in node_to_group:
12134
            group_to_instances[node_to_group[node]].append(instance.name)
12135

    
12136
        if not do_nodes:
12137
          # Do not pass on node information if it was not requested.
12138
          group_to_nodes = None
12139

    
12140
    return query.GroupQueryData([self._all_groups[uuid]
12141
                                 for uuid in self.wanted],
12142
                                group_to_nodes, group_to_instances)
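# An illustrative sketch (not part of _GroupQuery): how the group->nodes and
# group->instances maps used above can be derived from flat node and instance
# records. The record shapes below are hypothetical stand-ins for the
# configuration objects; instances are attributed to the group of their
# primary node, as in _GetQueryData.
def _SketchGroupMaps(wanted_groups, node_to_group, instance_to_primary):
  """Return (group_to_nodes, group_to_instances) for the wanted group UUIDs.

  >>> maps = _SketchGroupMaps(["uuid-a"],
  ...                         {"node1": "uuid-a", "node2": "uuid-b"},
  ...                         {"inst1": "node1", "inst2": "node2"})
  >>> maps
  ({'uuid-a': ['node1']}, {'uuid-a': ['inst1']})

  """
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  group_to_instances = dict((uuid, []) for uuid in wanted_groups)

  for (node, group) in sorted(node_to_group.items()):
    if group in group_to_nodes:
      group_to_nodes[group].append(node)

  for (instance, primary) in sorted(instance_to_primary.items()):
    group = node_to_group.get(primary)
    if group in group_to_instances:
      group_to_instances[group].append(instance)

  return (group_to_nodes, group_to_instances)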
12143

    
12144

    
12145
class LUGroupQuery(NoHooksLU):
12146
  """Logical unit for querying node groups.
12147

12148
  """
12149
  REQ_BGL = False
12150

    
12151
  def CheckArguments(self):
12152
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12153
                          self.op.output_fields, False)
12154

    
12155
  def ExpandNames(self):
12156
    self.gq.ExpandNames(self)
12157

    
12158
  def DeclareLocks(self, level):
12159
    self.gq.DeclareLocks(self, level)
12160

    
12161
  def Exec(self, feedback_fn):
12162
    return self.gq.OldStyleQuery(self)
12163

    
12164

    
12165
class LUGroupSetParams(LogicalUnit):
12166
  """Modifies the parameters of a node group.
12167

12168
  """
12169
  HPATH = "group-modify"
12170
  HTYPE = constants.HTYPE_GROUP
12171
  REQ_BGL = False
12172

    
12173
  def CheckArguments(self):
12174
    all_changes = [
12175
      self.op.ndparams,
12176
      self.op.alloc_policy,
12177
      ]
12178

    
12179
    if all_changes.count(None) == len(all_changes):
12180
      raise errors.OpPrereqError("Please pass at least one modification",
12181
                                 errors.ECODE_INVAL)
12182

    
12183
  def ExpandNames(self):
12184
    # This raises errors.OpPrereqError on its own:
12185
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12186

    
12187
    self.needed_locks = {
12188
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12189
      }
12190

    
12191
  def CheckPrereq(self):
12192
    """Check prerequisites.
12193

12194
    """
12195
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12196

    
12197
    if self.group is None:
12198
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12199
                               (self.op.group_name, self.group_uuid))
12200

    
12201
    if self.op.ndparams:
12202
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12203
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12204
      self.new_ndparams = new_ndparams
12205

    
12206
  def BuildHooksEnv(self):
12207
    """Build hooks env.
12208

12209
    """
12210
    return {
12211
      "GROUP_NAME": self.op.group_name,
12212
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12213
      }
12214

    
12215
  def BuildHooksNodes(self):
12216
    """Build hooks nodes.
12217

12218
    """
12219
    mn = self.cfg.GetMasterNode()
12220
    return ([mn], [mn])
12221

    
12222
  def Exec(self, feedback_fn):
12223
    """Modifies the node group.
12224

12225
    """
12226
    result = []
12227

    
12228
    if self.op.ndparams:
12229
      self.group.ndparams = self.new_ndparams
12230
      result.append(("ndparams", str(self.group.ndparams)))
12231

    
12232
    if self.op.alloc_policy:
12233
      self.group.alloc_policy = self.op.alloc_policy
12234

    
12235
    self.cfg.Update(self.group, feedback_fn)
12236
    return result
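# An illustrative sketch (not part of LUGroupSetParams): the ndparams change
# above amounts to overlaying the requested values on the group's current
# parameters and validating the result. This simplified helper assumes a plain
# "new values win" merge; the real _GetUpdatedParams used above may also
# handle special reset values. The parameter names in the doctest are made up.
def _SketchUpdateParams(old_params, updates):
  """Return a new dict with C{updates} overlaid on C{old_params}.

  >>> old = {"oob_program": "/bin/true", "spindle_count": 1}
  >>> sorted(_SketchUpdateParams(old, {"spindle_count": 2}).items())
  [('oob_program', '/bin/true'), ('spindle_count', 2)]

  """
  params = dict(old_params)
  params.update(updates)
  return params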
12237

    
12238

    
12239
class LUGroupRemove(LogicalUnit):
12240
  HPATH = "group-remove"
12241
  HTYPE = constants.HTYPE_GROUP
12242
  REQ_BGL = False
12243

    
12244
  def ExpandNames(self):
12245
    # This raises errors.OpPrereqError on its own:
12246
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12247
    self.needed_locks = {
12248
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12249
      }
12250

    
12251
  def CheckPrereq(self):
12252
    """Check prerequisites.
12253

12254
    This checks that the given group name exists as a node group, that it is
12255
    empty (i.e., contains no nodes), and that it is not the last group of the
12256
    cluster.
12257

12258
    """
12259
    # Verify that the group is empty.
12260
    group_nodes = [node.name
12261
                   for node in self.cfg.GetAllNodesInfo().values()
12262
                   if node.group == self.group_uuid]
12263

    
12264
    if group_nodes:
12265
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12266
                                 " nodes: %s" %
12267
                                 (self.op.group_name,
12268
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12269
                                 errors.ECODE_STATE)
12270

    
12271
    # Verify the cluster would not be left group-less.
12272
    if len(self.cfg.GetNodeGroupList()) == 1:
12273
      raise errors.OpPrereqError("Group '%s' is the only group,"
12274
                                 " cannot be removed" %
12275
                                 self.op.group_name,
12276
                                 errors.ECODE_STATE)
12277

    
12278
  def BuildHooksEnv(self):
12279
    """Build hooks env.
12280

12281
    """
12282
    return {
12283
      "GROUP_NAME": self.op.group_name,
12284
      }
12285

    
12286
  def BuildHooksNodes(self):
12287
    """Build hooks nodes.
12288

12289
    """
12290
    mn = self.cfg.GetMasterNode()
12291
    return ([mn], [mn])
12292

    
12293
  def Exec(self, feedback_fn):
12294
    """Remove the node group.
12295

12296
    """
12297
    try:
12298
      self.cfg.RemoveNodeGroup(self.group_uuid)
12299
    except errors.ConfigurationError:
12300
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12301
                               (self.op.group_name, self.group_uuid))
12302

    
12303
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12304

    
12305

    
12306
class LUGroupRename(LogicalUnit):
12307
  HPATH = "group-rename"
12308
  HTYPE = constants.HTYPE_GROUP
12309
  REQ_BGL = False
12310

    
12311
  def ExpandNames(self):
12312
    # This raises errors.OpPrereqError on its own:
12313
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12314

    
12315
    self.needed_locks = {
12316
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12317
      }
12318

    
12319
  def CheckPrereq(self):
12320
    """Check prerequisites.
12321

12322
    Ensures requested new name is not yet used.
12323

12324
    """
12325
    try:
12326
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12327
    except errors.OpPrereqError:
12328
      pass
12329
    else:
12330
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12331
                                 " node group (UUID: %s)" %
12332
                                 (self.op.new_name, new_name_uuid),
12333
                                 errors.ECODE_EXISTS)
12334

    
12335
  def BuildHooksEnv(self):
12336
    """Build hooks env.
12337

12338
    """
12339
    return {
12340
      "OLD_NAME": self.op.group_name,
12341
      "NEW_NAME": self.op.new_name,
12342
      }
12343

    
12344
  def BuildHooksNodes(self):
12345
    """Build hooks nodes.
12346

12347
    """
12348
    mn = self.cfg.GetMasterNode()
12349

    
12350
    all_nodes = self.cfg.GetAllNodesInfo()
12351
    all_nodes.pop(mn, None)
12352

    
12353
    run_nodes = [mn]
12354
    run_nodes.extend(node.name for node in all_nodes.values()
12355
                     if node.group == self.group_uuid)
12356

    
12357
    return (run_nodes, run_nodes)
12358

    
12359
  def Exec(self, feedback_fn):
12360
    """Rename the node group.
12361

12362
    """
12363
    group = self.cfg.GetNodeGroup(self.group_uuid)
12364

    
12365
    if group is None:
12366
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12367
                               (self.op.group_name, self.group_uuid))
12368

    
12369
    group.name = self.op.new_name
12370
    self.cfg.Update(group, feedback_fn)
12371

    
12372
    return self.op.new_name
12373

    
12374

    
12375
class LUGroupEvacuate(LogicalUnit):
12376
  HPATH = "group-evacuate"
12377
  HTYPE = constants.HTYPE_GROUP
12378
  REQ_BGL = False
12379

    
12380
  def ExpandNames(self):
12381
    # This raises errors.OpPrereqError on its own:
12382
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12383

    
12384
    if self.op.target_groups:
12385
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12386
                                  self.op.target_groups)
12387
    else:
12388
      self.req_target_uuids = []
12389

    
12390
    if self.group_uuid in self.req_target_uuids:
12391
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12392
                                 " as a target group (targets are %s)" %
12393
                                 (self.group_uuid,
12394
                                  utils.CommaJoin(self.req_target_uuids)),
12395
                                 errors.ECODE_INVAL)
12396

    
12397
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12398

    
12399
    self.share_locks = _ShareAll()
12400
    self.needed_locks = {
12401
      locking.LEVEL_INSTANCE: [],
12402
      locking.LEVEL_NODEGROUP: [],
12403
      locking.LEVEL_NODE: [],
12404
      }
12405

    
12406
  def DeclareLocks(self, level):
12407
    if level == locking.LEVEL_INSTANCE:
12408
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12409

    
12410
      # Lock instances optimistically, needs verification once node and group
12411
      # locks have been acquired
12412
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12413
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12414

    
12415
    elif level == locking.LEVEL_NODEGROUP:
12416
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12417

    
12418
      if self.req_target_uuids:
12419
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12420

    
12421
        # Lock all groups used by instances optimistically; this requires going
12422
        # via the node before it's locked, requiring verification later on
12423
        lock_groups.update(group_uuid
12424
                           for instance_name in
12425
                             self.owned_locks(locking.LEVEL_INSTANCE)
12426
                           for group_uuid in
12427
                             self.cfg.GetInstanceNodeGroups(instance_name))
12428
      else:
12429
        # No target groups, need to lock all of them
12430
        lock_groups = locking.ALL_SET
12431

    
12432
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12433

    
12434
    elif level == locking.LEVEL_NODE:
12435
      # This will only lock the nodes in the group to be evacuated which
12436
      # contain actual instances
12437
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12438
      self._LockInstancesNodes()
12439

    
12440
      # Lock all nodes in group to be evacuated and target groups
12441
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12442
      assert self.group_uuid in owned_groups
12443
      member_nodes = [node_name
12444
                      for group in owned_groups
12445
                      for node_name in self.cfg.GetNodeGroup(group).members]
12446
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12447

    
12448
  def CheckPrereq(self):
12449
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12450
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12451
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12452

    
12453
    assert owned_groups.issuperset(self.req_target_uuids)
12454
    assert self.group_uuid in owned_groups
12455

    
12456
    # Check if locked instances are still correct
12457
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12458

    
12459
    # Get instance information
12460
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12461

    
12462
    # Check if node groups for locked instances are still correct
12463
    for instance_name in owned_instances:
12464
      inst = self.instances[instance_name]
12465
      assert owned_nodes.issuperset(inst.all_nodes), \
12466
        "Instance %s's nodes changed while we kept the lock" % instance_name
12467

    
12468
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12469
                                             owned_groups)
12470

    
12471
      assert self.group_uuid in inst_groups, \
12472
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12473

    
12474
    if self.req_target_uuids:
12475
      # User requested specific target groups
12476
      self.target_uuids = self.req_target_uuids
12477
    else:
12478
      # All groups except the one to be evacuated are potential targets
12479
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12480
                           if group_uuid != self.group_uuid]
12481

    
12482
      if not self.target_uuids:
12483
        raise errors.OpPrereqError("There are no possible target groups",
12484
                                   errors.ECODE_INVAL)
12485

    
12486
  def BuildHooksEnv(self):
12487
    """Build hooks env.
12488

12489
    """
12490
    return {
12491
      "GROUP_NAME": self.op.group_name,
12492
      "TARGET_GROUPS": " ".join(self.target_uuids),
12493
      }
12494

    
12495
  def BuildHooksNodes(self):
12496
    """Build hooks nodes.
12497

12498
    """
12499
    mn = self.cfg.GetMasterNode()
12500

    
12501
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12502

    
12503
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12504

    
12505
    return (run_nodes, run_nodes)
12506

    
12507
  def Exec(self, feedback_fn):
12508
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12509

    
12510
    assert self.group_uuid not in self.target_uuids
12511

    
12512
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12513
                     instances=instances, target_groups=self.target_uuids)
12514

    
12515
    ial.Run(self.op.iallocator)
12516

    
12517
    if not ial.success:
12518
      raise errors.OpPrereqError("Can't compute group evacuation using"
12519
                                 " iallocator '%s': %s" %
12520
                                 (self.op.iallocator, ial.info),
12521
                                 errors.ECODE_NORES)
12522

    
12523
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12524

    
12525
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12526
                 len(jobs), self.op.group_name)
12527

    
12528
    return ResultWithJobs(jobs)
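# An illustrative sketch (not part of LUGroupEvacuate): the change-group
# answer returned by the iallocator has the (moved, failed, jobs) shape
# checked by _NEVAC_RESULT further down in this module; the job part is what
# Exec above feeds through _LoadNodeEvacResult and wraps in ResultWithJobs.
# The names and values below are invented and only show the structure.
_SKETCH_CHG_GROUP_RESULT = (
  # moved: (instance, target group, new nodes)
  [("inst1.example.com", "group-b",
    ["node3.example.com", "node4.example.com"])],
  # failed: (instance, reason)
  [("inst2.example.com", "instance is not mirrored")],
  # jobs: one inner list of opcode dictionaries per job to be submitted
  [[{"OP_ID": "OP_INSTANCE_MIGRATE",
     "instance_name": "inst1.example.com"}]],
  )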
12529

    
12530

    
12531
class TagsLU(NoHooksLU): # pylint: disable=W0223
12532
  """Generic tags LU.
12533

12534
  This is an abstract class which is the parent of all the other tags LUs.
12535

12536
  """
12537
  def ExpandNames(self):
12538
    self.group_uuid = None
12539
    self.needed_locks = {}
12540
    if self.op.kind == constants.TAG_NODE:
12541
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12542
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12543
    elif self.op.kind == constants.TAG_INSTANCE:
12544
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12545
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12546
    elif self.op.kind == constants.TAG_NODEGROUP:
12547
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12548

    
12549
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12550
    # not possible to acquire the BGL based on opcode parameters)
12551

    
12552
  def CheckPrereq(self):
12553
    """Check prerequisites.
12554

12555
    """
12556
    if self.op.kind == constants.TAG_CLUSTER:
12557
      self.target = self.cfg.GetClusterInfo()
12558
    elif self.op.kind == constants.TAG_NODE:
12559
      self.target = self.cfg.GetNodeInfo(self.op.name)
12560
    elif self.op.kind == constants.TAG_INSTANCE:
12561
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12562
    elif self.op.kind == constants.TAG_NODEGROUP:
12563
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12564
    else:
12565
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12566
                                 str(self.op.kind), errors.ECODE_INVAL)
12567

    
12568

    
12569
class LUTagsGet(TagsLU):
12570
  """Returns the tags of a given object.
12571

12572
  """
12573
  REQ_BGL = False
12574

    
12575
  def ExpandNames(self):
12576
    TagsLU.ExpandNames(self)
12577

    
12578
    # Share locks as this is only a read operation
12579
    self.share_locks = _ShareAll()
12580

    
12581
  def Exec(self, feedback_fn):
12582
    """Returns the tag list.
12583

12584
    """
12585
    return list(self.target.GetTags())
12586

    
12587

    
12588
class LUTagsSearch(NoHooksLU):
12589
  """Searches the tags for a given pattern.
12590

12591
  """
12592
  REQ_BGL = False
12593

    
12594
  def ExpandNames(self):
12595
    self.needed_locks = {}
12596

    
12597
  def CheckPrereq(self):
12598
    """Check prerequisites.
12599

12600
    This checks the pattern passed for validity by compiling it.
12601

12602
    """
12603
    try:
12604
      self.re = re.compile(self.op.pattern)
12605
    except re.error, err:
12606
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12607
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12608

    
12609
  def Exec(self, feedback_fn):
12610
    """Returns the tag list.
12611

12612
    """
12613
    cfg = self.cfg
12614
    tgts = [("/cluster", cfg.GetClusterInfo())]
12615
    ilist = cfg.GetAllInstancesInfo().values()
12616
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12617
    nlist = cfg.GetAllNodesInfo().values()
12618
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12619
    tgts.extend(("/nodegroup/%s" % n.name, n)
12620
                for n in cfg.GetAllNodeGroupsInfo().values())
12621
    results = []
12622
    for path, target in tgts:
12623
      for tag in target.GetTags():
12624
        if self.re.search(tag):
12625
          results.append((path, tag))
12626
    return results
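# An illustrative sketch (not part of LUTagsSearch): the tag search above is a
# plain regular-expression scan over (path, tags) pairs, using the
# module-level C{re} import. The paths and tags below are hypothetical.
def _SketchSearchTags(pattern, tagged_objects):
  """Return sorted (path, tag) pairs whose tag matches C{pattern}.

  >>> _SketchSearchTags("^env:",
  ...                   {"/instances/web1": ["env:prod", "www"],
  ...                    "/nodes/node1": ["env:test"]})
  [('/instances/web1', 'env:prod'), ('/nodes/node1', 'env:test')]

  """
  regex = re.compile(pattern)
  results = []
  for (path, tags) in tagged_objects.items():
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return sorted(results)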
12627

    
12628

    
12629
class LUTagsSet(TagsLU):
12630
  """Sets a tag on a given object.
12631

12632
  """
12633
  REQ_BGL = False
12634

    
12635
  def CheckPrereq(self):
12636
    """Check prerequisites.
12637

12638
    This checks the type and length of the tag name and value.
12639

12640
    """
12641
    TagsLU.CheckPrereq(self)
12642
    for tag in self.op.tags:
12643
      objects.TaggableObject.ValidateTag(tag)
12644

    
12645
  def Exec(self, feedback_fn):
12646
    """Sets the tag.
12647

12648
    """
12649
    try:
12650
      for tag in self.op.tags:
12651
        self.target.AddTag(tag)
12652
    except errors.TagError, err:
12653
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12654
    self.cfg.Update(self.target, feedback_fn)
12655

    
12656

    
12657
class LUTagsDel(TagsLU):
12658
  """Delete a list of tags from a given object.
12659

12660
  """
12661
  REQ_BGL = False
12662

    
12663
  def CheckPrereq(self):
12664
    """Check prerequisites.
12665

12666
    This checks that we have the given tag.
12667

12668
    """
12669
    TagsLU.CheckPrereq(self)
12670
    for tag in self.op.tags:
12671
      objects.TaggableObject.ValidateTag(tag)
12672
    del_tags = frozenset(self.op.tags)
12673
    cur_tags = self.target.GetTags()
12674

    
12675
    diff_tags = del_tags - cur_tags
12676
    if diff_tags:
12677
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12678
      raise errors.OpPrereqError("Tag(s) %s not found" %
12679
                                 (utils.CommaJoin(diff_names), ),
12680
                                 errors.ECODE_NOENT)
12681

    
12682
  def Exec(self, feedback_fn):
12683
    """Remove the tag from the object.
12684

12685
    """
12686
    for tag in self.op.tags:
12687
      self.target.RemoveTag(tag)
12688
    self.cfg.Update(self.target, feedback_fn)
12689

    
12690

    
12691
class LUTestDelay(NoHooksLU):
12692
  """Sleep for a specified amount of time.
12693

12694
  This LU sleeps on the master and/or nodes for a specified amount of
12695
  time.
12696

12697
  """
12698
  REQ_BGL = False
12699

    
12700
  def ExpandNames(self):
12701
    """Expand names and set required locks.
12702

12703
    This expands the node list, if any.
12704

12705
    """
12706
    self.needed_locks = {}
12707
    if self.op.on_nodes:
12708
      # _GetWantedNodes can be used here, but is not always appropriate to use
12709
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12710
      # more information.
12711
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12712
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12713

    
12714
  def _TestDelay(self):
12715
    """Do the actual sleep.
12716

12717
    """
12718
    if self.op.on_master:
12719
      if not utils.TestDelay(self.op.duration):
12720
        raise errors.OpExecError("Error during master delay test")
12721
    if self.op.on_nodes:
12722
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12723
      for node, node_result in result.items():
12724
        node_result.Raise("Failure during rpc call to node %s" % node)
12725

    
12726
  def Exec(self, feedback_fn):
12727
    """Execute the test delay opcode, with the wanted repetitions.
12728

12729
    """
12730
    if self.op.repeat == 0:
12731
      self._TestDelay()
12732
    else:
12733
      top_value = self.op.repeat - 1
12734
      for i in range(self.op.repeat):
12735
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12736
        self._TestDelay()
12737

    
12738

    
12739
class LUTestJqueue(NoHooksLU):
12740
  """Utility LU to test some aspects of the job queue.
12741

12742
  """
12743
  REQ_BGL = False
12744

    
12745
  # Must be lower than default timeout for WaitForJobChange to see whether it
12746
  # notices changed jobs
12747
  _CLIENT_CONNECT_TIMEOUT = 20.0
12748
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12749

    
12750
  @classmethod
12751
  def _NotifyUsingSocket(cls, cb, errcls):
12752
    """Opens a Unix socket and waits for another program to connect.
12753

12754
    @type cb: callable
12755
    @param cb: Callback to send socket name to client
12756
    @type errcls: class
12757
    @param errcls: Exception class to use for errors
12758

12759
    """
12760
    # Using a temporary directory as there's no easy way to create temporary
12761
    # sockets without writing a custom loop around tempfile.mktemp and
12762
    # socket.bind
12763
    tmpdir = tempfile.mkdtemp()
12764
    try:
12765
      tmpsock = utils.PathJoin(tmpdir, "sock")
12766

    
12767
      logging.debug("Creating temporary socket at %s", tmpsock)
12768
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12769
      try:
12770
        sock.bind(tmpsock)
12771
        sock.listen(1)
12772

    
12773
        # Send details to client
12774
        cb(tmpsock)
12775

    
12776
        # Wait for client to connect before continuing
12777
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12778
        try:
12779
          (conn, _) = sock.accept()
12780
        except socket.error, err:
12781
          raise errcls("Client didn't connect in time (%s)" % err)
12782
      finally:
12783
        sock.close()
12784
    finally:
12785
      # Remove as soon as client is connected
12786
      shutil.rmtree(tmpdir)
12787

    
12788
    # Wait for client to close
12789
    try:
12790
      try:
12791
        # pylint: disable=E1101
12792
        # Instance of '_socketobject' has no ... member
12793
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12794
        conn.recv(1)
12795
      except socket.error, err:
12796
        raise errcls("Client failed to confirm notification (%s)" % err)
12797
    finally:
12798
      conn.close()
12799

    
12800
  def _SendNotification(self, test, arg, sockname):
12801
    """Sends a notification to the client.
12802

12803
    @type test: string
12804
    @param test: Test name
12805
    @param arg: Test argument (depends on test)
12806
    @type sockname: string
12807
    @param sockname: Socket path
12808

12809
    """
12810
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12811

    
12812
  def _Notify(self, prereq, test, arg):
12813
    """Notifies the client of a test.
12814

12815
    @type prereq: bool
12816
    @param prereq: Whether this is a prereq-phase test
12817
    @type test: string
12818
    @param test: Test name
12819
    @param arg: Test argument (depends on test)
12820

12821
    """
12822
    if prereq:
12823
      errcls = errors.OpPrereqError
12824
    else:
12825
      errcls = errors.OpExecError
12826

    
12827
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12828
                                                  test, arg),
12829
                                   errcls)
12830

    
12831
  def CheckArguments(self):
12832
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12833
    self.expandnames_calls = 0
12834

    
12835
  def ExpandNames(self):
12836
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12837
    if checkargs_calls < 1:
12838
      raise errors.ProgrammerError("CheckArguments was not called")
12839

    
12840
    self.expandnames_calls += 1
12841

    
12842
    if self.op.notify_waitlock:
12843
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12844

    
12845
    self.LogInfo("Expanding names")
12846

    
12847
    # Get lock on master node (just to get a lock, not for a particular reason)
12848
    self.needed_locks = {
12849
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12850
      }
12851

    
12852
  def Exec(self, feedback_fn):
12853
    if self.expandnames_calls < 1:
12854
      raise errors.ProgrammerError("ExpandNames was not called")
12855

    
12856
    if self.op.notify_exec:
12857
      self._Notify(False, constants.JQT_EXEC, None)
12858

    
12859
    self.LogInfo("Executing")
12860

    
12861
    if self.op.log_messages:
12862
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12863
      for idx, msg in enumerate(self.op.log_messages):
12864
        self.LogInfo("Sending log message %s", idx + 1)
12865
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12866
        # Report how many test messages have been sent
12867
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12868

    
12869
    if self.op.fail:
12870
      raise errors.OpExecError("Opcode failure was requested")
12871

    
12872
    return True
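# An illustrative sketch (not part of LUTestJqueue): the client side of the
# notification handshake implemented by _NotifyUsingSocket above. The server
# hands the socket path to the client, waits for a connection and then for a
# single confirmation byte; a test driver could therefore answer like this
# (the helper name and timeout are made up).
def _SketchConfirmNotification(sockname, timeout=10.0):
  """Connect to C{sockname} and send the one-byte confirmation."""
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  client.settimeout(timeout)
  try:
    client.connect(sockname)
    # Any single byte will do; the server side only calls conn.recv(1)
    client.send("x")
  finally:
    client.close()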
12873

    
12874

    
12875
class IAllocator(object):
12876
  """IAllocator framework.
12877

12878
  An IAllocator instance has the following sets of attributes:
12879
    - cfg that is needed to query the cluster
12880
    - input data (all members of the _KEYS class attribute are required)
12881
    - four buffer attributes (in|out_data|text), that represent the
12882
      input (to the external script) in text and data structure format,
12883
      and the output from it, again in two formats
12884
    - the result variables from the script (success, info, nodes) for
12885
      easy usage
12886

12887
  """
12888
  # pylint: disable=R0902
12889
  # lots of instance attributes
12890

    
12891
  def __init__(self, cfg, rpc, mode, **kwargs):
12892
    self.cfg = cfg
12893
    self.rpc = rpc
12894
    # init buffer variables
12895
    self.in_text = self.out_text = self.in_data = self.out_data = None
12896
    # init all input fields so that pylint is happy
12897
    self.mode = mode
12898
    self.memory = self.disks = self.disk_template = None
12899
    self.os = self.tags = self.nics = self.vcpus = None
12900
    self.hypervisor = None
12901
    self.relocate_from = None
12902
    self.name = None
12903
    self.instances = None
12904
    self.evac_mode = None
12905
    self.target_groups = []
12906
    # computed fields
12907
    self.required_nodes = None
12908
    # init result fields
12909
    self.success = self.info = self.result = None
12910

    
12911
    try:
12912
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12913
    except KeyError:
12914
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12915
                                   " IAllocator" % self.mode)
12916

    
12917
    keyset = [n for (n, _) in keydata]
12918

    
12919
    for key in kwargs:
12920
      if key not in keyset:
12921
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12922
                                     " IAllocator" % key)
12923
      setattr(self, key, kwargs[key])
12924

    
12925
    for key in keyset:
12926
      if key not in kwargs:
12927
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12928
                                     " IAllocator" % key)
12929
    self._BuildInputData(compat.partial(fn, self), keydata)
12930

    
12931
  def _ComputeClusterData(self):
12932
    """Compute the generic allocator input data.
12933

12934
    This is the data that is independent of the actual operation.
12935

12936
    """
12937
    cfg = self.cfg
12938
    cluster_info = cfg.GetClusterInfo()
12939
    # cluster data
12940
    data = {
12941
      "version": constants.IALLOCATOR_VERSION,
12942
      "cluster_name": cfg.GetClusterName(),
12943
      "cluster_tags": list(cluster_info.GetTags()),
12944
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12945
      # we don't have job IDs
12946
      }
12947
    ninfo = cfg.GetAllNodesInfo()
12948
    iinfo = cfg.GetAllInstancesInfo().values()
12949
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12950

    
12951
    # node data
12952
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12953

    
12954
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12955
      hypervisor_name = self.hypervisor
12956
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12957
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12958
    else:
12959
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12960

    
12961
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12962
                                        hypervisor_name)
12963
    node_iinfo = \
12964
      self.rpc.call_all_instances_info(node_list,
12965
                                       cluster_info.enabled_hypervisors)
12966

    
12967
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12968

    
12969
    config_ndata = self._ComputeBasicNodeData(ninfo)
12970
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12971
                                                 i_list, config_ndata)
12972
    assert len(data["nodes"]) == len(ninfo), \
12973
        "Incomplete node data computed"
12974

    
12975
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12976

    
12977
    self.in_data = data
12978

    
12979
  @staticmethod
12980
  def _ComputeNodeGroupData(cfg):
12981
    """Compute node groups data.
12982

12983
    """
12984
    ng = dict((guuid, {
12985
      "name": gdata.name,
12986
      "alloc_policy": gdata.alloc_policy,
12987
      })
12988
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12989

    
12990
    return ng
12991

    
12992
  @staticmethod
12993
  def _ComputeBasicNodeData(node_cfg):
12994
    """Compute global node data.
12995

12996
    @rtype: dict
12997
    @returns: a dict mapping node name to a dict of static node attributes
12998

12999
    """
13000
    # fill in static (config-based) values
13001
    node_results = dict((ninfo.name, {
13002
      "tags": list(ninfo.GetTags()),
13003
      "primary_ip": ninfo.primary_ip,
13004
      "secondary_ip": ninfo.secondary_ip,
13005
      "offline": ninfo.offline,
13006
      "drained": ninfo.drained,
13007
      "master_candidate": ninfo.master_candidate,
13008
      "group": ninfo.group,
13009
      "master_capable": ninfo.master_capable,
13010
      "vm_capable": ninfo.vm_capable,
13011
      })
13012
      for ninfo in node_cfg.values())
13013

    
13014
    return node_results
13015

    
13016
  @staticmethod
13017
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13018
                              node_results):
13019
    """Compute global node data.
13020

13021
    @param node_results: the basic node structures as filled from the config
13022

13023
    """
13024
    # make a copy of the current dict
13025
    node_results = dict(node_results)
13026
    for nname, nresult in node_data.items():
13027
      assert nname in node_results, "Missing basic data for node %s" % nname
13028
      ninfo = node_cfg[nname]
13029

    
13030
      if not (ninfo.offline or ninfo.drained):
13031
        nresult.Raise("Can't get data for node %s" % nname)
13032
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13033
                                nname)
13034
        remote_info = nresult.payload
13035

    
13036
        for attr in ["memory_total", "memory_free", "memory_dom0",
13037
                     "vg_size", "vg_free", "cpu_total"]:
13038
          if attr not in remote_info:
13039
            raise errors.OpExecError("Node '%s' didn't return attribute"
13040
                                     " '%s'" % (nname, attr))
13041
          if not isinstance(remote_info[attr], int):
13042
            raise errors.OpExecError("Node '%s' returned invalid value"
13043
                                     " for '%s': %s" %
13044
                                     (nname, attr, remote_info[attr]))
13045
        # compute memory used by primary instances
13046
        i_p_mem = i_p_up_mem = 0
13047
        for iinfo, beinfo in i_list:
13048
          if iinfo.primary_node == nname:
13049
            i_p_mem += beinfo[constants.BE_MEMORY]
13050
            if iinfo.name not in node_iinfo[nname].payload:
13051
              i_used_mem = 0
13052
            else:
13053
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13054
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13055
            remote_info["memory_free"] -= max(0, i_mem_diff)
13056

    
13057
            if iinfo.admin_up:
13058
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13059

    
13060
        # compute memory used by instances
13061
        pnr_dyn = {
13062
          "total_memory": remote_info["memory_total"],
13063
          "reserved_memory": remote_info["memory_dom0"],
13064
          "free_memory": remote_info["memory_free"],
13065
          "total_disk": remote_info["vg_size"],
13066
          "free_disk": remote_info["vg_free"],
13067
          "total_cpus": remote_info["cpu_total"],
13068
          "i_pri_memory": i_p_mem,
13069
          "i_pri_up_memory": i_p_up_mem,
13070
          }
13071
        pnr_dyn.update(node_results[nname])
13072
        node_results[nname] = pnr_dyn
13073

    
13074
    return node_results
13075

    
13076
  @staticmethod
13077
  def _ComputeInstanceData(cluster_info, i_list):
13078
    """Compute global instance data.
13079

13080
    """
13081
    instance_data = {}
13082
    for iinfo, beinfo in i_list:
13083
      nic_data = []
13084
      for nic in iinfo.nics:
13085
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13086
        nic_dict = {
13087
          "mac": nic.mac,
13088
          "ip": nic.ip,
13089
          "mode": filled_params[constants.NIC_MODE],
13090
          "link": filled_params[constants.NIC_LINK],
13091
          }
13092
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13093
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13094
        nic_data.append(nic_dict)
13095
      pir = {
13096
        "tags": list(iinfo.GetTags()),
13097
        "admin_up": iinfo.admin_up,
13098
        "vcpus": beinfo[constants.BE_VCPUS],
13099
        "memory": beinfo[constants.BE_MEMORY],
13100
        "os": iinfo.os,
13101
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13102
        "nics": nic_data,
13103
        "disks": [{constants.IDISK_SIZE: dsk.size,
13104
                   constants.IDISK_MODE: dsk.mode}
13105
                  for dsk in iinfo.disks],
13106
        "disk_template": iinfo.disk_template,
13107
        "hypervisor": iinfo.hypervisor,
13108
        }
13109
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13110
                                                 pir["disks"])
13111
      instance_data[iinfo.name] = pir
13112

    
13113
    return instance_data
13114

    
13115
  def _AddNewInstance(self):
13116
    """Add new instance data to allocator structure.
13117

13118
    This in combination with _ComputeClusterData will create the
13119
    correct structure needed as input for the allocator.
13120

13121
    The checks for the completeness of the opcode must have already been
13122
    done.
13123

13124
    """
13125
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13126

    
13127
    if self.disk_template in constants.DTS_INT_MIRROR:
13128
      self.required_nodes = 2
13129
    else:
13130
      self.required_nodes = 1
13131

    
13132
    request = {
13133
      "name": self.name,
13134
      "disk_template": self.disk_template,
13135
      "tags": self.tags,
13136
      "os": self.os,
13137
      "vcpus": self.vcpus,
13138
      "memory": self.memory,
13139
      "disks": self.disks,
13140
      "disk_space_total": disk_space,
13141
      "nics": self.nics,
13142
      "required_nodes": self.required_nodes,
13143
      "hypervisor": self.hypervisor,
13144
      }
13145

    
13146
    return request
13147

    
13148
  def _AddRelocateInstance(self):
13149
    """Add relocate instance data to allocator structure.
13150

13151
    This in combination with _ComputeClusterData will create the
13152
    correct structure needed as input for the allocator.
13153

13154
    The checks for the completeness of the opcode must have already been
13155
    done.
13156

13157
    """
13158
    instance = self.cfg.GetInstanceInfo(self.name)
13159
    if instance is None:
13160
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13161
                                   " IAllocator" % self.name)
13162

    
13163
    if instance.disk_template not in constants.DTS_MIRRORED:
13164
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13165
                                 errors.ECODE_INVAL)
13166

    
13167
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13168
        len(instance.secondary_nodes) != 1:
13169
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13170
                                 errors.ECODE_STATE)
13171

    
13172
    self.required_nodes = 1
13173
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13174
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13175

    
13176
    request = {
13177
      "name": self.name,
13178
      "disk_space_total": disk_space,
13179
      "required_nodes": self.required_nodes,
13180
      "relocate_from": self.relocate_from,
13181
      }
13182
    return request
13183

    
13184
  def _AddNodeEvacuate(self):
13185
    """Get data for node-evacuate requests.
13186

13187
    """
13188
    return {
13189
      "instances": self.instances,
13190
      "evac_mode": self.evac_mode,
13191
      }
13192

    
13193
  def _AddChangeGroup(self):
13194
    """Get data for node-evacuate requests.
13195

13196
    """
13197
    return {
13198
      "instances": self.instances,
13199
      "target_groups": self.target_groups,
13200
      }
13201

    
13202
  def _BuildInputData(self, fn, keydata):
13203
    """Build input data structures.
13204

13205
    """
13206
    self._ComputeClusterData()
13207

    
13208
    request = fn()
13209
    request["type"] = self.mode
13210
    for keyname, keytype in keydata:
13211
      if keyname not in request:
13212
        raise errors.ProgrammerError("Request parameter %s is missing" %
13213
                                     keyname)
13214
      val = request[keyname]
13215
      if not keytype(val):
13216
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13217
                                     " validation, value %s, expected"
13218
                                     " type %s" % (keyname, val, keytype))
13219
    self.in_data["request"] = request
13220

    
13221
    self.in_text = serializer.Dump(self.in_data)
13222

    
13223
  _STRING_LIST = ht.TListOf(ht.TString)
13224
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13225
     # pylint: disable=E1101
13226
     # Class '...' has no 'OP_ID' member
13227
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13228
                          opcodes.OpInstanceMigrate.OP_ID,
13229
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13230
     })))
13231

    
13232
  _NEVAC_MOVED = \
13233
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13234
                       ht.TItems([ht.TNonEmptyString,
13235
                                  ht.TNonEmptyString,
13236
                                  ht.TListOf(ht.TNonEmptyString),
13237
                                 ])))
13238
  _NEVAC_FAILED = \
13239
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13240
                       ht.TItems([ht.TNonEmptyString,
13241
                                  ht.TMaybeString,
13242
                                 ])))
13243
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13244
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13245

    
13246
  _MODE_DATA = {
13247
    constants.IALLOCATOR_MODE_ALLOC:
13248
      (_AddNewInstance,
13249
       [
13250
        ("name", ht.TString),
13251
        ("memory", ht.TInt),
13252
        ("disks", ht.TListOf(ht.TDict)),
13253
        ("disk_template", ht.TString),
13254
        ("os", ht.TString),
13255
        ("tags", _STRING_LIST),
13256
        ("nics", ht.TListOf(ht.TDict)),
13257
        ("vcpus", ht.TInt),
13258
        ("hypervisor", ht.TString),
13259
        ], ht.TList),
13260
    constants.IALLOCATOR_MODE_RELOC:
13261
      (_AddRelocateInstance,
13262
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13263
       ht.TList),
13264
    constants.IALLOCATOR_MODE_NODE_EVAC:
13265
      (_AddNodeEvacuate, [
13266
        ("instances", _STRING_LIST),
13267
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13268
        ], _NEVAC_RESULT),
13269
    constants.IALLOCATOR_MODE_CHG_GROUP:
13270
      (_AddChangeGroup, [
13271
        ("instances", _STRING_LIST),
13272
        ("target_groups", _STRING_LIST),
13273
        ], _NEVAC_RESULT),
13274
    }
13275

    
13276
  def Run(self, name, validate=True, call_fn=None):
13277
    """Run an instance allocator and return the results.
13278

13279
    """
13280
    if call_fn is None:
13281
      call_fn = self.rpc.call_iallocator_runner
13282

    
13283
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13284
    result.Raise("Failure while running the iallocator script")
13285

    
13286
    self.out_text = result.payload
13287
    if validate:
13288
      self._ValidateResult()
13289

    
13290
  def _ValidateResult(self):
13291
    """Process the allocator results.
13292

13293
    This will process and, if successful, save the result in
13294
    self.out_data and the other parameters.
13295

13296
    """
13297
    try:
13298
      rdict = serializer.Load(self.out_text)
13299
    except Exception, err:
13300
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13301

    
13302
    if not isinstance(rdict, dict):
13303
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13304

    
13305
    # TODO: remove backwards compatibility in later versions
13306
    if "nodes" in rdict and "result" not in rdict:
13307
      rdict["result"] = rdict["nodes"]
13308
      del rdict["nodes"]
13309

    
13310
    for key in "success", "info", "result":
13311
      if key not in rdict:
13312
        raise errors.OpExecError("Can't parse iallocator results:"
13313
                                 " missing key '%s'" % key)
13314
      setattr(self, key, rdict[key])
13315

    
13316
    if not self._result_check(self.result):
13317
      raise errors.OpExecError("Iallocator returned invalid result,"
13318
                               " expected %s, got %s" %
13319
                               (self._result_check, self.result),
13320
                               errors.ECODE_INVAL)
13321

    
13322
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13323
      assert self.relocate_from is not None
13324
      assert self.required_nodes == 1
13325

    
13326
      node2group = dict((name, ndata["group"])
13327
                        for (name, ndata) in self.in_data["nodes"].items())
13328

    
13329
      fn = compat.partial(self._NodesToGroups, node2group,
13330
                          self.in_data["nodegroups"])
13331

    
13332
      instance = self.cfg.GetInstanceInfo(self.name)
13333
      request_groups = fn(self.relocate_from + [instance.primary_node])
13334
      result_groups = fn(rdict["result"] + [instance.primary_node])
13335

    
13336
      if self.success and not set(result_groups).issubset(request_groups):
13337
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13338
                                 " differ from original groups (%s)" %
13339
                                 (utils.CommaJoin(result_groups),
13340
                                  utils.CommaJoin(request_groups)))
13341

    
13342
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13343
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13344

    
13345
    self.out_data = rdict
13346

    
13347
  @staticmethod
13348
  def _NodesToGroups(node2group, groups, nodes):
13349
    """Returns a list of unique group names for a list of nodes.
13350

13351
    @type node2group: dict
13352
    @param node2group: Map from node name to group UUID
13353
    @type groups: dict
13354
    @param groups: Group information
13355
    @type nodes: list
13356
    @param nodes: Node names
13357

13358
    """
13359
    result = set()
13360

    
13361
    for node in nodes:
13362
      try:
13363
        group_uuid = node2group[node]
13364
      except KeyError:
13365
        # Ignore unknown node
13366
        pass
13367
      else:
13368
        try:
13369
          group = groups[group_uuid]
13370
        except KeyError:
13371
          # Can't find group, let's use UUID
13372
          group_name = group_uuid
13373
        else:
13374
          group_name = group["name"]
13375

    
13376
        result.add(group_name)
13377

    
13378
    return sorted(result)
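# An illustrative sketch (not part of IAllocator): the free-memory adjustment
# performed in _ComputeDynamicNodeData above. For every instance whose primary
# node is the one being reported, free memory is reduced by the part of the
# instance's configured memory that the node does not already account for
# (configured minus currently used, never negative). The numbers are made up.
def _SketchAdjustFreeMemory(reported_free, primary_instances):
  """Return the adjusted free memory (MiB) for one node.

  @param reported_free: free memory as reported by the node
  @param primary_instances: list of (configured_mem, used_mem) tuples

  >>> _SketchAdjustFreeMemory(4096, [(1024, 256), (512, 0)])
  2816

  """
  free = reported_free
  for (configured, used) in primary_instances:
    free -= max(0, configured - used)
  return free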
13379

    
13380

    
13381
class LUTestAllocator(NoHooksLU):
13382
  """Run allocator tests.
13383

13384
  This LU runs the allocator tests.
13385

13386
  """
13387
  def CheckPrereq(self):
13388
    """Check prerequisites.
13389

13390
    This checks the opcode parameters depending on the direction and test mode.
13391

13392
    """
13393
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13394
      for attr in ["memory", "disks", "disk_template",
13395
                   "os", "tags", "nics", "vcpus"]:
13396
        if not hasattr(self.op, attr):
13397
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13398
                                     attr, errors.ECODE_INVAL)
13399
      iname = self.cfg.ExpandInstanceName(self.op.name)
13400
      if iname is not None:
13401
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13402
                                   iname, errors.ECODE_EXISTS)
13403
      if not isinstance(self.op.nics, list):
13404
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13405
                                   errors.ECODE_INVAL)
13406
      if not isinstance(self.op.disks, list):
13407
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13408
                                   errors.ECODE_INVAL)
13409
      for row in self.op.disks:
13410
        if (not isinstance(row, dict) or
13411
            constants.IDISK_SIZE not in row or
13412
            not isinstance(row[constants.IDISK_SIZE], int) or
13413
            constants.IDISK_MODE not in row or
13414
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13415
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13416
                                     " parameter", errors.ECODE_INVAL)
13417
      if self.op.hypervisor is None:
13418
        self.op.hypervisor = self.cfg.GetHypervisorType()
13419
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13420
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13421
      self.op.name = fname
13422
      self.relocate_from = \
13423
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13424
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13425
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13426
      if not self.op.instances:
13427
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13428
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13429
    else:
13430
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13431
                                 self.op.mode, errors.ECODE_INVAL)
13432

    
13433
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13434
      if self.op.allocator is None:
13435
        raise errors.OpPrereqError("Missing allocator name",
13436
                                   errors.ECODE_INVAL)
13437
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13438
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13439
                                 self.op.direction, errors.ECODE_INVAL)
13440

    
13441
  def Exec(self, feedback_fn):
13442
    """Run the allocator test.
13443

13444
    """
13445
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13446
      ial = IAllocator(self.cfg, self.rpc,
13447
                       mode=self.op.mode,
13448
                       name=self.op.name,
13449
                       memory=self.op.memory,
13450
                       disks=self.op.disks,
13451
                       disk_template=self.op.disk_template,
13452
                       os=self.op.os,
13453
                       tags=self.op.tags,
13454
                       nics=self.op.nics,
13455
                       vcpus=self.op.vcpus,
13456
                       hypervisor=self.op.hypervisor,
13457
                       )
13458
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13459
      ial = IAllocator(self.cfg, self.rpc,
13460
                       mode=self.op.mode,
13461
                       name=self.op.name,
13462
                       relocate_from=list(self.relocate_from),
13463
                       )
13464
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13465
      ial = IAllocator(self.cfg, self.rpc,
13466
                       mode=self.op.mode,
13467
                       instances=self.op.instances,
13468
                       target_groups=self.op.target_groups)
13469
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13470
      ial = IAllocator(self.cfg, self.rpc,
13471
                       mode=self.op.mode,
13472
                       instances=self.op.instances,
13473
                       evac_mode=self.op.evac_mode)
13474
    else:
13475
      raise errors.ProgrammerError("Uncatched mode %s in"
13476
                                   " LUTestAllocator.Exec", self.op.mode)
13477

    
13478
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13479
      result = ial.in_text
13480
    else:
13481
      ial.Run(self.op.allocator, validate=False)
13482
      result = ial.out_text
13483
    return result
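# An illustrative sketch (not part of LUTestAllocator): one plausible request
# dictionary of the shape IAllocator builds for IALLOCATOR_MODE_ALLOC (see
# _AddNewInstance and the corresponding keydata in _MODE_DATA above). All
# values are invented; only the key set follows the code above.
_SKETCH_ALLOC_REQUEST = {
  "type": constants.IALLOCATOR_MODE_ALLOC,
  "name": "example-instance.example.com",
  "memory": 1024,
  "vcpus": 1,
  "os": "debootstrap+default",
  "disk_template": constants.DT_DRBD8,
  "disks": [{constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw"}],
  "disk_space_total": 10368,
  "nics": [{"mac": "auto", "ip": None, "mode": "bridged", "link": "xen-br0"}],
  "tags": [],
  "hypervisor": "xen-pvm",
  "required_nodes": 2,
  }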
13484

    
13485

    
13486
#: Query type implementations
13487
_QUERY_IMPL = {
13488
  constants.QR_INSTANCE: _InstanceQuery,
13489
  constants.QR_NODE: _NodeQuery,
13490
  constants.QR_GROUP: _GroupQuery,
13491
  constants.QR_OS: _OsQuery,
13492
  }
13493

    
13494
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13495

    
13496

    
13497
def _GetQueryImplementation(name):
13498
  """Returns the implemtnation for a query type.
13499

13500
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
13501

13502
  """
13503
  try:
13504
    return _QUERY_IMPL[name]
13505
  except KeyError:
13506
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13507
                               errors.ECODE_INVAL)