root / lib / cmdlib.py @ 0fa753ba


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
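# Illustrative sketch (not part of the original module): an LU whose Exec
# wants follow-up work done can return a ResultWithJobs; each inner list
# becomes one submitted job. The opcode and field values below are only
# examples of what such an LU might emit.
#
#   def Exec(self, feedback_fn):
#     jobs = [
#       [opcodes.OpInstanceStartup(instance_name="inst1.example.com")],
#       [opcodes.OpInstanceStartup(instance_name="inst2.example.com")],
#       ]
#     return ResultWithJobs(jobs, chosen_node="node1.example.com")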
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
137
    self.LogStep = processor.LogStep # pylint: disable=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensuring
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separately is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possibly
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods no longer need to worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. If no nodes are needed, an empty
297
      list should be returned (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used to send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the "unused argument" and
324
    # "could be a function" warnings
325
    # pylint: disable=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we've really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
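# Illustrative sketch (not part of the original module): the minimal shape of
# a concrete LU following the subclass rules from the LogicalUnit docstring.
# The class name and its behaviour are hypothetical; with HPATH = None no
# hooks are run, so BuildHooksEnv/BuildHooksNodes are not needed here.
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = None
#     HTYPE = None
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.needed_locks = {}   # this LU needs no locks at all
#
#     def CheckPrereq(self):
#       pass                     # nothing to verify
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Doing nothing")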
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
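# Illustrative sketch (not part of the original module): a minimal tasklet.
# Locking stays in the owning LU; the tasklet only checks and executes. The
# class name and the instance lookup are hypothetical.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#       assert self.instance is not None, "Unknown instance"
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on instance %s" % self.instance.name)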
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @type use_default: boolean
672
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @type use_none: boolean
675
  @param use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
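# Illustrative example (not part of the original module) of the merge
# semantics above: constants.VALUE_DEFAULT drops a key (with use_default
# enabled), every other value overwrites or extends the old dictionary. The
# parameter names are hypothetical.
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/sda1", "serial_console": True}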
692

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
  @param lu: the logical unit which owns the locks
698
  @type level: member of ganeti.locking.LEVELS
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
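# Illustrative example (not part of the original module): an LU that acquired
# all node locks early on can narrow them down once the relevant instance is
# known, keeping only its own nodes (variable names are hypothetical).
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] +
#                      list(instance.secondary_nodes))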
737

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
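# Illustrative example (not part of the original module): for two single-node
# instances the mapping built above could look like this (all names are
# hypothetical)
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node1.example.com", "xenvg/disk1"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst2.example.com"}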
750

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
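# Illustration (not part of the original module) of a few entries in the
# dictionary built above for a hypothetical one-NIC, one-disk instance; the
# hooks runner later prefixes every key with "GANETI_".
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MAC": "aa:00:00:35:bf:01",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     # ... plus INSTANCE_BE_*/INSTANCE_HV_* entries from bep and hvp
#   }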
1028

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max by one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_stop_master(master, False)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @param instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
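# Illustration (not part of the original module) of the shape of the list
# returned above, with hypothetical origins and hypervisor names:
#
#   [("cluster", "xen-pvm", {... cluster-level defaults ...}),
#    ("os debian-image", "xen-pvm", {... defaults plus OS overrides ...}),
#    ("instance inst1.example.com", "kvm", {... fully filled parameters ...})]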
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436
  TCLUSTER = "cluster"
1437
  TNODE = "node"
1438
  TINSTANCE = "instance"
1439

    
1440
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452
  ENODEDRBD = (TNODE, "ENODEDRBD")
1453
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456
  ENODEHV = (TNODE, "ENODEHV")
1457
  ENODELVM = (TNODE, "ENODELVM")
1458
  ENODEN1 = (TNODE, "ENODEN1")
1459
  ENODENET = (TNODE, "ENODENET")
1460
  ENODEOS = (TNODE, "ENODEOS")
1461
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463
  ENODERPC = (TNODE, "ENODERPC")
1464
  ENODESSH = (TNODE, "ENODESSH")
1465
  ENODEVERSION = (TNODE, "ENODEVERSION")
1466
  ENODESETUP = (TNODE, "ENODESETUP")
1467
  ENODETIME = (TNODE, "ENODETIME")
1468
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469

    
1470
  ETYPE_FIELD = "code"
1471
  ETYPE_ERROR = "ERROR"
1472
  ETYPE_WARNING = "WARNING"
1473

    
1474
  def _Error(self, ecode, item, msg, *args, **kwargs):
1475
    """Format an error message.
1476

1477
    Based on the opcode's error_codes parameter, either format a
1478
    parseable error code, or a simpler error string.
1479

1480
    This must be called only from Exec and functions called from Exec.
1481

1482
    """
1483
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484
    itype, etxt = ecode
1485
    # first complete the msg
1486
    if args:
1487
      msg = msg % args
1488
    # then format the whole message
1489
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491
    else:
1492
      if item:
1493
        item = " " + item
1494
      else:
1495
        item = ""
1496
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497
    # and finally report it via the feedback_fn
1498
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1499

    
1500
  def _ErrorIf(self, cond, *args, **kwargs):
1501
    """Log an error message if the passed condition is True.
1502

1503
    """
1504
    cond = (bool(cond)
1505
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1506
    if cond:
1507
      self._Error(*args, **kwargs)
1508
    # do not mark the operation as failed for WARN cases; only errors do that
1509
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510
      self.bad = self.bad or cond
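# Illustration (not part of the original module) of the two report formats
# produced by _Error above, using a made-up error. With op.error_codes set,
# the message is machine-parseable:
#
#   - ERROR:ENODENET:node:node1.example.com:tcp communication failed
#
# and otherwise the human-readable form is used:
#
#   - ERROR: node node1.example.com: tcp communication failed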
1511

    
1512

    
1513
class LUClusterVerify(NoHooksLU):
1514
  """Submits all jobs necessary to verify the cluster.
1515

1516
  """
1517
  REQ_BGL = False
1518

    
1519
  def ExpandNames(self):
1520
    self.needed_locks = {}
1521

    
1522
  def Exec(self, feedback_fn):
1523
    jobs = []
1524

    
1525
    if self.op.group_name:
1526
      groups = [self.op.group_name]
1527
      depends_fn = lambda: None
1528
    else:
1529
      groups = self.cfg.GetNodeGroupList()
1530

    
1531
      # Verify global configuration
1532
      jobs.append([opcodes.OpClusterVerifyConfig()])
1533

    
1534
      # Always depend on global verification
1535
      depends_fn = lambda: [(-len(jobs), [])]
1536

    
1537
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538
                                              depends=depends_fn())]
1539
                for group in groups)
1540

    
1541
    # Fix up all parameters
1542
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1543
      op.debug_simulate_errors = self.op.debug_simulate_errors
1544
      op.verbose = self.op.verbose
1545
      op.error_codes = self.op.error_codes
1546
      try:
1547
        op.skip_checks = self.op.skip_checks
1548
      except AttributeError:
1549
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1550

    
1551
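    # Illustrative layout (group names invented): for a cluster with two node
    # groups the submitted jobs are [OpClusterVerifyConfig],
    # [OpClusterVerifyGroup(group1, depends=[(-1, [])])] and
    # [OpClusterVerifyGroup(group2, depends=[(-2, [])])]; the negative values
    # are relative job IDs pointing back at the configuration-verification
    # job submitted above.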
    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

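    # A node is "dangling" when its node.group attribute points at a group
    # UUID that no longer exists in the configuration; the instances on such
    # nodes are collected below purely for reporting purposes.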
    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

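    # Illustrative note: remote_version is a two-element pair such as
    # (protocol_version, "2.5.2"); a protocol mismatch above is fatal for this
    # node's verification, while the release mismatch below is only a warning.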
    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

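    # Illustrative example: if the verify RPC ran in the window [T, T + 2s]
    # and the node reports roughly T + 500s, ntime_diff is about "498.0s";
    # offsets within NODE_MAX_CLOCK_SKEW of the window are ignored.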
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
          self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
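      # Illustrative figures: if this node has 2048 MiB free and the
      # auto-balanced instances for which it is secondary (grouped by their
      # primary node) would need 4096 MiB, an ENODEN1 error is reported below.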
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

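    # File categories as used below: files_all must exist on every node,
    # files_all_opt on either all nodes or none, files_mc only on master
    # candidates (and the master itself), files_vm only on vm_capable nodes.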
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_all_opt, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        # Non-optional files
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                cls.ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

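    # Illustrative content (names invented): node_drbd now maps minor numbers
    # to (instance_name, should_be_running) pairs, e.g.
    # {0: ("inst1.example.com", True), 1: ("inst2.example.com", False)}.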
    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

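    # Illustrative shape: os_dict maps an OS name to a list of
    # (path, status, diagnose, variants, parameters, api_versions) tuples,
    # one entry per location in which that OS was found on the node.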
    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

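    # Illustrative result (names invented): instdisk ends up looking like
    # {"inst1.example.com": {"node1.example.com": [(True, status)]}}, with one
    # (success, payload) pair per disk and an empty inner dict for diskless
    # instances.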
    return instdisk

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

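    # Illustrative result (names invented): (["node1", "node2"],
    # {"node1": ["node9"], "node2": ["node10"]}); the per-node lists hold one
    # node picked, round-robin, from each other group.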
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

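    # Illustrative content (tags invented): {"CLUSTER_TAGS": "prod dmz",
    # "NODE_TAGS_node1.example.com": "rack1"}.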
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

2757
    # Due to the way our RPC system works, exact response times cannot be
2758
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2759
    # time before and after executing the request, we can at least have a time
2760
    # window.
2761
    nvinfo_starttime = time.time()
2762
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2763
                                           node_verify_param,
2764
                                           self.cfg.GetClusterName())
2765
    nvinfo_endtime = time.time()
2766

    
2767
    if self.extra_lv_nodes and vg_name is not None:
2768
      extra_lv_nvinfo = \
2769
          self.rpc.call_node_verify(self.extra_lv_nodes,
2770
                                    {constants.NV_LVLIST: vg_name},
2771
                                    self.cfg.GetClusterName())
2772
    else:
2773
      extra_lv_nvinfo = {}
2774

    
2775
    all_drbd_map = self.cfg.ComputeDRBDMap()
2776

    
2777
    feedback_fn("* Gathering disk information (%s nodes)" %
2778
                len(self.my_node_names))
2779
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2780
                                     self.my_inst_info)
2781

    
2782
    feedback_fn("* Verifying configuration file consistency")
2783

    
2784
    # If not all nodes are being checked, we need to make sure the master node
2785
    # and a non-checked vm_capable node are in the list.
2786
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2787
    if absent_nodes:
2788
      vf_nvinfo = all_nvinfo.copy()
2789
      vf_node_info = list(self.my_node_info.values())
2790
      additional_nodes = []
2791
      if master_node not in self.my_node_info:
2792
        additional_nodes.append(master_node)
2793
        vf_node_info.append(self.all_node_info[master_node])
2794
      # Add the first vm_capable node we find which is not included
2795
      for node in absent_nodes:
2796
        nodeinfo = self.all_node_info[node]
2797
        if nodeinfo.vm_capable and not nodeinfo.offline:
2798
          additional_nodes.append(node)
2799
          vf_node_info.append(self.all_node_info[node])
2800
          break
2801
      key = constants.NV_FILELIST
2802
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2803
                                                 {key: node_verify_param[key]},
2804
                                                 self.cfg.GetClusterName()))
2805
    else:
2806
      vf_nvinfo = all_nvinfo
2807
      vf_node_info = self.my_node_info.values()
2808

    
2809
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2810

    
2811
    feedback_fn("* Verifying node status")
2812

    
2813
    refos_img = None
2814

    
2815
    for node_i in node_data_list:
2816
      node = node_i.name
2817
      nimg = node_image[node]
2818

    
2819
      if node_i.offline:
2820
        if verbose:
2821
          feedback_fn("* Skipping offline node %s" % (node,))
2822
        n_offline += 1
2823
        continue
2824

    
2825
      if node == master_node:
2826
        ntype = "master"
2827
      elif node_i.master_candidate:
2828
        ntype = "master candidate"
2829
      elif node_i.drained:
2830
        ntype = "drained"
2831
        n_drained += 1
2832
      else:
2833
        ntype = "regular"
2834
      if verbose:
2835
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2836

    
2837
      msg = all_nvinfo[node].fail_msg
2838
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2839
      if msg:
2840
        nimg.rpc_fail = True
2841
        continue
2842

    
2843
      nresult = all_nvinfo[node].payload
2844

    
2845
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2846
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2847
      self._VerifyNodeNetwork(node_i, nresult)
2848
      self._VerifyOob(node_i, nresult)
2849

    
2850
      if nimg.vm_capable:
2851
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2852
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2853
                             all_drbd_map)
2854

    
2855
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2856
        self._UpdateNodeInstances(node_i, nresult, nimg)
2857
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2858
        self._UpdateNodeOS(node_i, nresult, nimg)
2859

    
2860
        if not nimg.os_fail:
2861
          if refos_img is None:
2862
            refos_img = nimg
2863
          self._VerifyNodeOS(node_i, nimg, refos_img)
2864
        self._VerifyNodeBridges(node_i, nresult, bridges)
2865

    
2866
        # Check whether all running instancies are primary for the node. (This
2867
        # can no longer be done from _VerifyInstance below, since some of the
2868
        # wrong instances could be from other node groups.)
2869
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2870

    
2871
        for inst in non_primary_inst:
2872
          test = inst in self.all_inst_info
2873
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2874
                   "instance should not run on node %s", node_i.name)
2875
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2876
                   "node is running unknown instance %s", inst)
2877

    
2878
    for node, result in extra_lv_nvinfo.items():
2879
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2880
                              node_image[node], vg_name)
2881

    
2882
    feedback_fn("* Verifying instance status")
2883
    for instance in self.my_inst_names:
2884
      if verbose:
2885
        feedback_fn("* Verifying instance %s" % instance)
2886
      inst_config = self.my_inst_info[instance]
2887
      self._VerifyInstance(instance, inst_config, node_image,
2888
                           instdisk[instance])
2889
      inst_nodes_offline = []
2890

    
2891
      pnode = inst_config.primary_node
2892
      pnode_img = node_image[pnode]
2893
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2894
               self.ENODERPC, pnode, "instance %s, connection to"
2895
               " primary node failed", instance)
2896

    
2897
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2898
               self.EINSTANCEBADNODE, instance,
2899
               "instance is marked as running and lives on offline node %s",
2900
               inst_config.primary_node)
2901

    
2902
      # If the instance is non-redundant we cannot survive losing its primary
2903
      # node, so we are not N+1 compliant. On the other hand we have no disk
2904
      # templates with more than one secondary so that situation is not well
2905
      # supported either.
2906
      # FIXME: does not support file-backed instances
2907
      if not inst_config.secondary_nodes:
2908
        i_non_redundant.append(instance)
2909

    
2910
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2911
               instance, "instance has multiple secondary nodes: %s",
2912
               utils.CommaJoin(inst_config.secondary_nodes),
2913
               code=self.ETYPE_WARNING)
2914

    
2915
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2916
        pnode = inst_config.primary_node
2917
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2918
        instance_groups = {}
2919

    
2920
        for node in instance_nodes:
2921
          instance_groups.setdefault(self.all_node_info[node].group,
2922
                                     []).append(node)
2923

    
2924
        pretty_list = [
2925
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2926
          # Sort so that we always list the primary node first.
2927
          for group, nodes in sorted(instance_groups.items(),
2928
                                     key=lambda (_, nodes): pnode in nodes,
2929
                                     reverse=True)]
2930

    
2931
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2932
                      instance, "instance has primary and secondary nodes in"
2933
                      " different groups: %s", utils.CommaJoin(pretty_list),
2934
                      code=self.ETYPE_WARNING)
2935

    
2936
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2937
        i_non_a_balanced.append(instance)
2938

    
2939
      for snode in inst_config.secondary_nodes:
2940
        s_img = node_image[snode]
2941
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2942
                 "instance %s, connection to secondary node failed", instance)
2943

    
2944
        if s_img.offline:
2945
          inst_nodes_offline.append(snode)
2946

    
2947
      # warn that the instance lives on offline nodes
2948
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2949
               "instance has offline secondary node(s) %s",
2950
               utils.CommaJoin(inst_nodes_offline))
2951
      # ... or ghost/non-vm_capable nodes
2952
      for node in inst_config.all_nodes:
2953
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2954
                 "instance lives on ghost node %s", node)
2955
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2956
                 instance, "instance lives on non-vm_capable node %s", node)
2957

    
2958
    feedback_fn("* Verifying orphan volumes")
2959
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2960

    
2961
    # We will get spurious "unknown volume" warnings if any node of this group
2962
    # is secondary for an instance whose primary is in another group. To avoid
2963
    # them, we find these instances and add their volumes to node_vol_should.
2964
    for inst in self.all_inst_info.values():
2965
      for secondary in inst.secondary_nodes:
2966
        if (secondary in self.my_node_info
2967
            and inst.name not in self.my_inst_info):
2968
          inst.MapLVsByNode(node_vol_should)
2969
          break
2970

    
2971
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2972

    
2973
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2974
      feedback_fn("* Verifying N+1 Memory redundancy")
2975
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2976

    
2977
    feedback_fn("* Other Notes")
2978
    if i_non_redundant:
2979
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2980
                  % len(i_non_redundant))
2981

    
2982
    if i_non_a_balanced:
2983
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2984
                  % len(i_non_a_balanced))
2985

    
2986
    if n_offline:
2987
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2988

    
2989
    if n_drained:
2990
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2991

    
2992
    return not self.bad
2993

    
2994
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


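# Illustrative note (example values are hypothetical): LUClusterVerifyDisks
# below does not inspect any disks itself; its Exec method only submits one
# single-opcode job per owned node group, e.g. for groups "default" and
# "rack1":
#
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="rack1")]])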
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


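# Illustrative note (all names below are hypothetical examples): per its
# docstring, the Exec method of LUGroupVerifyDisks below returns a
# three-element tuple roughly of the form
#
#   ({"node3.example.com": "Error enumerating LVs ..."},
#    ["instance1.example.com"],
#    {"instance2.example.com": [["node4.example.com", "xenvg/disk0"]]})
#
# i.e. a dict of per-node RPC errors, the instances whose logical volumes were
# found offline (candidates for activate-disks), and a dict mapping instances
# to their missing (node, volume) pairs.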
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


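# Illustrative note: LUClusterRepairDiskSizes below returns a list describing
# every correction it made, as (instance_name, disk_index, new_size) tuples,
# e.g. [("instance1.example.com", 0, 10240)] (hypothetical values).  The
# "size >> 20" conversion in its Exec method suggests the blockdev_getsize RPC
# reports sizes in bytes while disk.size is kept in mebibytes.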
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


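# Note on LUClusterRename below: the rename is performed with the master role
# stopped and restarted in a try/finally block, so even a failed configuration
# update still attempts to bring the master IP back up; the refreshed
# known_hosts file is then pushed to all online nodes except the master itself
# via _UploadHelper.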
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
3379
  """Change the parameters of the cluster.
3380

3381
  """
3382
  HPATH = "cluster-modify"
3383
  HTYPE = constants.HTYPE_CLUSTER
3384
  REQ_BGL = False
3385

    
3386
  def CheckArguments(self):
3387
    """Check parameters
3388

3389
    """
3390
    if self.op.uid_pool:
3391
      uidpool.CheckUidPool(self.op.uid_pool)
3392

    
3393
    if self.op.add_uids:
3394
      uidpool.CheckUidPool(self.op.add_uids)
3395

    
3396
    if self.op.remove_uids:
3397
      uidpool.CheckUidPool(self.op.remove_uids)
3398

    
3399
  def ExpandNames(self):
3400
    # FIXME: in the future maybe other cluster params won't require checking on
3401
    # all nodes to be modified.
3402
    self.needed_locks = {
3403
      locking.LEVEL_NODE: locking.ALL_SET,
3404
    }
3405
    self.share_locks[locking.LEVEL_NODE] = 1
3406

    
3407
  def BuildHooksEnv(self):
3408
    """Build hooks env.
3409

3410
    """
3411
    return {
3412
      "OP_TARGET": self.cfg.GetClusterName(),
3413
      "NEW_VG_NAME": self.op.vg_name,
3414
      }
3415

    
3416
  def BuildHooksNodes(self):
3417
    """Build hooks nodes.
3418

3419
    """
3420
    mn = self.cfg.GetMasterNode()
3421
    return ([mn], [mn])
3422

    
3423
  def CheckPrereq(self):
3424
    """Check prerequisites.
3425

3426
    This checks whether the given params don't conflict and
3427
    if the given volume group is valid.
3428

3429
    """
3430
    if self.op.vg_name is not None and not self.op.vg_name:
3431
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3432
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3433
                                   " instances exist", errors.ECODE_INVAL)
3434

    
3435
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3436
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3437
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3438
                                   " drbd-based instances exist",
3439
                                   errors.ECODE_INVAL)
3440

    
3441
    node_list = self.owned_locks(locking.LEVEL_NODE)
3442

    
3443
    # if vg_name not None, checks given volume group on all nodes
3444
    if self.op.vg_name:
3445
      vglist = self.rpc.call_vg_list(node_list)
3446
      for node in node_list:
3447
        msg = vglist[node].fail_msg
3448
        if msg:
3449
          # ignoring down node
3450
          self.LogWarning("Error while gathering data on node %s"
3451
                          " (ignoring node): %s", node, msg)
3452
          continue
3453
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3454
                                              self.op.vg_name,
3455
                                              constants.MIN_VG_SIZE)
3456
        if vgstatus:
3457
          raise errors.OpPrereqError("Error on node '%s': %s" %
3458
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3459

    
3460
    if self.op.drbd_helper:
3461
      # checks given drbd helper on all nodes
3462
      helpers = self.rpc.call_drbd_helper(node_list)
3463
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3464
        if ninfo.offline:
3465
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3466
          continue
3467
        msg = helpers[node].fail_msg
3468
        if msg:
3469
          raise errors.OpPrereqError("Error checking drbd helper on node"
3470
                                     " '%s': %s" % (node, msg),
3471
                                     errors.ECODE_ENVIRON)
3472
        node_helper = helpers[node].payload
3473
        if node_helper != self.op.drbd_helper:
3474
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3475
                                     (node, node_helper), errors.ECODE_ENVIRON)
3476

    
3477
    self.cluster = cluster = self.cfg.GetClusterInfo()
3478
    # validate params changes
3479
    if self.op.beparams:
3480
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3481
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3482

    
3483
    if self.op.ndparams:
3484
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3485
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3486

    
3487
      # TODO: we need a more general way to handle resetting
3488
      # cluster-level parameters to default values
3489
      if self.new_ndparams["oob_program"] == "":
3490
        self.new_ndparams["oob_program"] = \
3491
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3492

    
3493
    if self.op.nicparams:
3494
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3495
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3496
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3497
      nic_errors = []
3498

    
3499
      # check all instances for consistency
3500
      for instance in self.cfg.GetAllInstancesInfo().values():
3501
        for nic_idx, nic in enumerate(instance.nics):
3502
          params_copy = copy.deepcopy(nic.nicparams)
3503
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3504

    
3505
          # check parameter syntax
3506
          try:
3507
            objects.NIC.CheckParameterSyntax(params_filled)
3508
          except errors.ConfigurationError, err:
3509
            nic_errors.append("Instance %s, nic/%d: %s" %
3510
                              (instance.name, nic_idx, err))
3511

    
3512
          # if we're moving instances to routed, check that they have an ip
3513
          target_mode = params_filled[constants.NIC_MODE]
3514
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3515
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3516
                              " address" % (instance.name, nic_idx))
3517
      if nic_errors:
3518
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3519
                                   "\n".join(nic_errors))
3520

    
3521
    # hypervisor list/parameters
3522
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3523
    if self.op.hvparams:
3524
      for hv_name, hv_dict in self.op.hvparams.items():
3525
        if hv_name not in self.new_hvparams:
3526
          self.new_hvparams[hv_name] = hv_dict
3527
        else:
3528
          self.new_hvparams[hv_name].update(hv_dict)
3529

    
3530
    # os hypervisor parameters
3531
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3532
    if self.op.os_hvp:
3533
      for os_name, hvs in self.op.os_hvp.items():
3534
        if os_name not in self.new_os_hvp:
3535
          self.new_os_hvp[os_name] = hvs
3536
        else:
3537
          for hv_name, hv_dict in hvs.items():
3538
            if hv_name not in self.new_os_hvp[os_name]:
3539
              self.new_os_hvp[os_name][hv_name] = hv_dict
3540
            else:
3541
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3542

    
3543
    # os parameters
3544
    self.new_osp = objects.FillDict(cluster.osparams, {})
3545
    if self.op.osparams:
3546
      for os_name, osp in self.op.osparams.items():
3547
        if os_name not in self.new_osp:
3548
          self.new_osp[os_name] = {}
3549

    
3550
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3551
                                                  use_none=True)
3552

    
3553
        if not self.new_osp[os_name]:
3554
          # we removed all parameters
3555
          del self.new_osp[os_name]
3556
        else:
3557
          # check the parameter validity (remote check)
3558
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3559
                         os_name, self.new_osp[os_name])
3560

    
3561
    # changes to the hypervisor list
3562
    if self.op.enabled_hypervisors is not None:
3563
      self.hv_list = self.op.enabled_hypervisors
3564
      for hv in self.hv_list:
3565
        # if the hypervisor doesn't already exist in the cluster
3566
        # hvparams, we initialize it to empty, and then (in both
3567
        # cases) we make sure to fill the defaults, as we might not
3568
        # have a complete defaults list if the hypervisor wasn't
3569
        # enabled before
3570
        if hv not in new_hvp:
3571
          new_hvp[hv] = {}
3572
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3573
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3574
    else:
3575
      self.hv_list = cluster.enabled_hypervisors
3576

    
3577
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3578
      # either the enabled list has changed, or the parameters have, validate
3579
      for hv_name, hv_params in self.new_hvparams.items():
3580
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3581
            (self.op.enabled_hypervisors and
3582
             hv_name in self.op.enabled_hypervisors)):
3583
          # either this is a new hypervisor, or its parameters have changed
3584
          hv_class = hypervisor.GetHypervisor(hv_name)
3585
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3586
          hv_class.CheckParameterSyntax(hv_params)
3587
          _CheckHVParams(self, node_list, hv_name, hv_params)
3588

    
3589
    if self.op.os_hvp:
3590
      # no need to check any newly-enabled hypervisors, since the
3591
      # defaults have already been checked in the above code-block
3592
      for os_name, os_hvp in self.new_os_hvp.items():
3593
        for hv_name, hv_params in os_hvp.items():
3594
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3595
          # we need to fill in the new os_hvp on top of the actual hv_p
3596
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3597
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3598
          hv_class = hypervisor.GetHypervisor(hv_name)
3599
          hv_class.CheckParameterSyntax(new_osp)
3600
          _CheckHVParams(self, node_list, hv_name, new_osp)
3601

    
3602
    if self.op.default_iallocator:
3603
      alloc_script = utils.FindFile(self.op.default_iallocator,
3604
                                    constants.IALLOCATOR_SEARCH_PATH,
3605
                                    os.path.isfile)
3606
      if alloc_script is None:
3607
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3608
                                   " specified" % self.op.default_iallocator,
3609
                                   errors.ECODE_INVAL)
3610

    
3611
  def Exec(self, feedback_fn):
3612
    """Change the parameters of the cluster.
3613

3614
    """
3615
    if self.op.vg_name is not None:
3616
      new_volume = self.op.vg_name
3617
      if not new_volume:
3618
        new_volume = None
3619
      if new_volume != self.cfg.GetVGName():
3620
        self.cfg.SetVGName(new_volume)
3621
      else:
3622
        feedback_fn("Cluster LVM configuration already in desired"
3623
                    " state, not changing")
3624
    if self.op.drbd_helper is not None:
3625
      new_helper = self.op.drbd_helper
3626
      if not new_helper:
3627
        new_helper = None
3628
      if new_helper != self.cfg.GetDRBDHelper():
3629
        self.cfg.SetDRBDHelper(new_helper)
3630
      else:
3631
        feedback_fn("Cluster DRBD helper already in desired state,"
3632
                    " not changing")
3633
    if self.op.hvparams:
3634
      self.cluster.hvparams = self.new_hvparams
3635
    if self.op.os_hvp:
3636
      self.cluster.os_hvp = self.new_os_hvp
3637
    if self.op.enabled_hypervisors is not None:
3638
      self.cluster.hvparams = self.new_hvparams
3639
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3640
    if self.op.beparams:
3641
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3642
    if self.op.nicparams:
3643
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3644
    if self.op.osparams:
3645
      self.cluster.osparams = self.new_osp
3646
    if self.op.ndparams:
3647
      self.cluster.ndparams = self.new_ndparams
3648

    
3649
    if self.op.candidate_pool_size is not None:
3650
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3651
      # we need to update the pool size here, otherwise the save will fail
3652
      _AdjustCandidatePool(self, [])
3653

    
3654
    if self.op.maintain_node_health is not None:
3655
      self.cluster.maintain_node_health = self.op.maintain_node_health
3656

    
3657
    if self.op.prealloc_wipe_disks is not None:
3658
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3659

    
3660
    if self.op.add_uids is not None:
3661
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3662

    
3663
    if self.op.remove_uids is not None:
3664
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3665

    
3666
    if self.op.uid_pool is not None:
3667
      self.cluster.uid_pool = self.op.uid_pool
3668

    
3669
    if self.op.default_iallocator is not None:
3670
      self.cluster.default_iallocator = self.op.default_iallocator
3671

    
3672
    if self.op.reserved_lvs is not None:
3673
      self.cluster.reserved_lvs = self.op.reserved_lvs
3674

    
3675
    def helper_os(aname, mods, desc):
3676
      desc += " OS list"
3677
      lst = getattr(self.cluster, aname)
3678
      for key, val in mods:
3679
        if key == constants.DDM_ADD:
3680
          if val in lst:
3681
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3682
          else:
3683
            lst.append(val)
3684
        elif key == constants.DDM_REMOVE:
3685
          if val in lst:
3686
            lst.remove(val)
3687
          else:
3688
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3689
        else:
3690
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3691

    
3692
    if self.op.hidden_os:
3693
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3694

    
3695
    if self.op.blacklisted_os:
3696
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3697

    
3698
    if self.op.master_netdev:
3699
      master = self.cfg.GetMasterNode()
3700
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3701
                  self.cluster.master_netdev)
3702
      result = self.rpc.call_node_stop_master(master, False)
3703
      result.Raise("Could not disable the master ip")
3704
      feedback_fn("Changing master_netdev from %s to %s" %
3705
                  (self.cluster.master_netdev, self.op.master_netdev))
3706
      self.cluster.master_netdev = self.op.master_netdev
3707

    
3708
    self.cfg.Update(self.cluster, feedback_fn)
3709

    
3710
    if self.op.master_netdev:
3711
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3712
                  self.op.master_netdev)
3713
      result = self.rpc.call_node_start_master(master, False, False)
3714
      if result.fail_msg:
3715
        self.LogWarning("Could not re-enable the master ip on"
3716
                        " the master, please restart manually: %s",
3717
                        result.fail_msg)
3718

    
3719

    
3720
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


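# Illustrative note: _ComputeAncillaryFiles below classifies ancillary files
# into four sets, returned as a tuple in this order:
#   files_all      - required on every node (e.g. the SSH known_hosts file)
#   files_all_opt  - must exist on all nodes or on none (e.g. the RAPI users
#                    file)
#   files_mc       - master candidates only (the cluster config file, unless
#                    redistributing)
#   files_vm       - VM-capable nodes only (hypervisor ancillary files)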
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


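# Note on _WaitForSync below: the polling loop tolerates up to 10 consecutive
# RPC failures (sleeping 6 seconds between attempts) before raising
# errors.RemoteError, re-polls up to 10 more times at 1-second intervals when
# the disks look done but still degraded, and otherwise sleeps for the
# estimated remaining time, capped at 60 seconds, between status checks.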
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


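# Note on _CheckDiskConsistency below: with ldisk=False the check relies on the
# mirror's overall is_degraded flag, while with ldisk=True it instead requires
# the local storage status to be LDS_OKAY, which is typically the stricter test
# used when only the local leg of a DRBD mirror matters.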
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


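# Note: the recursive call on dev.children in _CheckDiskConsistency above is
# made with the default ldisk=False, so child devices are always checked via
# their is_degraded flag even when the caller asked for the local-disk test.
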
class LUOobCommand(NoHooksLU):
3964
  """Logical unit for OOB handling.
3965

3966
  """
3967
  REG_BGL = False
3968
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3969

    
3970
  def ExpandNames(self):
3971
    """Gather locks we need.
3972

3973
    """
3974
    if self.op.node_names:
3975
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3976
      lock_names = self.op.node_names
3977
    else:
3978
      lock_names = locking.ALL_SET
3979

    
3980
    self.needed_locks = {
3981
      locking.LEVEL_NODE: lock_names,
3982
      }
3983

    
3984
  def CheckPrereq(self):
3985
    """Check prerequisites.
3986

3987
    This checks:
3988
     - the node exists in the configuration
3989
     - OOB is supported
3990

3991
    Any errors are signaled by raising errors.OpPrereqError.
3992

3993
    """
3994
    self.nodes = []
3995
    self.master_node = self.cfg.GetMasterNode()
3996

    
3997
    assert self.op.power_delay >= 0.0
3998

    
3999
    if self.op.node_names:
4000
      if (self.op.command in self._SKIP_MASTER and
4001
          self.master_node in self.op.node_names):
4002
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4003
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4004

    
4005
        if master_oob_handler:
4006
          additional_text = ("run '%s %s %s' if you want to operate on the"
4007
                             " master regardless") % (master_oob_handler,
4008
                                                      self.op.command,
4009
                                                      self.master_node)
4010
        else:
4011
          additional_text = "it does not support out-of-band operations"
4012

    
4013
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4014
                                    " allowed for %s; %s") %
4015
                                   (self.master_node, self.op.command,
4016
                                    additional_text), errors.ECODE_INVAL)
4017
    else:
4018
      self.op.node_names = self.cfg.GetNodeList()
4019
      if self.op.command in self._SKIP_MASTER:
4020
        self.op.node_names.remove(self.master_node)
4021

    
4022
    if self.op.command in self._SKIP_MASTER:
4023
      assert self.master_node not in self.op.node_names
4024

    
4025
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4026
      if node is None:
4027
        raise errors.OpPrereqError("Node %s not found" % node_name,
4028
                                   errors.ECODE_NOENT)
4029
      else:
4030
        self.nodes.append(node)
4031

    
4032
      if (not self.op.ignore_status and
4033
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4034
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4035
                                    " not marked offline") % node_name,
4036
                                   errors.ECODE_STATE)
4037

    
4038
  def Exec(self, feedback_fn):
4039
    """Execute OOB and return result if we expect any.
4040

4041
    """
4042
    master_node = self.master_node
4043
    ret = []
4044

    
4045
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4046
                                              key=lambda node: node.name)):
4047
      node_entry = [(constants.RS_NORMAL, node.name)]
4048
      ret.append(node_entry)
4049

    
4050
      oob_program = _SupportsOob(self.cfg, node)
4051

    
4052
      if not oob_program:
4053
        node_entry.append((constants.RS_UNAVAIL, None))
4054
        continue
4055

    
4056
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4057
                   self.op.command, oob_program, node.name)
4058
      result = self.rpc.call_run_oob(master_node, oob_program,
4059
                                     self.op.command, node.name,
4060
                                     self.op.timeout)
4061

    
4062
      if result.fail_msg:
4063
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4064
                        node.name, result.fail_msg)
4065
        node_entry.append((constants.RS_NODATA, None))
4066
      else:
4067
        try:
4068
          self._CheckPayload(result)
4069
        except errors.OpExecError, err:
4070
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4071
                          node.name, err)
4072
          node_entry.append((constants.RS_NODATA, None))
4073
        else:
4074
          if self.op.command == constants.OOB_HEALTH:
4075
            # For health we should log important events
4076
            for item, status in result.payload:
4077
              if status in [constants.OOB_STATUS_WARNING,
4078
                            constants.OOB_STATUS_CRITICAL]:
4079
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4080
                                item, node.name, status)
4081

    
4082
          if self.op.command == constants.OOB_POWER_ON:
4083
            node.powered = True
4084
          elif self.op.command == constants.OOB_POWER_OFF:
4085
            node.powered = False
4086
          elif self.op.command == constants.OOB_POWER_STATUS:
4087
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4088
            if powered != node.powered:
4089
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4090
                               " match actual power state (%s)"), node.powered,
4091
                              node.name, powered)
4092

    
4093
          # For configuration changing commands we should update the node
4094
          if self.op.command in (constants.OOB_POWER_ON,
4095
                                 constants.OOB_POWER_OFF):
4096
            self.cfg.Update(node, feedback_fn)
4097

    
4098
          node_entry.append((constants.RS_NORMAL, result.payload))
4099

    
4100
          if (self.op.command == constants.OOB_POWER_ON and
4101
              idx < len(self.nodes) - 1):
4102
            time.sleep(self.op.power_delay)
4103

    
4104
    return ret
4105

    
4106
  def _CheckPayload(self, result):
4107
    """Checks if the payload is valid.
4108

4109
    @param result: RPC result
4110
    @raises errors.OpExecError: If payload is not valid
4111

4112
    """
4113
    errs = []
4114
    if self.op.command == constants.OOB_HEALTH:
4115
      if not isinstance(result.payload, list):
4116
        errs.append("command 'health' is expected to return a list but got %s" %
4117
                    type(result.payload))
4118
      else:
4119
        for item, status in result.payload:
4120
          if status not in constants.OOB_STATUSES:
4121
            errs.append("health item '%s' has invalid status '%s'" %
4122
                        (item, status))
4123

    
4124
    if self.op.command == constants.OOB_POWER_STATUS:
4125
      if not isinstance(result.payload, dict):
4126
        errs.append("power-status is expected to return a dict but got %s" %
4127
                    type(result.payload))
4128

    
4129
    if self.op.command in [
4130
        constants.OOB_POWER_ON,
4131
        constants.OOB_POWER_OFF,
4132
        constants.OOB_POWER_CYCLE,
4133
        ]:
4134
      if result.payload is not None:
4135
        errs.append("%s is expected to not return payload but got '%s'" %
4136
                    (self.op.command, result.payload))
4137

    
4138
    if errs:
4139
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4140
                               utils.CommaJoin(errs))
4141

    
4142

    
4143
class _OsQuery(_QueryBase):
4144
  FIELDS = query.OS_FIELDS
4145

    
4146
  def ExpandNames(self, lu):
4147
    # Lock all nodes in shared mode
4148
    # Temporary removal of locks, should be reverted later
4149
    # TODO: reintroduce locks when they are lighter-weight
4150
    lu.needed_locks = {}
4151
    #self.share_locks[locking.LEVEL_NODE] = 1
4152
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4153

    
4154
    # The following variables interact with _QueryBase._GetNames
4155
    if self.names:
4156
      self.wanted = self.names
4157
    else:
4158
      self.wanted = locking.ALL_SET
4159

    
4160
    self.do_locking = self.use_locking
4161

    
4162
  def DeclareLocks(self, lu, level):
4163
    pass
4164

    
4165
  @staticmethod
4166
  def _DiagnoseByOS(rlist):
4167
    """Remaps a per-node return list into an a per-os per-node dictionary
4168

4169
    @param rlist: a map with node names as keys and OS objects as values
4170

4171
    @rtype: dict
4172
    @return: a dictionary with osnames as keys and as value another
4173
        map, with nodes as keys and tuples of (path, status, diagnose,
4174
        variants, parameters, api_versions) as values, eg::
4175

4176
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4177
                                     (/srv/..., False, "invalid api")],
4178
                           "node2": [(/srv/..., True, "", [], [])]}
4179
          }
4180

4181
    """
4182
    all_os = {}
4183
    # we build here the list of nodes that didn't fail the RPC (at RPC
4184
    # level), so that nodes with a non-responding node daemon don't
4185
    # make all OSes invalid
4186
    good_nodes = [node_name for node_name in rlist
4187
                  if not rlist[node_name].fail_msg]
4188
    for node_name, nr in rlist.items():
4189
      if nr.fail_msg or not nr.payload:
4190
        continue
4191
      for (name, path, status, diagnose, variants,
4192
           params, api_versions) in nr.payload:
4193
        if name not in all_os:
4194
          # build a list of nodes for this os containing empty lists
4195
          # for each node in node_list
4196
          all_os[name] = {}
4197
          for nname in good_nodes:
4198
            all_os[name][nname] = []
4199
        # convert params from [name, help] to (name, help)
4200
        params = [tuple(v) for v in params]
4201
        all_os[name][node_name].append((path, status, diagnose,
4202
                                        variants, params, api_versions))
4203
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
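
    # Illustrative sketch (assumed inputs, not executed): a query asking only
    # for the "name" field with no name arguments yields just the status
    # filter, roughly
    #   [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
    #            [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]]
    # while an explicit name list would be AND-ed with it.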

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)
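
    # Only live data (query.NQ_LIVE) requires contacting the nodes, so node
    # locks are taken just in that case; a query for purely config-derived
    # fields such as "name" or "pinst_cnt" runs without locking (illustrative
    # examples).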

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the node(s) and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
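
    # Illustrative sketch (hypothetical values): with output_fields
    # ["node", "name", "size", "instance"] a single row could look like
    #   ["node1.example.com", "disk0", "10240", "instance1"]
    # since every value is stringified before being returned.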


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the node(s) and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
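
    # The group locks were acquired optimistically in DeclareLocks, before the
    # node locks existed; if an instance moved to another node group in the
    # meantime (hypothetical scenario), the check above raises an error rather
    # than returning data for a group that was never locked.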

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the requested storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
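    # Illustrative example: on a dual-homed master with primary 192.0.2.10 and
    # secondary 198.51.100.10, a new node that supplies only one address would
    # be single-homed and is rejected by the check below (example addresses
    # taken from the documentation ranges).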
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
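
  # Illustrative example of the mapping above: a drained node has the flag
  # tuple (master_candidate, drained, offline) == (False, True, False), which
  # _F2R maps to _ROLE_DRAINED; _R2F is the inverse mapping and is used in
  # Exec to write the flags back once the new role has been decided.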

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
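
    # Illustrative example (hypothetical request): offlining a node that is
    # currently a master candidate gives old_role == _ROLE_CANDIDATE and
    # new_role == _ROLE_OFFLINE; Exec below skips the demote RPC (the node is
    # being offlined anyway) and simply writes back the flag tuple
    # (False, False, True).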

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
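
    # Illustrative example (hypothetical data): with
    #   cluster.os_hvp == {"debian": {"kvm": {...}, "xen-pvm": {...}}}
    # and only "kvm" enabled, os_hvp becomes {"debian": {"kvm": {...}}}, so
    # parameters of disabled hypervisors are not exposed to clients.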
5524

    
5525

    
5526
class LUClusterConfigQuery(NoHooksLU):
5527
  """Return configuration values.
5528

5529
  """
5530
  REQ_BGL = False
5531
  _FIELDS_DYNAMIC = utils.FieldSet()
5532
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5533
                                  "watcher_pause", "volume_group_name")
5534

    
5535
  def CheckArguments(self):
5536
    _CheckOutputFields(static=self._FIELDS_STATIC,
5537
                       dynamic=self._FIELDS_DYNAMIC,
5538
                       selected=self.op.output_fields)
5539

    
5540
  def ExpandNames(self):
5541
    self.needed_locks = {}
5542

    
5543
  def Exec(self, feedback_fn):
5544
    """Dump a representation of the cluster config to the standard output.
5545

5546
    """
5547
    values = []
5548
    for field in self.op.output_fields:
5549
      if field == "cluster_name":
5550
        entry = self.cfg.GetClusterName()
5551
      elif field == "master_node":
5552
        entry = self.cfg.GetMasterNode()
5553
      elif field == "drain_flag":
5554
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5555
      elif field == "watcher_pause":
5556
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5557
      elif field == "volume_group_name":
5558
        entry = self.cfg.GetVGName()
5559
      else:
5560
        raise errors.ParameterError(field)
5561
      values.append(entry)
5562
    return values
5563

    
5564

    
5565
class LUInstanceActivateDisks(NoHooksLU):
5566
  """Bring up an instance's disks.
5567

5568
  """
5569
  REQ_BGL = False
5570

    
5571
  def ExpandNames(self):
5572
    self._ExpandAndLockInstance()
5573
    self.needed_locks[locking.LEVEL_NODE] = []
5574
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5575

    
5576
  def DeclareLocks(self, level):
5577
    if level == locking.LEVEL_NODE:
5578
      self._LockInstancesNodes()
5579

    
5580
  def CheckPrereq(self):
5581
    """Check prerequisites.
5582

5583
    This checks that the instance is in the cluster.
5584

5585
    """
5586
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5587
    assert self.instance is not None, \
5588
      "Cannot retrieve locked instance %s" % self.op.instance_name
5589
    _CheckNodeOnline(self, self.instance.primary_node)
5590

    
5591
  def Exec(self, feedback_fn):
5592
    """Activate the disks.
5593

5594
    """
5595
    disks_ok, disks_info = \
5596
              _AssembleInstanceDisks(self, self.instance,
5597
                                     ignore_size=self.op.ignore_size)
5598
    if not disks_ok:
5599
      raise errors.OpExecError("Cannot activate block devices")
5600

    
5601
    return disks_info
5602

    
5603

    
5604
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5605
                           ignore_size=False):
5606
  """Prepare the block devices for an instance.
5607

5608
  This sets up the block devices on all nodes.
5609

5610
  @type lu: L{LogicalUnit}
5611
  @param lu: the logical unit on whose behalf we execute
5612
  @type instance: L{objects.Instance}
5613
  @param instance: the instance for whose disks we assemble
5614
  @type disks: list of L{objects.Disk} or None
5615
  @param disks: which disks to assemble (or all, if None)
5616
  @type ignore_secondaries: boolean
5617
  @param ignore_secondaries: if true, errors on secondary nodes
5618
      won't result in an error return from the function
5619
  @type ignore_size: boolean
5620
  @param ignore_size: if true, the current known size of the disk
5621
      will not be used during the disk activation, useful for cases
5622
      when the size is wrong
5623
  @return: a tuple of (disks_ok, device_info); device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices
5626

5627
  """
5628
  device_info = []
5629
  disks_ok = True
5630
  iname = instance.name
5631
  disks = _ExpandCheckDisks(instance, disks)
5632

    
5633
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
5641

    
5642
  # 1st pass, assemble on all nodes in secondary mode
5643
  for idx, inst_disk in enumerate(disks):
5644
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5645
      if ignore_size:
5646
        node_disk = node_disk.Copy()
5647
        node_disk.UnsetSize()
5648
      lu.cfg.SetDiskID(node_disk, node)
5649
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5650
      msg = result.fail_msg
5651
      if msg:
5652
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5653
                           " (is_primary=False, pass=1): %s",
5654
                           inst_disk.iv_name, node, msg)
5655
        if not ignore_secondaries:
5656
          disks_ok = False
5657

    
5658
  # FIXME: race condition on drbd migration to primary
5659

    
5660
  # 2nd pass, do only the primary node
5661
  for idx, inst_disk in enumerate(disks):
5662
    dev_path = None
5663

    
5664
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5665
      if node != instance.primary_node:
5666
        continue
5667
      if ignore_size:
5668
        node_disk = node_disk.Copy()
5669
        node_disk.UnsetSize()
5670
      lu.cfg.SetDiskID(node_disk, node)
5671
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5672
      msg = result.fail_msg
5673
      if msg:
5674
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5675
                           " (is_primary=True, pass=2): %s",
5676
                           inst_disk.iv_name, node, msg)
5677
        disks_ok = False
5678
      else:
5679
        dev_path = result.payload
5680

    
5681
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5682

    
5683
  # leave the disks configured for the primary node
5684
  # this is a workaround that would be fixed better by
5685
  # improving the logical/physical id handling
5686
  for disk in disks:
5687
    lu.cfg.SetDiskID(disk, instance.primary_node)
5688

    
5689
  return disks_ok, device_info
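# Illustrative sketch only (hypothetical helper, not in the original module):
# how a caller can consume the (disks_ok, device_info) pair returned by
# _AssembleInstanceDisks.  The (node, iv_name, dev_path) tuple layout is the
# one built into device_info above.
def _ExampleLogAssembledDisks(lu, instance):
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    # dev_path is the block device path reported by the primary node
    lu.LogInfo("Disk %s of instance %s visible on node %s as %s",
               iv_name, instance.name, node, dev_path)
  return device_info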
5690

    
5691

    
5692
def _StartInstanceDisks(lu, instance, force):
5693
  """Start the disks of an instance.
5694

5695
  """
5696
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5697
                                           ignore_secondaries=force)
5698
  if not disks_ok:
5699
    _ShutdownInstanceDisks(lu, instance)
5700
    if force is not None and not force:
5701
      lu.proc.LogWarning("", hint="If the message above refers to a"
5702
                         " secondary node,"
5703
                         " you can retry the operation using '--force'.")
5704
    raise errors.OpExecError("Disk consistency error")
5705

    
5706

    
5707
class LUInstanceDeactivateDisks(NoHooksLU):
5708
  """Shutdown an instance's disks.
5709

5710
  """
5711
  REQ_BGL = False
5712

    
5713
  def ExpandNames(self):
5714
    self._ExpandAndLockInstance()
5715
    self.needed_locks[locking.LEVEL_NODE] = []
5716
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5717

    
5718
  def DeclareLocks(self, level):
5719
    if level == locking.LEVEL_NODE:
5720
      self._LockInstancesNodes()
5721

    
5722
  def CheckPrereq(self):
5723
    """Check prerequisites.
5724

5725
    This checks that the instance is in the cluster.
5726

5727
    """
5728
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5729
    assert self.instance is not None, \
5730
      "Cannot retrieve locked instance %s" % self.op.instance_name
5731

    
5732
  def Exec(self, feedback_fn):
5733
    """Deactivate the disks
5734

5735
    """
5736
    instance = self.instance
5737
    if self.op.force:
5738
      _ShutdownInstanceDisks(self, instance)
5739
    else:
5740
      _SafeShutdownInstanceDisks(self, instance)
5741

    
5742

    
5743
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5744
  """Shutdown block devices of an instance.
5745

5746
  This function checks if an instance is running, before calling
5747
  _ShutdownInstanceDisks.
5748

5749
  """
5750
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5751
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5752

    
5753

    
5754
def _ExpandCheckDisks(instance, disks):
5755
  """Return the instance disks selected by the disks list
5756

5757
  @type disks: list of L{objects.Disk} or None
5758
  @param disks: selected disks
5759
  @rtype: list of L{objects.Disk}
5760
  @return: selected instance disks to act on
5761

5762
  """
5763
  if disks is None:
5764
    return instance.disks
5765
  else:
5766
    if not set(disks).issubset(instance.disks):
5767
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5768
                                   " target instance")
5769
    return disks
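# Illustrative sketch only (hypothetical helper, not in the original module):
# the same subset check _ExpandCheckDisks performs, written for index-based
# selection so its semantics are easy to see in isolation.
def _ExampleSelectDisksByIndex(instance, indices=None):
  if indices is None:
    return instance.disks
  if not set(indices).issubset(range(len(instance.disks))):
    raise errors.ProgrammerError("Can only act on disks belonging to the"
                                 " target instance")
  return [instance.disks[idx] for idx in indices]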
5770

    
5771

    
5772
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5773
  """Shutdown block devices of an instance.
5774

5775
  This does the shutdown on all nodes of the instance.
5776

5777
  If ignore_primary is false, errors on the primary node are not
  ignored; they cause the function to return False.
5779

5780
  """
5781
  all_result = True
5782
  disks = _ExpandCheckDisks(instance, disks)
5783

    
5784
  for disk in disks:
5785
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5786
      lu.cfg.SetDiskID(top_disk, node)
5787
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5788
      msg = result.fail_msg
5789
      if msg:
5790
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5791
                      disk.iv_name, node, msg)
5792
        if ((node == instance.primary_node and not ignore_primary) or
5793
            (node != instance.primary_node and not result.offline)):
5794
          all_result = False
5795
  return all_result
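# Illustrative sketch only: the error-accounting rule used by
# _ShutdownInstanceDisks above, extracted as a hypothetical predicate.  A
# failed shutdown counts against the overall result when it happens on the
# primary node (unless ignore_primary is set) or on any node whose RPC
# result is not marked offline.
def _ExampleShutdownErrorCounts(node, primary_node, rpc_result_offline,
                                ignore_primary=False):
  if node == primary_node:
    return not ignore_primary
  return not rpc_result_offline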
5796

    
5797

    
5798
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5799
  """Checks if a node has enough free memory.
5800

5801
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5805

5806
  @type lu: C{LogicalUnit}
5807
  @param lu: a logical unit from which we get configuration data
5808
  @type node: C{str}
5809
  @param node: the node to check
5810
  @type reason: C{str}
5811
  @param reason: string to use in the error message
5812
  @type requested: C{int}
5813
  @param requested: the amount of memory in MiB to check for
5814
  @type hypervisor_name: C{str}
5815
  @param hypervisor_name: the hypervisor to ask for memory stats
5816
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5817
      we cannot check the node
5818

5819
  """
5820
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5821
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5822
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5823
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5824
  if not isinstance(free_mem, int):
5825
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5826
                               " was '%s'" % (node, free_mem),
5827
                               errors.ECODE_ENVIRON)
5828
  if requested > free_mem:
5829
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5830
                               " needed %s MiB, available %s MiB" %
5831
                               (node, reason, requested, free_mem),
5832
                               errors.ECODE_NORES)
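# Illustrative sketch only: the core comparison made by _CheckNodeFreeMemory
# once the RPC data is in hand.  Both values are in MiB, as documented above;
# the helper itself is hypothetical.
def _ExampleHasEnoughMemory(requested, free_mem):
  # a non-integer value means the node data could not be interpreted
  if not isinstance(free_mem, int):
    return False
  return requested <= free_mem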
5833

    
5834

    
5835
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5836
  """Checks if nodes have enough free disk space in the all VGs.
5837

5838
  This function check if all given nodes have the needed amount of
5839
  free disk. In case any node has less disk or we cannot get the
5840
  information from the node, this function raise an OpPrereqError
5841
  exception.
5842

5843
  @type lu: C{LogicalUnit}
5844
  @param lu: a logical unit from which we get configuration data
5845
  @type nodenames: C{list}
5846
  @param nodenames: the list of node names to check
5847
  @type req_sizes: C{dict}
5848
  @param req_sizes: the hash of vg and corresponding amount of disk in
5849
      MiB to check for
5850
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5851
      or we cannot check the node
5852

5853
  """
5854
  for vg, req_size in req_sizes.items():
5855
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
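# Illustrative example only: the shape of the req_sizes argument described in
# the docstring above -- a dict mapping each volume group name to the disk
# space (in MiB) needed in that group.  The VG names and sizes below are made
# up for the example.
#
#   req_sizes = {
#     "xenvg": 2 * 10240,   # two 10 GiB disks in one VG
#     "fastvg": 4096,       # one 4 GiB disk in another VG
#   }
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
#                            req_sizes)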
5856

    
5857

    
5858
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5859
  """Checks if nodes have enough free disk space in the specified VG.
5860

5861
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5865

5866
  @type lu: C{LogicalUnit}
5867
  @param lu: a logical unit from which we get configuration data
5868
  @type nodenames: C{list}
5869
  @param nodenames: the list of node names to check
5870
  @type vg: C{str}
5871
  @param vg: the volume group to check
5872
  @type requested: C{int}
5873
  @param requested: the amount of disk in MiB to check for
5874
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5875
      or we cannot check the node
5876

5877
  """
5878
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5879
  for node in nodenames:
5880
    info = nodeinfo[node]
5881
    info.Raise("Cannot get current information from node %s" % node,
5882
               prereq=True, ecode=errors.ECODE_ENVIRON)
5883
    vg_free = info.payload.get("vg_free", None)
5884
    if not isinstance(vg_free, int):
5885
      raise errors.OpPrereqError("Can't compute free disk space on node"
5886
                                 " %s for vg %s, result was '%s'" %
5887
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5888
    if requested > vg_free:
5889
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5890
                                 " vg %s: required %d MiB, available %d MiB" %
5891
                                 (node, vg, requested, vg_free),
5892
                                 errors.ECODE_NORES)
5893

    
5894

    
5895
class LUInstanceStartup(LogicalUnit):
5896
  """Starts an instance.
5897

5898
  """
5899
  HPATH = "instance-start"
5900
  HTYPE = constants.HTYPE_INSTANCE
5901
  REQ_BGL = False
5902

    
5903
  def CheckArguments(self):
5904
    # extra beparams
5905
    if self.op.beparams:
5906
      # fill the beparams dict
5907
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5908

    
5909
  def ExpandNames(self):
5910
    self._ExpandAndLockInstance()
5911

    
5912
  def BuildHooksEnv(self):
5913
    """Build hooks env.
5914

5915
    This runs on master, primary and secondary nodes of the instance.
5916

5917
    """
5918
    env = {
5919
      "FORCE": self.op.force,
5920
      }
5921

    
5922
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5923

    
5924
    return env
5925

    
5926
  def BuildHooksNodes(self):
5927
    """Build hooks nodes.
5928

5929
    """
5930
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5931
    return (nl, nl)
5932

    
5933
  def CheckPrereq(self):
5934
    """Check prerequisites.
5935

5936
    This checks that the instance is in the cluster.
5937

5938
    """
5939
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5940
    assert self.instance is not None, \
5941
      "Cannot retrieve locked instance %s" % self.op.instance_name
5942

    
5943
    # extra hvparams
5944
    if self.op.hvparams:
5945
      # check hypervisor parameter syntax (locally)
5946
      cluster = self.cfg.GetClusterInfo()
5947
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5948
      filled_hvp = cluster.FillHV(instance)
5949
      filled_hvp.update(self.op.hvparams)
5950
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5951
      hv_type.CheckParameterSyntax(filled_hvp)
5952
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5953

    
5954
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5955

    
5956
    if self.primary_offline and self.op.ignore_offline_nodes:
5957
      self.proc.LogWarning("Ignoring offline primary node")
5958

    
5959
      if self.op.hvparams or self.op.beparams:
5960
        self.proc.LogWarning("Overridden parameters are ignored")
5961
    else:
5962
      _CheckNodeOnline(self, instance.primary_node)
5963

    
5964
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5965

    
5966
      # check bridges existence
5967
      _CheckInstanceBridgesExist(self, instance)
5968

    
5969
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5970
                                                instance.name,
5971
                                                instance.hypervisor)
5972
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5973
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5974
      if not remote_info.payload: # not running already
5975
        _CheckNodeFreeMemory(self, instance.primary_node,
5976
                             "starting instance %s" % instance.name,
5977
                             bep[constants.BE_MEMORY], instance.hypervisor)
5978

    
5979
  def Exec(self, feedback_fn):
5980
    """Start the instance.
5981

5982
    """
5983
    instance = self.instance
5984
    force = self.op.force
5985

    
5986
    if not self.op.no_remember:
5987
      self.cfg.MarkInstanceUp(instance.name)
5988

    
5989
    if self.primary_offline:
5990
      assert self.op.ignore_offline_nodes
5991
      self.proc.LogInfo("Primary node offline, marked instance as started")
5992
    else:
5993
      node_current = instance.primary_node
5994

    
5995
      _StartInstanceDisks(self, instance, force)
5996

    
5997
      result = self.rpc.call_instance_start(node_current, instance,
5998
                                            self.op.hvparams, self.op.beparams,
5999
                                            self.op.startup_paused)
6000
      msg = result.fail_msg
6001
      if msg:
6002
        _ShutdownInstanceDisks(self, instance)
6003
        raise errors.OpExecError("Could not start instance: %s" % msg)
6004

    
6005

    
6006
class LUInstanceReboot(LogicalUnit):
6007
  """Reboot an instance.
6008

6009
  """
6010
  HPATH = "instance-reboot"
6011
  HTYPE = constants.HTYPE_INSTANCE
6012
  REQ_BGL = False
6013

    
6014
  def ExpandNames(self):
6015
    self._ExpandAndLockInstance()
6016

    
6017
  def BuildHooksEnv(self):
6018
    """Build hooks env.
6019

6020
    This runs on master, primary and secondary nodes of the instance.
6021

6022
    """
6023
    env = {
6024
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6025
      "REBOOT_TYPE": self.op.reboot_type,
6026
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6027
      }
6028

    
6029
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6030

    
6031
    return env
6032

    
6033
  def BuildHooksNodes(self):
6034
    """Build hooks nodes.
6035

6036
    """
6037
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6038
    return (nl, nl)
6039

    
6040
  def CheckPrereq(self):
6041
    """Check prerequisites.
6042

6043
    This checks that the instance is in the cluster.
6044

6045
    """
6046
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6047
    assert self.instance is not None, \
6048
      "Cannot retrieve locked instance %s" % self.op.instance_name
6049

    
6050
    _CheckNodeOnline(self, instance.primary_node)
6051

    
6052
    # check bridges existence
6053
    _CheckInstanceBridgesExist(self, instance)
6054

    
6055
  def Exec(self, feedback_fn):
6056
    """Reboot the instance.
6057

6058
    """
6059
    instance = self.instance
6060
    ignore_secondaries = self.op.ignore_secondaries
6061
    reboot_type = self.op.reboot_type
6062

    
6063
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6064
                                              instance.name,
6065
                                              instance.hypervisor)
6066
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6067
    instance_running = bool(remote_info.payload)
6068

    
6069
    node_current = instance.primary_node
6070

    
6071
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6072
                                            constants.INSTANCE_REBOOT_HARD]:
6073
      for disk in instance.disks:
6074
        self.cfg.SetDiskID(disk, node_current)
6075
      result = self.rpc.call_instance_reboot(node_current, instance,
6076
                                             reboot_type,
6077
                                             self.op.shutdown_timeout)
6078
      result.Raise("Could not reboot instance")
6079
    else:
6080
      if instance_running:
6081
        result = self.rpc.call_instance_shutdown(node_current, instance,
6082
                                                 self.op.shutdown_timeout)
6083
        result.Raise("Could not shutdown instance for full reboot")
6084
        _ShutdownInstanceDisks(self, instance)
6085
      else:
6086
        self.LogInfo("Instance %s was already stopped, starting now",
6087
                     instance.name)
6088
      _StartInstanceDisks(self, instance, ignore_secondaries)
6089
      result = self.rpc.call_instance_start(node_current, instance,
6090
                                            None, None, False)
6091
      msg = result.fail_msg
6092
      if msg:
6093
        _ShutdownInstanceDisks(self, instance)
6094
        raise errors.OpExecError("Could not start instance for"
6095
                                 " full reboot: %s" % msg)
6096

    
6097
    self.cfg.MarkInstanceUp(instance.name)
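# Illustrative sketch only (hypothetical helper): the decision made in
# LUInstanceReboot.Exec above between an in-hypervisor reboot and a full
# stop/start cycle.  The reboot-type constants are the ones referenced in
# the code; everything else is an example reduction of the control flow.
def _ExampleNeedsFullReboot(instance_running, reboot_type):
  soft_or_hard = reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                 constants.INSTANCE_REBOOT_HARD]
  # a soft/hard reboot is only attempted if the instance is running;
  # otherwise the disks are (re)started and the instance is booted anew
  return not (instance_running and soft_or_hard)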
6098

    
6099

    
6100
class LUInstanceShutdown(LogicalUnit):
6101
  """Shutdown an instance.
6102

6103
  """
6104
  HPATH = "instance-stop"
6105
  HTYPE = constants.HTYPE_INSTANCE
6106
  REQ_BGL = False
6107

    
6108
  def ExpandNames(self):
6109
    self._ExpandAndLockInstance()
6110

    
6111
  def BuildHooksEnv(self):
6112
    """Build hooks env.
6113

6114
    This runs on master, primary and secondary nodes of the instance.
6115

6116
    """
6117
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6118
    env["TIMEOUT"] = self.op.timeout
6119
    return env
6120

    
6121
  def BuildHooksNodes(self):
6122
    """Build hooks nodes.
6123

6124
    """
6125
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6126
    return (nl, nl)
6127

    
6128
  def CheckPrereq(self):
6129
    """Check prerequisites.
6130

6131
    This checks that the instance is in the cluster.
6132

6133
    """
6134
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6135
    assert self.instance is not None, \
6136
      "Cannot retrieve locked instance %s" % self.op.instance_name
6137

    
6138
    self.primary_offline = \
6139
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6140

    
6141
    if self.primary_offline and self.op.ignore_offline_nodes:
6142
      self.proc.LogWarning("Ignoring offline primary node")
6143
    else:
6144
      _CheckNodeOnline(self, self.instance.primary_node)
6145

    
6146
  def Exec(self, feedback_fn):
6147
    """Shutdown the instance.
6148

6149
    """
6150
    instance = self.instance
6151
    node_current = instance.primary_node
6152
    timeout = self.op.timeout
6153

    
6154
    if not self.op.no_remember:
6155
      self.cfg.MarkInstanceDown(instance.name)
6156

    
6157
    if self.primary_offline:
6158
      assert self.op.ignore_offline_nodes
6159
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6160
    else:
6161
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6162
      msg = result.fail_msg
6163
      if msg:
6164
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6165

    
6166
      _ShutdownInstanceDisks(self, instance)
6167

    
6168

    
6169
class LUInstanceReinstall(LogicalUnit):
6170
  """Reinstall an instance.
6171

6172
  """
6173
  HPATH = "instance-reinstall"
6174
  HTYPE = constants.HTYPE_INSTANCE
6175
  REQ_BGL = False
6176

    
6177
  def ExpandNames(self):
6178
    self._ExpandAndLockInstance()
6179

    
6180
  def BuildHooksEnv(self):
6181
    """Build hooks env.
6182

6183
    This runs on master, primary and secondary nodes of the instance.
6184

6185
    """
6186
    return _BuildInstanceHookEnvByObject(self, self.instance)
6187

    
6188
  def BuildHooksNodes(self):
6189
    """Build hooks nodes.
6190

6191
    """
6192
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6193
    return (nl, nl)
6194

    
6195
  def CheckPrereq(self):
6196
    """Check prerequisites.
6197

6198
    This checks that the instance is in the cluster and is not running.
6199

6200
    """
6201
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6202
    assert instance is not None, \
6203
      "Cannot retrieve locked instance %s" % self.op.instance_name
6204
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6205
                     " offline, cannot reinstall")
6206
    for node in instance.secondary_nodes:
6207
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6208
                       " cannot reinstall")
6209

    
6210
    if instance.disk_template == constants.DT_DISKLESS:
6211
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6212
                                 self.op.instance_name,
6213
                                 errors.ECODE_INVAL)
6214
    _CheckInstanceDown(self, instance, "cannot reinstall")
6215

    
6216
    if self.op.os_type is not None:
6217
      # OS verification
6218
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6219
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6220
      instance_os = self.op.os_type
6221
    else:
6222
      instance_os = instance.os
6223

    
6224
    nodelist = list(instance.all_nodes)
6225

    
6226
    if self.op.osparams:
6227
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6228
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6229
      self.os_inst = i_osdict # the new dict (without defaults)
6230
    else:
6231
      self.os_inst = None
6232

    
6233
    self.instance = instance
6234

    
6235
  def Exec(self, feedback_fn):
6236
    """Reinstall the instance.
6237

6238
    """
6239
    inst = self.instance
6240

    
6241
    if self.op.os_type is not None:
6242
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6243
      inst.os = self.op.os_type
6244
      # Write to configuration
6245
      self.cfg.Update(inst, feedback_fn)
6246

    
6247
    _StartInstanceDisks(self, inst, None)
6248
    try:
6249
      feedback_fn("Running the instance OS create scripts...")
6250
      # FIXME: pass debug option from opcode to backend
6251
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6252
                                             self.op.debug_level,
6253
                                             osparams=self.os_inst)
6254
      result.Raise("Could not install OS for instance %s on node %s" %
6255
                   (inst.name, inst.primary_node))
6256
    finally:
6257
      _ShutdownInstanceDisks(self, inst)
6258

    
6259

    
6260
class LUInstanceRecreateDisks(LogicalUnit):
6261
  """Recreate an instance's missing disks.
6262

6263
  """
6264
  HPATH = "instance-recreate-disks"
6265
  HTYPE = constants.HTYPE_INSTANCE
6266
  REQ_BGL = False
6267

    
6268
  def CheckArguments(self):
6269
    # normalise the disk list
6270
    self.op.disks = sorted(frozenset(self.op.disks))
6271

    
6272
  def ExpandNames(self):
6273
    self._ExpandAndLockInstance()
6274
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6275
    if self.op.nodes:
6276
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6277
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6278
    else:
6279
      self.needed_locks[locking.LEVEL_NODE] = []
6280

    
6281
  def DeclareLocks(self, level):
6282
    if level == locking.LEVEL_NODE:
6283
      # if we replace the nodes, we only need to lock the old primary,
6284
      # otherwise we need to lock all nodes for disk re-creation
6285
      primary_only = bool(self.op.nodes)
6286
      self._LockInstancesNodes(primary_only=primary_only)
6287

    
6288
  def BuildHooksEnv(self):
6289
    """Build hooks env.
6290

6291
    This runs on master, primary and secondary nodes of the instance.
6292

6293
    """
6294
    return _BuildInstanceHookEnvByObject(self, self.instance)
6295

    
6296
  def BuildHooksNodes(self):
6297
    """Build hooks nodes.
6298

6299
    """
6300
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6301
    return (nl, nl)
6302

    
6303
  def CheckPrereq(self):
6304
    """Check prerequisites.
6305

6306
    This checks that the instance is in the cluster and is not running.
6307

6308
    """
6309
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6310
    assert instance is not None, \
6311
      "Cannot retrieve locked instance %s" % self.op.instance_name
6312
    if self.op.nodes:
6313
      if len(self.op.nodes) != len(instance.all_nodes):
6314
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6315
                                   " %d replacement nodes were specified" %
6316
                                   (instance.name, len(instance.all_nodes),
6317
                                    len(self.op.nodes)),
6318
                                   errors.ECODE_INVAL)
6319
      assert instance.disk_template != constants.DT_DRBD8 or \
6320
          len(self.op.nodes) == 2
6321
      assert instance.disk_template != constants.DT_PLAIN or \
6322
          len(self.op.nodes) == 1
6323
      primary_node = self.op.nodes[0]
6324
    else:
6325
      primary_node = instance.primary_node
6326
    _CheckNodeOnline(self, primary_node)
6327

    
6328
    if instance.disk_template == constants.DT_DISKLESS:
6329
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6330
                                 self.op.instance_name, errors.ECODE_INVAL)
6331
    # if we replace nodes *and* the old primary is offline, we don't
6332
    # check
6333
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6334
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6335
    if not (self.op.nodes and old_pnode.offline):
6336
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6337

    
6338
    if not self.op.disks:
6339
      self.op.disks = range(len(instance.disks))
6340
    else:
6341
      for idx in self.op.disks:
6342
        if idx >= len(instance.disks):
6343
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6344
                                     errors.ECODE_INVAL)
6345
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6346
      raise errors.OpPrereqError("Can't recreate disks partially and"
6347
                                 " change the nodes at the same time",
6348
                                 errors.ECODE_INVAL)
6349
    self.instance = instance
6350

    
6351
  def Exec(self, feedback_fn):
6352
    """Recreate the disks.
6353

6354
    """
6355
    instance = self.instance
6356

    
6357
    to_skip = []
6358
    mods = [] # keeps track of needed logical_id changes
6359

    
6360
    for idx, disk in enumerate(instance.disks):
6361
      if idx not in self.op.disks: # disk idx has not been passed in
6362
        to_skip.append(idx)
6363
        continue
6364
      # update secondaries for disks, if needed
6365
      if self.op.nodes:
6366
        if disk.dev_type == constants.LD_DRBD8:
6367
          # need to update the nodes and minors
6368
          assert len(self.op.nodes) == 2
6369
          assert len(disk.logical_id) == 6 # otherwise disk internals
6370
                                           # have changed
6371
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6372
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6373
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6374
                    new_minors[0], new_minors[1], old_secret)
6375
          assert len(disk.logical_id) == len(new_id)
6376
          mods.append((idx, new_id))
6377

    
6378
    # now that we have passed all asserts above, we can apply the mods
6379
    # in a single run (to avoid partial changes)
6380
    for idx, new_id in mods:
6381
      instance.disks[idx].logical_id = new_id
6382

    
6383
    # change primary node, if needed
6384
    if self.op.nodes:
6385
      instance.primary_node = self.op.nodes[0]
6386
      self.LogWarning("Changing the instance's nodes, you will have to"
6387
                      " remove any disks left on the older nodes manually")
6388

    
6389
    if self.op.nodes:
6390
      self.cfg.Update(instance, feedback_fn)
6391

    
6392
    _CreateDisks(self, instance, to_skip=to_skip)
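# Illustrative sketch only (hypothetical helper): the DRBD logical_id rewrite
# performed in Exec above when disks are recreated on new nodes.  The 6-tuple
# layout (node_a, node_b, port, minor_a, minor_b, secret) follows the
# unpacking used in the code; the port and secret are kept, while the nodes
# and minors are replaced.
def _ExampleRemapDrbdLogicalId(old_logical_id, new_nodes, new_minors):
  assert len(old_logical_id) == 6, "unexpected DRBD logical_id layout"
  (_, _, old_port, _, _, old_secret) = old_logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)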
6393

    
6394

    
6395
class LUInstanceRename(LogicalUnit):
6396
  """Rename an instance.
6397

6398
  """
6399
  HPATH = "instance-rename"
6400
  HTYPE = constants.HTYPE_INSTANCE
6401

    
6402
  def CheckArguments(self):
6403
    """Check arguments.
6404

6405
    """
6406
    if self.op.ip_check and not self.op.name_check:
6407
      # TODO: make the ip check more flexible and not depend on the name check
6408
      raise errors.OpPrereqError("IP address check requires a name check",
6409
                                 errors.ECODE_INVAL)
6410

    
6411
  def BuildHooksEnv(self):
6412
    """Build hooks env.
6413

6414
    This runs on master, primary and secondary nodes of the instance.
6415

6416
    """
6417
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6418
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6419
    return env
6420

    
6421
  def BuildHooksNodes(self):
6422
    """Build hooks nodes.
6423

6424
    """
6425
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6426
    return (nl, nl)
6427

    
6428
  def CheckPrereq(self):
6429
    """Check prerequisites.
6430

6431
    This checks that the instance is in the cluster and is not running.
6432

6433
    """
6434
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6435
                                                self.op.instance_name)
6436
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6437
    assert instance is not None
6438
    _CheckNodeOnline(self, instance.primary_node)
6439
    _CheckInstanceDown(self, instance, "cannot rename")
6440
    self.instance = instance
6441

    
6442
    new_name = self.op.new_name
6443
    if self.op.name_check:
6444
      hostname = netutils.GetHostname(name=new_name)
6445
      if hostname.name != new_name:
6446
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6447
                     hostname.name)
6448
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6449
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6450
                                    " same as given hostname '%s'") %
6451
                                    (hostname.name, self.op.new_name),
6452
                                    errors.ECODE_INVAL)
6453
      new_name = self.op.new_name = hostname.name
6454
      if (self.op.ip_check and
6455
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6456
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6457
                                   (hostname.ip, new_name),
6458
                                   errors.ECODE_NOTUNIQUE)
6459

    
6460
    instance_list = self.cfg.GetInstanceList()
6461
    if new_name in instance_list and new_name != instance.name:
6462
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6463
                                 new_name, errors.ECODE_EXISTS)
6464

    
6465
  def Exec(self, feedback_fn):
6466
    """Rename the instance.
6467

6468
    """
6469
    inst = self.instance
6470
    old_name = inst.name
6471

    
6472
    rename_file_storage = False
6473
    if (inst.disk_template in constants.DTS_FILEBASED and
6474
        self.op.new_name != inst.name):
6475
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6476
      rename_file_storage = True
6477

    
6478
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6479
    # Change the instance lock. This is definitely safe while we hold the BGL.
6480
    # Otherwise the new lock would have to be added in acquired mode.
6481
    assert self.REQ_BGL
6482
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6483
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6484

    
6485
    # re-read the instance from the configuration after rename
6486
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6487

    
6488
    if rename_file_storage:
6489
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6490
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6491
                                                     old_file_storage_dir,
6492
                                                     new_file_storage_dir)
6493
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6494
                   " (but the instance has been renamed in Ganeti)" %
6495
                   (inst.primary_node, old_file_storage_dir,
6496
                    new_file_storage_dir))
6497

    
6498
    _StartInstanceDisks(self, inst, None)
6499
    try:
6500
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6501
                                                 old_name, self.op.debug_level)
6502
      msg = result.fail_msg
6503
      if msg:
6504
        msg = ("Could not run OS rename script for instance %s on node %s"
6505
               " (but the instance has been renamed in Ganeti): %s" %
6506
               (inst.name, inst.primary_node, msg))
6507
        self.proc.LogWarning(msg)
6508
    finally:
6509
      _ShutdownInstanceDisks(self, inst)
6510

    
6511
    return inst.name
6512

    
6513

    
6514
class LUInstanceRemove(LogicalUnit):
6515
  """Remove an instance.
6516

6517
  """
6518
  HPATH = "instance-remove"
6519
  HTYPE = constants.HTYPE_INSTANCE
6520
  REQ_BGL = False
6521

    
6522
  def ExpandNames(self):
6523
    self._ExpandAndLockInstance()
6524
    self.needed_locks[locking.LEVEL_NODE] = []
6525
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6526

    
6527
  def DeclareLocks(self, level):
6528
    if level == locking.LEVEL_NODE:
6529
      self._LockInstancesNodes()
6530

    
6531
  def BuildHooksEnv(self):
6532
    """Build hooks env.
6533

6534
    This runs on master, primary and secondary nodes of the instance.
6535

6536
    """
6537
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6538
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6539
    return env
6540

    
6541
  def BuildHooksNodes(self):
6542
    """Build hooks nodes.
6543

6544
    """
6545
    nl = [self.cfg.GetMasterNode()]
6546
    nl_post = list(self.instance.all_nodes) + nl
6547
    return (nl, nl_post)
6548

    
6549
  def CheckPrereq(self):
6550
    """Check prerequisites.
6551

6552
    This checks that the instance is in the cluster.
6553

6554
    """
6555
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6556
    assert self.instance is not None, \
6557
      "Cannot retrieve locked instance %s" % self.op.instance_name
6558

    
6559
  def Exec(self, feedback_fn):
6560
    """Remove the instance.
6561

6562
    """
6563
    instance = self.instance
6564
    logging.info("Shutting down instance %s on node %s",
6565
                 instance.name, instance.primary_node)
6566

    
6567
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6568
                                             self.op.shutdown_timeout)
6569
    msg = result.fail_msg
6570
    if msg:
6571
      if self.op.ignore_failures:
6572
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6573
      else:
6574
        raise errors.OpExecError("Could not shutdown instance %s on"
6575
                                 " node %s: %s" %
6576
                                 (instance.name, instance.primary_node, msg))
6577

    
6578
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6579

    
6580

    
6581
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6582
  """Utility function to remove an instance.
6583

6584
  """
6585
  logging.info("Removing block devices for instance %s", instance.name)
6586

    
6587
  if not _RemoveDisks(lu, instance):
6588
    if not ignore_failures:
6589
      raise errors.OpExecError("Can't remove instance's disks")
6590
    feedback_fn("Warning: can't remove instance's disks")
6591

    
6592
  logging.info("Removing instance %s out of cluster config", instance.name)
6593

    
6594
  lu.cfg.RemoveInstance(instance.name)
6595

    
6596
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6597
    "Instance lock removal conflict"
6598

    
6599
  # Remove lock for the instance
6600
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6601

    
6602

    
6603
class LUInstanceQuery(NoHooksLU):
6604
  """Logical unit for querying instances.
6605

6606
  """
6607
  # pylint: disable=W0142
6608
  REQ_BGL = False
6609

    
6610
  def CheckArguments(self):
6611
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6612
                             self.op.output_fields, self.op.use_locking)
6613

    
6614
  def ExpandNames(self):
6615
    self.iq.ExpandNames(self)
6616

    
6617
  def DeclareLocks(self, level):
6618
    self.iq.DeclareLocks(self, level)
6619

    
6620
  def Exec(self, feedback_fn):
6621
    return self.iq.OldStyleQuery(self)
6622

    
6623

    
6624
class LUInstanceFailover(LogicalUnit):
6625
  """Failover an instance.
6626

6627
  """
6628
  HPATH = "instance-failover"
6629
  HTYPE = constants.HTYPE_INSTANCE
6630
  REQ_BGL = False
6631

    
6632
  def CheckArguments(self):
6633
    """Check the arguments.
6634

6635
    """
6636
    self.iallocator = getattr(self.op, "iallocator", None)
6637
    self.target_node = getattr(self.op, "target_node", None)
6638

    
6639
  def ExpandNames(self):
6640
    self._ExpandAndLockInstance()
6641

    
6642
    if self.op.target_node is not None:
6643
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6644

    
6645
    self.needed_locks[locking.LEVEL_NODE] = []
6646
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6647

    
6648
    ignore_consistency = self.op.ignore_consistency
6649
    shutdown_timeout = self.op.shutdown_timeout
6650
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6651
                                       cleanup=False,
6652
                                       failover=True,
6653
                                       ignore_consistency=ignore_consistency,
6654
                                       shutdown_timeout=shutdown_timeout)
6655
    self.tasklets = [self._migrater]
6656

    
6657
  def DeclareLocks(self, level):
6658
    if level == locking.LEVEL_NODE:
6659
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6660
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6661
        if self.op.target_node is None:
6662
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6663
        else:
6664
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6665
                                                   self.op.target_node]
6666
        del self.recalculate_locks[locking.LEVEL_NODE]
6667
      else:
6668
        self._LockInstancesNodes()
6669

    
6670
  def BuildHooksEnv(self):
6671
    """Build hooks env.
6672

6673
    This runs on master, primary and secondary nodes of the instance.
6674

6675
    """
6676
    instance = self._migrater.instance
6677
    source_node = instance.primary_node
6678
    target_node = self.op.target_node
6679
    env = {
6680
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6681
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6682
      "OLD_PRIMARY": source_node,
6683
      "NEW_PRIMARY": target_node,
6684
      }
6685

    
6686
    if instance.disk_template in constants.DTS_INT_MIRROR:
6687
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6688
      env["NEW_SECONDARY"] = source_node
6689
    else:
6690
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6691

    
6692
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6693

    
6694
    return env
6695

    
6696
  def BuildHooksNodes(self):
6697
    """Build hooks nodes.
6698

6699
    """
6700
    instance = self._migrater.instance
6701
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6702
    return (nl, nl + [instance.primary_node])
6703

    
6704

    
6705
class LUInstanceMigrate(LogicalUnit):
6706
  """Migrate an instance.
6707

6708
  This is migration without shutting down, compared to the failover,
6709
  which is done with shutdown.
6710

6711
  """
6712
  HPATH = "instance-migrate"
6713
  HTYPE = constants.HTYPE_INSTANCE
6714
  REQ_BGL = False
6715

    
6716
  def ExpandNames(self):
6717
    self._ExpandAndLockInstance()
6718

    
6719
    if self.op.target_node is not None:
6720
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6721

    
6722
    self.needed_locks[locking.LEVEL_NODE] = []
6723
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6724

    
6725
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6726
                                       cleanup=self.op.cleanup,
6727
                                       failover=False,
6728
                                       fallback=self.op.allow_failover)
6729
    self.tasklets = [self._migrater]
6730

    
6731
  def DeclareLocks(self, level):
6732
    if level == locking.LEVEL_NODE:
6733
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6734
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6735
        if self.op.target_node is None:
6736
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6737
        else:
6738
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6739
                                                   self.op.target_node]
6740
        del self.recalculate_locks[locking.LEVEL_NODE]
6741
      else:
6742
        self._LockInstancesNodes()
6743

    
6744
  def BuildHooksEnv(self):
6745
    """Build hooks env.
6746

6747
    This runs on master, primary and secondary nodes of the instance.
6748

6749
    """
6750
    instance = self._migrater.instance
6751
    source_node = instance.primary_node
6752
    target_node = self.op.target_node
6753
    env = _BuildInstanceHookEnvByObject(self, instance)
6754
    env.update({
6755
      "MIGRATE_LIVE": self._migrater.live,
6756
      "MIGRATE_CLEANUP": self.op.cleanup,
6757
      "OLD_PRIMARY": source_node,
6758
      "NEW_PRIMARY": target_node,
6759
      })
6760

    
6761
    if instance.disk_template in constants.DTS_INT_MIRROR:
6762
      env["OLD_SECONDARY"] = target_node
6763
      env["NEW_SECONDARY"] = source_node
6764
    else:
6765
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6766

    
6767
    return env
6768

    
6769
  def BuildHooksNodes(self):
6770
    """Build hooks nodes.
6771

6772
    """
6773
    instance = self._migrater.instance
6774
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6775
    return (nl, nl + [instance.primary_node])
6776

    
6777

    
6778
class LUInstanceMove(LogicalUnit):
6779
  """Move an instance by data-copying.
6780

6781
  """
6782
  HPATH = "instance-move"
6783
  HTYPE = constants.HTYPE_INSTANCE
6784
  REQ_BGL = False
6785

    
6786
  def ExpandNames(self):
6787
    self._ExpandAndLockInstance()
6788
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6789
    self.op.target_node = target_node
6790
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6791
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6792

    
6793
  def DeclareLocks(self, level):
6794
    if level == locking.LEVEL_NODE:
6795
      self._LockInstancesNodes(primary_only=True)
6796

    
6797
  def BuildHooksEnv(self):
6798
    """Build hooks env.
6799

6800
    This runs on master, primary and secondary nodes of the instance.
6801

6802
    """
6803
    env = {
6804
      "TARGET_NODE": self.op.target_node,
6805
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6806
      }
6807
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6808
    return env
6809

    
6810
  def BuildHooksNodes(self):
6811
    """Build hooks nodes.
6812

6813
    """
6814
    nl = [
6815
      self.cfg.GetMasterNode(),
6816
      self.instance.primary_node,
6817
      self.op.target_node,
6818
      ]
6819
    return (nl, nl)
6820

    
6821
  def CheckPrereq(self):
6822
    """Check prerequisites.
6823

6824
    This checks that the instance is in the cluster.
6825

6826
    """
6827
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6828
    assert self.instance is not None, \
6829
      "Cannot retrieve locked instance %s" % self.op.instance_name
6830

    
6831
    node = self.cfg.GetNodeInfo(self.op.target_node)
6832
    assert node is not None, \
6833
      "Cannot retrieve locked node %s" % self.op.target_node
6834

    
6835
    self.target_node = target_node = node.name
6836

    
6837
    if target_node == instance.primary_node:
6838
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6839
                                 (instance.name, target_node),
6840
                                 errors.ECODE_STATE)
6841

    
6842
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6843

    
6844
    for idx, dsk in enumerate(instance.disks):
6845
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6846
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6847
                                   " cannot copy" % idx, errors.ECODE_STATE)
6848

    
6849
    _CheckNodeOnline(self, target_node)
6850
    _CheckNodeNotDrained(self, target_node)
6851
    _CheckNodeVmCapable(self, target_node)
6852

    
6853
    if instance.admin_up:
6854
      # check memory requirements on the target node
6855
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6856
                           instance.name, bep[constants.BE_MEMORY],
6857
                           instance.hypervisor)
6858
    else:
6859
      self.LogInfo("Not checking memory on the secondary node as"
6860
                   " instance will not be started")
6861

    
6862
    # check bridge existence
6863
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6864

    
6865
  def Exec(self, feedback_fn):
6866
    """Move an instance.
6867

6868
    The move is done by shutting it down on its present node, copying
6869
    the data over (slow) and starting it on the new node.
6870

6871
    """
6872
    instance = self.instance
6873

    
6874
    source_node = instance.primary_node
6875
    target_node = self.target_node
6876

    
6877
    self.LogInfo("Shutting down instance %s on source node %s",
6878
                 instance.name, source_node)
6879

    
6880
    result = self.rpc.call_instance_shutdown(source_node, instance,
6881
                                             self.op.shutdown_timeout)
6882
    msg = result.fail_msg
6883
    if msg:
6884
      if self.op.ignore_consistency:
6885
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6886
                             " Proceeding anyway. Please make sure node"
6887
                             " %s is down. Error details: %s",
6888
                             instance.name, source_node, source_node, msg)
6889
      else:
6890
        raise errors.OpExecError("Could not shutdown instance %s on"
6891
                                 " node %s: %s" %
6892
                                 (instance.name, source_node, msg))
6893

    
6894
    # create the target disks
6895
    try:
6896
      _CreateDisks(self, instance, target_node=target_node)
6897
    except errors.OpExecError:
6898
      self.LogWarning("Device creation failed, reverting...")
6899
      try:
6900
        _RemoveDisks(self, instance, target_node=target_node)
6901
      finally:
6902
        self.cfg.ReleaseDRBDMinors(instance.name)
6903
        raise
6904

    
6905
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6906

    
6907
    errs = []
6908
    # activate, get path, copy the data over
6909
    for idx, disk in enumerate(instance.disks):
6910
      self.LogInfo("Copying data for disk %d", idx)
6911
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6912
                                               instance.name, True, idx)
6913
      if result.fail_msg:
6914
        self.LogWarning("Can't assemble newly created disk %d: %s",
6915
                        idx, result.fail_msg)
6916
        errs.append(result.fail_msg)
6917
        break
6918
      dev_path = result.payload
6919
      result = self.rpc.call_blockdev_export(source_node, disk,
6920
                                             target_node, dev_path,
6921
                                             cluster_name)
6922
      if result.fail_msg:
6923
        self.LogWarning("Can't copy data over for disk %d: %s",
6924
                        idx, result.fail_msg)
6925
        errs.append(result.fail_msg)
6926
        break
6927

    
6928
    if errs:
6929
      self.LogWarning("Some disks failed to copy, aborting")
6930
      try:
6931
        _RemoveDisks(self, instance, target_node=target_node)
6932
      finally:
6933
        self.cfg.ReleaseDRBDMinors(instance.name)
6934
        raise errors.OpExecError("Errors during disk copy: %s" %
6935
                                 (",".join(errs),))
6936

    
6937
    instance.primary_node = target_node
6938
    self.cfg.Update(instance, feedback_fn)
6939

    
6940
    self.LogInfo("Removing the disks on the original node")
6941
    _RemoveDisks(self, instance, target_node=source_node)
6942

    
6943
    # Only start the instance if it's marked as up
6944
    if instance.admin_up:
6945
      self.LogInfo("Starting instance %s on node %s",
6946
                   instance.name, target_node)
6947

    
6948
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6949
                                           ignore_secondaries=True)
6950
      if not disks_ok:
6951
        _ShutdownInstanceDisks(self, instance)
6952
        raise errors.OpExecError("Can't activate the instance's disks")
6953

    
6954
      result = self.rpc.call_instance_start(target_node, instance,
6955
                                            None, None, False)
6956
      msg = result.fail_msg
6957
      if msg:
6958
        _ShutdownInstanceDisks(self, instance)
6959
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6960
                                 (instance.name, target_node, msg))
6961

    
6962

    
6963
class LUNodeMigrate(LogicalUnit):
6964
  """Migrate all instances from a node.
6965

6966
  """
6967
  HPATH = "node-migrate"
6968
  HTYPE = constants.HTYPE_NODE
6969
  REQ_BGL = False
6970

    
6971
  def CheckArguments(self):
6972
    pass
6973

    
6974
  def ExpandNames(self):
6975
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6976

    
6977
    self.share_locks = _ShareAll()
6978
    self.needed_locks = {
6979
      locking.LEVEL_NODE: [self.op.node_name],
6980
      }
6981

    
6982
  def BuildHooksEnv(self):
6983
    """Build hooks env.
6984

6985
    This runs on the master, the primary and all the secondaries.
6986

6987
    """
6988
    return {
6989
      "NODE_NAME": self.op.node_name,
6990
      }
6991

    
6992
  def BuildHooksNodes(self):
6993
    """Build hooks nodes.
6994

6995
    """
6996
    nl = [self.cfg.GetMasterNode()]
6997
    return (nl, nl)
6998

    
6999
  def CheckPrereq(self):
7000
    pass
7001

    
7002
  def Exec(self, feedback_fn):
7003
    # Prepare jobs for migration instances
7004
    jobs = [
7005
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7006
                                 mode=self.op.mode,
7007
                                 live=self.op.live,
7008
                                 iallocator=self.op.iallocator,
7009
                                 target_node=self.op.target_node)]
7010
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7011
      ]
7012

    
7013
    # TODO: Run iallocator in this opcode and pass correct placement options to
7014
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7015
    # running the iallocator and the actual migration, a good consistency model
7016
    # will have to be found.
7017

    
7018
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7019
            frozenset([self.op.node_name]))
7020

    
7021
    return ResultWithJobs(jobs)
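# Illustrative example only: the structure handed to ResultWithJobs above.
# Each inner list is one job (a chain of opcodes submitted together); here
# every evacuated instance gets its own single-opcode migration job.  A
# hypothetical two-instance example:
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com")],
#   ]
#   return ResultWithJobs(jobs)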
7022

    
7023

    
7024
class TLMigrateInstance(Tasklet):
7025
  """Tasklet class for instance migration.
7026

7027
  @type live: boolean
7028
  @ivar live: whether the migration will be done live or non-live;
7029
      this variable is initialized only after CheckPrereq has run
7030
  @type cleanup: boolean
7031
  @ivar cleanup: Whether we clean up from a failed migration
7032
  @type iallocator: string
7033
  @ivar iallocator: The iallocator used to determine target_node
7034
  @type target_node: string
7035
  @ivar target_node: If given, the target_node to reallocate the instance to
7036
  @type failover: boolean
7037
  @ivar failover: Whether operation results in failover or migration
7038
  @type fallback: boolean
7039
  @ivar fallback: Whether fallback to failover is allowed if migration not
7040
                  possible
7041
  @type ignore_consistency: boolean
7042
  @ivar ignore_consistency: Wheter we should ignore consistency between source
7043
                            and target node
7044
  @type shutdown_timeout: int
7045
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
7046

7047
  """
7048
  def __init__(self, lu, instance_name, cleanup=False,
7049
               failover=False, fallback=False,
7050
               ignore_consistency=False,
7051
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7052
    """Initializes this class.
7053

7054
    """
7055
    Tasklet.__init__(self, lu)
7056

    
7057
    # Parameters
7058
    self.instance_name = instance_name
7059
    self.cleanup = cleanup
7060
    self.live = False # will be overridden later
7061
    self.failover = failover
7062
    self.fallback = fallback
7063
    self.ignore_consistency = ignore_consistency
7064
    self.shutdown_timeout = shutdown_timeout
7065

    
7066
  def CheckPrereq(self):
7067
    """Check prerequisites.
7068

7069
    This checks that the instance is in the cluster.
7070

7071
    """
7072
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7073
    instance = self.cfg.GetInstanceInfo(instance_name)
7074
    assert instance is not None
7075
    self.instance = instance
7076

    
7077
    if (not self.cleanup and not instance.admin_up and not self.failover and
7078
        self.fallback):
7079
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7080
                      " to failover")
7081
      self.failover = True
7082

    
7083
    if instance.disk_template not in constants.DTS_MIRRORED:
7084
      if self.failover:
7085
        text = "failovers"
7086
      else:
7087
        text = "migrations"
7088
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7089
                                 " %s" % (instance.disk_template, text),
7090
                                 errors.ECODE_STATE)
7091

    
7092
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7093
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7094

    
7095
      if self.lu.op.iallocator:
7096
        self._RunAllocator()
7097
      else:
7098
        # We set self.target_node as it is required by
        # BuildHooksEnv
7100
        self.target_node = self.lu.op.target_node
7101

    
7102
      # self.target_node is already populated, either directly or by the
7103
      # iallocator run
7104
      target_node = self.target_node
7105
      if self.target_node == instance.primary_node:
7106
        raise errors.OpPrereqError("Cannot migrate instance %s"
7107
                                   " to its primary (%s)" %
7108
                                   (instance.name, instance.primary_node))
7109

    
7110
      if len(self.lu.tasklets) == 1:
7111
        # It is safe to release locks only when we're the only tasklet
7112
        # in the LU
7113
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7114
                      keep=[instance.primary_node, self.target_node])
7115

    
7116
    else:
7117
      secondary_nodes = instance.secondary_nodes
7118
      if not secondary_nodes:
7119
        raise errors.ConfigurationError("No secondary node but using"
7120
                                        " %s disk template" %
7121
                                        instance.disk_template)
7122
      target_node = secondary_nodes[0]
7123
      if self.lu.op.iallocator or (self.lu.op.target_node and
7124
                                   self.lu.op.target_node != target_node):
7125
        if self.failover:
7126
          text = "failed over"
7127
        else:
7128
          text = "migrated"
7129
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7130
                                   " be %s to arbitrary nodes"
7131
                                   " (neither an iallocator nor a target"
7132
                                   " node can be passed)" %
7133
                                   (instance.disk_template, text),
7134
                                   errors.ECODE_INVAL)
7135

    
7136
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7137

    
7138
    # check memory requirements on the secondary node
7139
    if not self.cleanup and (not self.failover or instance.admin_up):
7140
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7141
                           instance.name, i_be[constants.BE_MEMORY],
7142
                           instance.hypervisor)
7143
    else:
7144
      self.lu.LogInfo("Not checking memory on the secondary node as"
7145
                      " instance will not be started")
7146

    
7147
    # check bridge existence
7148
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7149

    
7150
    if not self.cleanup:
7151
      _CheckNodeNotDrained(self.lu, target_node)
7152
      if not self.failover:
7153
        result = self.rpc.call_instance_migratable(instance.primary_node,
7154
                                                   instance)
7155
        if result.fail_msg and self.fallback:
7156
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7157
                          " failover")
7158
          self.failover = True
7159
        else:
7160
          result.Raise("Can't migrate, please use failover",
7161
                       prereq=True, ecode=errors.ECODE_STATE)
7162

    
7163
    assert not (self.failover and self.cleanup)
7164

    
7165
    if not self.failover:
7166
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7167
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7168
                                   " parameters are accepted",
7169
                                   errors.ECODE_INVAL)
7170
      if self.lu.op.live is not None:
7171
        if self.lu.op.live:
7172
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7173
        else:
7174
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7175
        # reset the 'live' parameter to None so that repeated
7176
        # invocations of CheckPrereq do not raise an exception
7177
        self.lu.op.live = None
7178
      elif self.lu.op.mode is None:
7179
        # read the default value from the hypervisor
7180
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7181
                                                skip_globals=False)
7182
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7183

    
7184
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7185
    else:
7186
      # Failover is never live
7187
      self.live = False
7188

    
7189
  def _RunAllocator(self):
7190
    """Run the allocator based on input opcode.
7191

7192
    """
7193
    ial = IAllocator(self.cfg, self.rpc,
7194
                     mode=constants.IALLOCATOR_MODE_RELOC,
7195
                     name=self.instance_name,
7196
                     # TODO See why hail breaks with a single node below
7197
                     relocate_from=[self.instance.primary_node,
7198
                                    self.instance.primary_node],
7199
                     )
7200

    
7201
    ial.Run(self.lu.op.iallocator)
7202

    
7203
    if not ial.success:
7204
      raise errors.OpPrereqError("Can't compute nodes using"
7205
                                 " iallocator '%s': %s" %
7206
                                 (self.lu.op.iallocator, ial.info),
7207
                                 errors.ECODE_NORES)
7208
    if len(ial.result) != ial.required_nodes:
7209
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7210
                                 " of nodes (%s), required %s" %
7211
                                 (self.lu.op.iallocator, len(ial.result),
7212
                                  ial.required_nodes), errors.ECODE_FAULT)
7213
    self.target_node = ial.result[0]
7214
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7215
                 self.instance_name, self.lu.op.iallocator,
7216
                 utils.CommaJoin(ial.result))
7217

    
7218
  def _WaitUntilSync(self):
7219
    """Poll with custom rpc for disk sync.
7220

7221
    This uses our own step-based rpc call.
7222

7223
    """
7224
    self.feedback_fn("* wait until resync is done")
7225
    all_done = False
7226
    while not all_done:
7227
      all_done = True
7228
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7229
                                            self.nodes_ip,
7230
                                            self.instance.disks)
7231
      min_percent = 100
7232
      for node, nres in result.items():
7233
        nres.Raise("Cannot resync disks on node %s" % node)
7234
        node_done, node_percent = nres.payload
7235
        all_done = all_done and node_done
7236
        if node_percent is not None:
7237
          min_percent = min(min_percent, node_percent)
7238
      if not all_done:
7239
        if min_percent < 100:
7240
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7241
        time.sleep(2)
7242

    
7243
  def _EnsureSecondary(self, node):
7244
    """Demote a node to secondary.
7245

7246
    """
7247
    self.feedback_fn("* switching node %s to secondary mode" % node)
7248

    
7249
    for dev in self.instance.disks:
7250
      self.cfg.SetDiskID(dev, node)
7251

    
7252
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7253
                                          self.instance.disks)
7254
    result.Raise("Cannot change disk to secondary on node %s" % node)
7255

    
7256
  def _GoStandalone(self):
7257
    """Disconnect from the network.
7258

7259
    """
7260
    self.feedback_fn("* changing into standalone mode")
7261
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7262
                                               self.instance.disks)
7263
    for node, nres in result.items():
7264
      nres.Raise("Cannot disconnect disks node %s" % node)
7265

    
7266
  def _GoReconnect(self, multimaster):
7267
    """Reconnect to the network.
7268

7269
    """
7270
    if multimaster:
7271
      msg = "dual-master"
7272
    else:
7273
      msg = "single-master"
7274
    self.feedback_fn("* changing disks into %s mode" % msg)
7275
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7276
                                           self.instance.disks,
7277
                                           self.instance.name, multimaster)
7278
    for node, nres in result.items():
7279
      nres.Raise("Cannot change disks config on node %s" % node)
7280

    
7281
  def _ExecCleanup(self):
7282
    """Try to cleanup after a failed migration.
7283

7284
    The cleanup is done by:
7285
      - check that the instance is running only on one node
7286
        (and update the config if needed)
7287
      - change disks on its secondary node to secondary
7288
      - wait until disks are fully synchronized
7289
      - disconnect from the network
7290
      - change disks into single-master mode
7291
      - wait again until disks are fully synchronized
7292

7293
    """
7294
    instance = self.instance
7295
    target_node = self.target_node
7296
    source_node = self.source_node
7297

    
7298
    # check running on only one node
7299
    self.feedback_fn("* checking where the instance actually runs"
7300
                     " (if this hangs, the hypervisor might be in"
7301
                     " a bad state)")
7302
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7303
    for node, result in ins_l.items():
7304
      result.Raise("Can't contact node %s" % node)
7305

    
7306
    runningon_source = instance.name in ins_l[source_node].payload
7307
    runningon_target = instance.name in ins_l[target_node].payload
7308

    
7309
    if runningon_source and runningon_target:
7310
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7311
                               " or the hypervisor is confused; you will have"
7312
                               " to ensure manually that it runs only on one"
7313
                               " and restart this operation")
7314

    
7315
    if not (runningon_source or runningon_target):
7316
      raise errors.OpExecError("Instance does not seem to be running at all;"
7317
                               " in this case it's safer to repair by"
7318
                               " running 'gnt-instance stop' to ensure disk"
7319
                               " shutdown, and then restarting it")
7320

    
7321
    if runningon_target:
7322
      # the migration has actually succeeded, we need to update the config
7323
      self.feedback_fn("* instance running on secondary node (%s),"
7324
                       " updating config" % target_node)
7325
      instance.primary_node = target_node
7326
      self.cfg.Update(instance, self.feedback_fn)
7327
      demoted_node = source_node
7328
    else:
7329
      self.feedback_fn("* instance confirmed to be running on its"
7330
                       " primary node (%s)" % source_node)
7331
      demoted_node = target_node
7332

    
7333
    if instance.disk_template in constants.DTS_INT_MIRROR:
7334
      self._EnsureSecondary(demoted_node)
7335
      try:
7336
        self._WaitUntilSync()
7337
      except errors.OpExecError:
7338
        # we ignore here errors, since if the device is standalone, it
7339
        # won't be able to sync
7340
        pass
7341
      self._GoStandalone()
7342
      self._GoReconnect(False)
7343
      self._WaitUntilSync()
7344

    
7345
    self.feedback_fn("* done")
7346

    
7347
  def _RevertDiskStatus(self):
7348
    """Try to revert the disk status after a failed migration.
7349

7350
    """
7351
    target_node = self.target_node
7352
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7353
      return
7354

    
7355
    try:
7356
      self._EnsureSecondary(target_node)
7357
      self._GoStandalone()
7358
      self._GoReconnect(False)
7359
      self._WaitUntilSync()
7360
    except errors.OpExecError, err:
7361
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7362
                         " please try to recover the instance manually;"
7363
                         " error '%s'" % str(err))
7364

    
7365
  def _AbortMigration(self):
7366
    """Call the hypervisor code to abort a started migration.
7367

7368
    """
7369
    instance = self.instance
7370
    target_node = self.target_node
7371
    migration_info = self.migration_info
7372

    
7373
    abort_result = self.rpc.call_finalize_migration(target_node,
7374
                                                    instance,
7375
                                                    migration_info,
7376
                                                    False)
7377
    abort_msg = abort_result.fail_msg
7378
    if abort_msg:
7379
      logging.error("Aborting migration failed on target node %s: %s",
7380
                    target_node, abort_msg)
7381
      # Don't raise an exception here, as we still have to try to revert the
7382
      # disk status, even if this step failed.
7383

    
7384
  def _ExecMigration(self):
7385
    """Migrate an instance.
7386

7387
    The migrate is done by:
7388
      - change the disks into dual-master mode
7389
      - wait until disks are fully synchronized again
7390
      - migrate the instance
7391
      - change disks on the new secondary node (the old primary) to secondary
7392
      - wait until disks are fully synchronized
7393
      - change disks into single-master mode
7394

7395
    """
7396
    instance = self.instance
7397
    target_node = self.target_node
7398
    source_node = self.source_node
7399

    
7400
    self.feedback_fn("* checking disk consistency between source and target")
7401
    for dev in instance.disks:
7402
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7403
        raise errors.OpExecError("Disk %s is degraded or not fully"
7404
                                 " synchronized on target node,"
7405
                                 " aborting migration" % dev.iv_name)
7406

    
7407
    # First get the migration information from the remote node
7408
    result = self.rpc.call_migration_info(source_node, instance)
7409
    msg = result.fail_msg
7410
    if msg:
7411
      log_err = ("Failed fetching source migration information from %s: %s" %
7412
                 (source_node, msg))
7413
      logging.error(log_err)
7414
      raise errors.OpExecError(log_err)
7415

    
7416
    self.migration_info = migration_info = result.payload
7417

    
7418
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7419
      # Then switch the disks to master/master mode
7420
      self._EnsureSecondary(target_node)
7421
      self._GoStandalone()
7422
      self._GoReconnect(True)
7423
      self._WaitUntilSync()
7424

    
7425
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7426
    result = self.rpc.call_accept_instance(target_node,
7427
                                           instance,
7428
                                           migration_info,
7429
                                           self.nodes_ip[target_node])
7430

    
7431
    msg = result.fail_msg
7432
    if msg:
7433
      logging.error("Instance pre-migration failed, trying to revert"
7434
                    " disk status: %s", msg)
7435
      self.feedback_fn("Pre-migration failed, aborting")
7436
      self._AbortMigration()
7437
      self._RevertDiskStatus()
7438
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7439
                               (instance.name, msg))
7440

    
7441
    self.feedback_fn("* migrating instance to %s" % target_node)
7442
    result = self.rpc.call_instance_migrate(source_node, instance,
7443
                                            self.nodes_ip[target_node],
7444
                                            self.live)
7445
    msg = result.fail_msg
7446
    if msg:
7447
      logging.error("Instance migration failed, trying to revert"
7448
                    " disk status: %s", msg)
7449
      self.feedback_fn("Migration failed, aborting")
7450
      self._AbortMigration()
7451
      self._RevertDiskStatus()
7452
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7453
                               (instance.name, msg))
7454

    
7455
    instance.primary_node = target_node
7456
    # distribute new instance config to the other nodes
7457
    self.cfg.Update(instance, self.feedback_fn)
7458

    
7459
    result = self.rpc.call_finalize_migration(target_node,
7460
                                              instance,
7461
                                              migration_info,
7462
                                              True)
7463
    msg = result.fail_msg
7464
    if msg:
7465
      logging.error("Instance migration succeeded, but finalization failed:"
7466
                    " %s", msg)
7467
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7468
                               msg)
7469

    
7470
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7471
      self._EnsureSecondary(source_node)
7472
      self._WaitUntilSync()
7473
      self._GoStandalone()
7474
      self._GoReconnect(False)
7475
      self._WaitUntilSync()
7476

    
7477
    self.feedback_fn("* done")
7478

    
7479
  def _ExecFailover(self):
7480
    """Failover an instance.
7481

7482
    The failover is done by shutting it down on its present node and
7483
    starting it on the secondary.
7484

7485
    """
7486
    instance = self.instance
7487
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7488

    
7489
    source_node = instance.primary_node
7490
    target_node = self.target_node
7491

    
7492
    if instance.admin_up:
7493
      self.feedback_fn("* checking disk consistency between source and target")
7494
      for dev in instance.disks:
7495
        # for drbd, these are drbd over lvm
7496
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7497
          if primary_node.offline:
7498
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7499
                             " target node %s" %
7500
                             (primary_node.name, dev.iv_name, target_node))
7501
          elif not self.ignore_consistency:
7502
            raise errors.OpExecError("Disk %s is degraded on target node,"
7503
                                     " aborting failover" % dev.iv_name)
7504
    else:
7505
      self.feedback_fn("* not checking disk consistency as instance is not"
7506
                       " running")
7507

    
7508
    self.feedback_fn("* shutting down instance on source node")
7509
    logging.info("Shutting down instance %s on node %s",
7510
                 instance.name, source_node)
7511

    
7512
    result = self.rpc.call_instance_shutdown(source_node, instance,
7513
                                             self.shutdown_timeout)
7514
    msg = result.fail_msg
7515
    if msg:
7516
      if self.ignore_consistency or primary_node.offline:
7517
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7518
                           " proceeding anyway; please make sure node"
7519
                           " %s is down; error details: %s",
7520
                           instance.name, source_node, source_node, msg)
7521
      else:
7522
        raise errors.OpExecError("Could not shutdown instance %s on"
7523
                                 " node %s: %s" %
7524
                                 (instance.name, source_node, msg))
7525

    
7526
    self.feedback_fn("* deactivating the instance's disks on source node")
7527
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7528
      raise errors.OpExecError("Can't shut down the instance's disks")
7529

    
7530
    instance.primary_node = target_node
7531
    # distribute new instance config to the other nodes
7532
    self.cfg.Update(instance, self.feedback_fn)
7533

    
7534
    # Only start the instance if it's marked as up
7535
    if instance.admin_up:
7536
      self.feedback_fn("* activating the instance's disks on target node %s" %
7537
                       target_node)
7538
      logging.info("Starting instance %s on node %s",
7539
                   instance.name, target_node)
7540

    
7541
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7542
                                           ignore_secondaries=True)
7543
      if not disks_ok:
7544
        _ShutdownInstanceDisks(self.lu, instance)
7545
        raise errors.OpExecError("Can't activate the instance's disks")
7546

    
7547
      self.feedback_fn("* starting the instance on the target node %s" %
7548
                       target_node)
7549
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7550
                                            False)
7551
      msg = result.fail_msg
7552
      if msg:
7553
        _ShutdownInstanceDisks(self.lu, instance)
7554
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7555
                                 (instance.name, target_node, msg))
7556

    
7557
  def Exec(self, feedback_fn):
7558
    """Perform the migration.
7559

7560
    """
7561
    self.feedback_fn = feedback_fn
7562
    self.source_node = self.instance.primary_node
7563

    
7564
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7565
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7566
      self.target_node = self.instance.secondary_nodes[0]
7567
      # Otherwise self.target_node has been populated either
7568
      # directly, or through an iallocator.
7569

    
7570
    self.all_nodes = [self.source_node, self.target_node]
7571
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7572
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
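    # nodes_ip maps each involved node to its secondary IP, i.e. the address
    # used below for DRBD replication and for the migration RPCs.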
7573

    
7574
    if self.failover:
7575
      feedback_fn("Failover instance %s" % self.instance.name)
7576
      self._ExecFailover()
7577
    else:
7578
      feedback_fn("Migrating instance %s" % self.instance.name)
7579

    
7580
      if self.cleanup:
7581
        return self._ExecCleanup()
7582
      else:
7583
        return self._ExecMigration()
7584

    
7585

    
7586
def _CreateBlockDev(lu, node, instance, device, force_create,
7587
                    info, force_open):
7588
  """Create a tree of block devices on a given node.
7589

7590
  If this device type has to be created on secondaries, create it and
7591
  all its children.
7592

7593
  If not, just recurse to children keeping the same 'force' value.
7594

7595
  @param lu: the lu on whose behalf we execute
7596
  @param node: the node on which to create the device
7597
  @type instance: L{objects.Instance}
7598
  @param instance: the instance which owns the device
7599
  @type device: L{objects.Disk}
7600
  @param device: the device to create
7601
  @type force_create: boolean
7602
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7612

7613
  """
7614
  if device.CreateOnSecondary():
7615
    force_create = True
7616

    
7617
  if device.children:
7618
    for child in device.children:
7619
      _CreateBlockDev(lu, node, instance, child, force_create,
7620
                      info, force_open)
7621

    
7622
  if not force_create:
7623
    return
7624

    
7625
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7626

    
7627

    
7628
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7629
  """Create a single block device on a given node.
7630

7631
  This will not recurse over children of the device, so they must be
7632
  created in advance.
7633

7634
  @param lu: the lu on whose behalf we execute
7635
  @param node: the node on which to create the device
7636
  @type instance: L{objects.Instance}
7637
  @param instance: the instance which owns the device
7638
  @type device: L{objects.Disk}
7639
  @param device: the device to create
7640
  @param info: the extra 'metadata' we should attach to the device
7641
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7647

7648
  """
7649
  lu.cfg.SetDiskID(device, node)
7650
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7651
                                       instance.name, force_open, info)
7652
  result.Raise("Can't create block device %s on"
7653
               " node %s for instance %s" % (device, node, instance.name))
7654
  if device.physical_id is None:
7655
    device.physical_id = result.payload
7656

    
7657

    
7658
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate a unique logical volume name for each of the
  given extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
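  # Illustrative example (not part of the original code): for
  # exts == [".disk0_data", ".disk0_meta"] this returns two names, each a
  # freshly generated unique ID with the suffix appended, roughly
  # ["<uuid-1>.disk0_data", "<uuid-2>.disk0_meta"]; the exact ID format is
  # whatever GenerateUniqueID produces.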
7669


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7672
                         iv_name, p_minor, s_minor):
7673
  """Generate a drbd8 device complete with its children.
7674

7675
  """
7676
  assert len(vgnames) == len(names) == 2
7677
  port = lu.cfg.AllocatePort()
7678
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7679
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7680
                          logical_id=(vgnames[0], names[0]))
7681
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7682
                          logical_id=(vgnames[1], names[1]))
7683
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7684
                          logical_id=(primary, secondary, port,
7685
                                      p_minor, s_minor,
7686
                                      shared_secret),
7687
                          children=[dev_data, dev_meta],
7688
                          iv_name=iv_name)
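  # drbd_dev is a DRBD8 disk whose two children are the data LV (full size)
  # and the 128 MB metadata LV built above; its logical_id carries the
  # (primary, secondary, port, p_minor, s_minor, shared_secret) tuple needed
  # to assemble the device on both nodes.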
7689
  return drbd_dev
7690

    
7691

    
7692
def _GenerateDiskTemplate(lu, template_name,
7693
                          instance_name, primary_node,
7694
                          secondary_nodes, disk_info,
7695
                          file_storage_dir, file_driver,
7696
                          base_index, feedback_fn):
7697
  """Generate the entire disk layout for a given template type.
7698

7699
  """
7700
  #TODO: compute space requirements
7701

    
7702
  vgname = lu.cfg.GetVGName()
7703
  disk_count = len(disk_info)
7704
  disks = []
7705
  if template_name == constants.DT_DISKLESS:
7706
    pass
7707
  elif template_name == constants.DT_PLAIN:
7708
    if len(secondary_nodes) != 0:
7709
      raise errors.ProgrammerError("Wrong template configuration")
7710

    
7711
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7712
                                      for i in range(disk_count)])
7713
    for idx, disk in enumerate(disk_info):
7714
      disk_index = idx + base_index
7715
      vg = disk.get(constants.IDISK_VG, vgname)
7716
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7717
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7718
                              size=disk[constants.IDISK_SIZE],
7719
                              logical_id=(vg, names[idx]),
7720
                              iv_name="disk/%d" % disk_index,
7721
                              mode=disk[constants.IDISK_MODE])
7722
      disks.append(disk_dev)
7723
  elif template_name == constants.DT_DRBD8:
7724
    if len(secondary_nodes) != 1:
7725
      raise errors.ProgrammerError("Wrong template configuration")
7726
    remote_node = secondary_nodes[0]
7727
    minors = lu.cfg.AllocateDRBDMinor(
7728
      [primary_node, remote_node] * len(disk_info), instance_name)
7729

    
7730
    names = []
7731
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7732
                                               for i in range(disk_count)]):
7733
      names.append(lv_prefix + "_data")
7734
      names.append(lv_prefix + "_meta")
7735
    for idx, disk in enumerate(disk_info):
7736
      disk_index = idx + base_index
7737
      data_vg = disk.get(constants.IDISK_VG, vgname)
7738
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7739
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7740
                                      disk[constants.IDISK_SIZE],
7741
                                      [data_vg, meta_vg],
7742
                                      names[idx * 2:idx * 2 + 2],
7743
                                      "disk/%d" % disk_index,
7744
                                      minors[idx * 2], minors[idx * 2 + 1])
7745
      disk_dev.mode = disk[constants.IDISK_MODE]
7746
      disks.append(disk_dev)
7747
  elif template_name == constants.DT_FILE:
7748
    if len(secondary_nodes) != 0:
7749
      raise errors.ProgrammerError("Wrong template configuration")
7750

    
7751
    opcodes.RequireFileStorage()
7752

    
7753
    for idx, disk in enumerate(disk_info):
7754
      disk_index = idx + base_index
7755
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7756
                              size=disk[constants.IDISK_SIZE],
7757
                              iv_name="disk/%d" % disk_index,
7758
                              logical_id=(file_driver,
7759
                                          "%s/disk%d" % (file_storage_dir,
7760
                                                         disk_index)),
7761
                              mode=disk[constants.IDISK_MODE])
7762
      disks.append(disk_dev)
7763
  elif template_name == constants.DT_SHARED_FILE:
7764
    if len(secondary_nodes) != 0:
7765
      raise errors.ProgrammerError("Wrong template configuration")
7766

    
7767
    opcodes.RequireSharedFileStorage()
7768

    
7769
    for idx, disk in enumerate(disk_info):
7770
      disk_index = idx + base_index
7771
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7772
                              size=disk[constants.IDISK_SIZE],
7773
                              iv_name="disk/%d" % disk_index,
7774
                              logical_id=(file_driver,
7775
                                          "%s/disk%d" % (file_storage_dir,
7776
                                                         disk_index)),
7777
                              mode=disk[constants.IDISK_MODE])
7778
      disks.append(disk_dev)
7779
  elif template_name == constants.DT_BLOCK:
7780
    if len(secondary_nodes) != 0:
7781
      raise errors.ProgrammerError("Wrong template configuration")
7782

    
7783
    for idx, disk in enumerate(disk_info):
7784
      disk_index = idx + base_index
7785
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7786
                              size=disk[constants.IDISK_SIZE],
7787
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7788
                                          disk[constants.IDISK_ADOPT]),
7789
                              iv_name="disk/%d" % disk_index,
7790
                              mode=disk[constants.IDISK_MODE])
7791
      disks.append(disk_dev)
7792

    
7793
  else:
7794
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7795
  return disks
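  # Illustrative note: for a single 1024 MB DT_PLAIN disk the list returned
  # above would contain one objects.Disk with dev_type=constants.LD_LV,
  # size=1024, logical_id=(<vg>, "<uuid>.disk0") and iv_name="disk/0".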
7796

    
7797

    
7798
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7803

    
7804

    
7805
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
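  # Worked example (illustrative only): if 512 MB out of 1024 MB were written
  # in 30 seconds, avg_time is 30 / 512.0, so the remaining time is
  # (1024 - 512) * (30 / 512.0) == 30.0 seconds; in other words
  # _CalcEta(30.0, 512, 1024) returns 30.0.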
7816

    
7817

    
7818
def _WipeDisks(lu, instance):
7819
  """Wipes instance disks.
7820

7821
  @type lu: L{LogicalUnit}
7822
  @param lu: the logical unit on whose behalf we execute
7823
  @type instance: L{objects.Instance}
7824
  @param instance: the instance whose disks we should wipe
7825
  @return: the success of the wipe
7826

7827
  """
7828
  node = instance.primary_node
7829

    
7830
  for device in instance.disks:
7831
    lu.cfg.SetDiskID(device, node)
7832

    
7833
  logging.info("Pause sync of instance %s disks", instance.name)
7834
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7835

    
7836
  for idx, success in enumerate(result.payload):
7837
    if not success:
7838
      logging.warn("pause-sync of instance %s for disks %d failed",
7839
                   instance.name, idx)
7840

    
7841
  try:
7842
    for idx, device in enumerate(instance.disks):
7843
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7844
      # MAX_WIPE_CHUNK at max
7845
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7846
                            constants.MIN_WIPE_CHUNK_PERCENT)
7847
      # we _must_ make this an int, otherwise rounding errors will
7848
      # occur
7849
      wipe_chunk_size = int(wipe_chunk_size)
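      # Example with illustrative figures (the real limits come from
      # constants.MIN_WIPE_CHUNK_PERCENT and constants.MAX_WIPE_CHUNK): with
      # a 10% minimum and a 10240 MB cap, a 204800 MB disk would be wiped in
      # 10240 MB chunks rather than 20480 MB ones.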
7850

    
7851
      lu.LogInfo("* Wiping disk %d", idx)
7852
      logging.info("Wiping disk %d for instance %s, node %s using"
7853
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7854

    
7855
      offset = 0
7856
      size = device.size
7857
      last_output = 0
7858
      start_time = time.time()
7859

    
7860
      while offset < size:
7861
        wipe_size = min(wipe_chunk_size, size - offset)
7862
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7863
                      idx, offset, wipe_size)
7864
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7865
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7866
                     (idx, offset, wipe_size))
7867
        now = time.time()
7868
        offset += wipe_size
7869
        if now - last_output >= 60:
7870
          eta = _CalcEta(now - start_time, offset, size)
7871
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7872
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7873
          last_output = now
7874
  finally:
7875
    logging.info("Resume sync of instance %s disks", instance.name)
7876

    
7877
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7878

    
7879
    for idx, success in enumerate(result.payload):
7880
      if not success:
7881
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7882
                      " look at the status and troubleshoot the issue", idx)
7883
        logging.warn("resume-sync of instance %s for disks %d failed",
7884
                     instance.name, idx)
7885

    
7886

    
7887
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7888
  """Create all disks for an instance.
7889

7890
  This abstracts away some work from AddInstance.
7891

7892
  @type lu: L{LogicalUnit}
7893
  @param lu: the logical unit on whose behalf we execute
7894
  @type instance: L{objects.Instance}
7895
  @param instance: the instance whose disks we should create
7896
  @type to_skip: list
7897
  @param to_skip: list of indices to skip
7898
  @type target_node: string
7899
  @param target_node: if passed, overrides the target node for creation
7900
  @rtype: boolean
7901
  @return: the success of the creation
7902

7903
  """
7904
  info = _GetInstanceInfoText(instance)
7905
  if target_node is None:
7906
    pnode = instance.primary_node
7907
    all_nodes = instance.all_nodes
7908
  else:
7909
    pnode = target_node
7910
    all_nodes = [pnode]
7911

    
7912
  if instance.disk_template in constants.DTS_FILEBASED:
7913
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7914
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7915

    
7916
    result.Raise("Failed to create directory '%s' on"
7917
                 " node %s" % (file_storage_dir, pnode))
7918

    
7919
  # Note: this needs to be kept in sync with adding of disks in
7920
  # LUInstanceSetParams
7921
  for idx, device in enumerate(instance.disks):
7922
    if to_skip and idx in to_skip:
7923
      continue
7924
    logging.info("Creating volume %s for instance %s",
7925
                 device.iv_name, instance.name)
7926
    #HARDCODE
7927
    for node in all_nodes:
7928
      f_create = node == pnode
7929
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7930

    
7931

    
7932
def _RemoveDisks(lu, instance, target_node=None):
7933
  """Remove all disks for an instance.
7934

7935
  This abstracts away some work from `AddInstance()` and
7936
  `RemoveInstance()`. Note that in case some of the devices couldn't
7937
  be removed, the removal will continue with the other ones (compare
7938
  with `_CreateDisks()`).
7939

7940
  @type lu: L{LogicalUnit}
7941
  @param lu: the logical unit on whose behalf we execute
7942
  @type instance: L{objects.Instance}
7943
  @param instance: the instance whose disks we should remove
7944
  @type target_node: string
7945
  @param target_node: used to override the node on which to remove the disks
7946
  @rtype: boolean
7947
  @return: the success of the removal
7948

7949
  """
7950
  logging.info("Removing block devices for instance %s", instance.name)
7951

    
7952
  all_result = True
7953
  for device in instance.disks:
7954
    if target_node:
7955
      edata = [(target_node, device)]
7956
    else:
7957
      edata = device.ComputeNodeTree(instance.primary_node)
7958
    for node, disk in edata:
7959
      lu.cfg.SetDiskID(disk, node)
7960
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7961
      if msg:
7962
        lu.LogWarning("Could not remove block device %s on node %s,"
7963
                      " continuing anyway: %s", device.iv_name, node, msg)
7964
        all_result = False
7965

    
7966
    # if this is a DRBD disk, return its port to the pool
7967
    if device.dev_type in constants.LDS_DRBD:
7968
      tcp_port = device.logical_id[2]
7969
      lu.cfg.AddTcpUdpPort(tcp_port)
7970

    
7971
  if instance.disk_template == constants.DT_FILE:
7972
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7973
    if target_node:
7974
      tgt = target_node
7975
    else:
7976
      tgt = instance.primary_node
7977
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7978
    if result.fail_msg:
7979
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7980
                    file_storage_dir, instance.primary_node, result.fail_msg)
7981
      all_result = False
7982

    
7983
  return all_result
7984

    
7985

    
7986
def _ComputeDiskSizePerVG(disk_template, disks):
7987
  """Compute disk size requirements in the volume group
7988

7989
  """
7990
  def _compute(disks, payload):
7991
    """Universal algorithm.
7992

7993
    """
7994
    vgs = {}
7995
    for disk in disks:
7996
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
7998

    
7999
    return vgs
8000

    
8001
  # Required free disk space as a function of disk and swap space
8002
  req_size_dict = {
8003
    constants.DT_DISKLESS: {},
8004
    constants.DT_PLAIN: _compute(disks, 0),
8005
    # 128 MB are added for drbd metadata for each disk
8006
    constants.DT_DRBD8: _compute(disks, 128),
8007
    constants.DT_FILE: {},
8008
    constants.DT_SHARED_FILE: {},
8009
  }
8010

    
8011
  if disk_template not in req_size_dict:
8012
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8013
                                 " is unknown" % disk_template)
8014

    
8015
  return req_size_dict[disk_template]
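  # Worked example (illustrative): for two DRBD8 disks of 1024 MB each in
  # volume group "xenvg", this returns {"xenvg": 2 * (1024 + 128)}, i.e.
  # {"xenvg": 2304}: the data size plus 128 MB of DRBD metadata per disk.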
8016

    
8017

    
8018
def _ComputeDiskSize(disk_template, disks):
8019
  """Compute disk size requirements in the volume group
8020

8021
  """
8022
  # Required free disk space as a function of disk and swap space
8023
  req_size_dict = {
8024
    constants.DT_DISKLESS: None,
8025
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8026
    # 128 MB are added for drbd metadata for each disk
8027
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8028
    constants.DT_FILE: None,
8029
    constants.DT_SHARED_FILE: 0,
8030
    constants.DT_BLOCK: 0,
8031
  }
8032

    
8033
  if disk_template not in req_size_dict:
8034
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8035
                                 " is unknown" % disk_template)
8036

    
8037
  return req_size_dict[disk_template]
8038

    
8039

    
8040
def _FilterVmNodes(lu, nodenames):
8041
  """Filters out non-vm_capable nodes from a list.
8042

8043
  @type lu: L{LogicalUnit}
8044
  @param lu: the logical unit for which we check
8045
  @type nodenames: list
8046
  @param nodenames: the list of nodes on which we should check
8047
  @rtype: list
8048
  @return: the list of vm-capable nodes
8049

8050
  """
8051
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
8053

    
8054

    
8055
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8056
  """Hypervisor parameter validation.
8057

8058
  This function abstract the hypervisor parameter validation to be
8059
  used in both instance create and instance modify.
8060

8061
  @type lu: L{LogicalUnit}
8062
  @param lu: the logical unit for which we check
8063
  @type nodenames: list
8064
  @param nodenames: the list of nodes on which we should check
8065
  @type hvname: string
8066
  @param hvname: the name of the hypervisor we should use
8067
  @type hvparams: dict
8068
  @param hvparams: the parameters which we need to check
8069
  @raise errors.OpPrereqError: if the parameters are not valid
8070

8071
  """
8072
  nodenames = _FilterVmNodes(lu, nodenames)
8073
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8074
                                                  hvname,
8075
                                                  hvparams)
8076
  for node in nodenames:
8077
    info = hvinfo[node]
8078
    if info.offline:
8079
      continue
8080
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8081

    
8082

    
8083
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8084
  """OS parameters validation.
8085

8086
  @type lu: L{LogicalUnit}
8087
  @param lu: the logical unit for which we check
8088
  @type required: boolean
8089
  @param required: whether the validation should fail if the OS is not
8090
      found
8091
  @type nodenames: list
8092
  @param nodenames: the list of nodes on which we should check
8093
  @type osname: string
8094
  @param osname: the name of the hypervisor we should use
8095
  @type osparams: dict
8096
  @param osparams: the parameters which we need to check
8097
  @raise errors.OpPrereqError: if the parameters are not valid
8098

8099
  """
8100
  nodenames = _FilterVmNodes(lu, nodenames)
8101
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8102
                                   [constants.OS_VALIDATE_PARAMETERS],
8103
                                   osparams)
8104
  for node, nres in result.items():
8105
    # we don't check for offline cases since this should be run only
8106
    # against the master node and/or an instance's nodes
8107
    nres.Raise("OS Parameters validation failed on node %s" % node)
8108
    if not nres.payload:
8109
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8110
                 osname, node)
8111

    
8112

    
8113
class LUInstanceCreate(LogicalUnit):
8114
  """Create an instance.
8115

8116
  """
8117
  HPATH = "instance-add"
8118
  HTYPE = constants.HTYPE_INSTANCE
8119
  REQ_BGL = False
8120

    
8121
  def CheckArguments(self):
8122
    """Check arguments.
8123

8124
    """
8125
    # do not require name_check to ease forward/backward compatibility
8126
    # for tools
8127
    if self.op.no_install and self.op.start:
8128
      self.LogInfo("No-installation mode selected, disabling startup")
8129
      self.op.start = False
8130
    # validate/normalize the instance name
8131
    self.op.instance_name = \
8132
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8133

    
8134
    if self.op.ip_check and not self.op.name_check:
8135
      # TODO: make the ip check more flexible and not depend on the name check
8136
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8137
                                 " check", errors.ECODE_INVAL)
8138

    
8139
    # check nics' parameter names
8140
    for nic in self.op.nics:
8141
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8142

    
8143
    # check disks. parameter names and consistent adopt/no-adopt strategy
8144
    has_adopt = has_no_adopt = False
8145
    for disk in self.op.disks:
8146
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8147
      if constants.IDISK_ADOPT in disk:
8148
        has_adopt = True
8149
      else:
8150
        has_no_adopt = True
8151
    if has_adopt and has_no_adopt:
8152
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8153
                                 errors.ECODE_INVAL)
8154
    if has_adopt:
8155
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8156
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8157
                                   " '%s' disk template" %
8158
                                   self.op.disk_template,
8159
                                   errors.ECODE_INVAL)
8160
      if self.op.iallocator is not None:
8161
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8162
                                   " iallocator script", errors.ECODE_INVAL)
8163
      if self.op.mode == constants.INSTANCE_IMPORT:
8164
        raise errors.OpPrereqError("Disk adoption not allowed for"
8165
                                   " instance import", errors.ECODE_INVAL)
8166
    else:
8167
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8168
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8169
                                   " but no 'adopt' parameter given" %
8170
                                   self.op.disk_template,
8171
                                   errors.ECODE_INVAL)
8172

    
8173
    self.adopt_disks = has_adopt
8174

    
8175
    # instance name verification
8176
    if self.op.name_check:
8177
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8178
      self.op.instance_name = self.hostname1.name
8179
      # used in CheckPrereq for ip ping check
8180
      self.check_ip = self.hostname1.ip
8181
    else:
8182
      self.check_ip = None
8183

    
8184
    # file storage checks
8185
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
8187
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8188
                                 self.op.file_driver, errors.ECODE_INVAL)
8189

    
8190
    if self.op.disk_template == constants.DT_FILE:
8191
      opcodes.RequireFileStorage()
8192
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8193
      opcodes.RequireSharedFileStorage()
8194

    
8195
    ### Node/iallocator related checks
8196
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8197

    
8198
    if self.op.pnode is not None:
8199
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8200
        if self.op.snode is None:
8201
          raise errors.OpPrereqError("The networked disk templates need"
8202
                                     " a mirror node", errors.ECODE_INVAL)
8203
      elif self.op.snode:
8204
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8205
                        " template")
8206
        self.op.snode = None
8207

    
8208
    self._cds = _GetClusterDomainSecret()
8209

    
8210
    if self.op.mode == constants.INSTANCE_IMPORT:
8211
      # On import force_variant must be True, because if we forced it at
8212
      # initial install, our only chance when importing it back is that it
8213
      # works again!
8214
      self.op.force_variant = True
8215

    
8216
      if self.op.no_install:
8217
        self.LogInfo("No-installation mode has no effect during import")
8218

    
8219
    elif self.op.mode == constants.INSTANCE_CREATE:
8220
      if self.op.os_type is None:
8221
        raise errors.OpPrereqError("No guest OS specified",
8222
                                   errors.ECODE_INVAL)
8223
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8224
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8225
                                   " installation" % self.op.os_type,
8226
                                   errors.ECODE_STATE)
8227
      if self.op.disk_template is None:
8228
        raise errors.OpPrereqError("No disk template specified",
8229
                                   errors.ECODE_INVAL)
8230

    
8231
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8232
      # Check handshake to ensure both clusters have the same domain secret
8233
      src_handshake = self.op.source_handshake
8234
      if not src_handshake:
8235
        raise errors.OpPrereqError("Missing source handshake",
8236
                                   errors.ECODE_INVAL)
8237

    
8238
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8239
                                                           src_handshake)
8240
      if errmsg:
8241
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8242
                                   errors.ECODE_INVAL)
8243

    
8244
      # Load and check source CA
8245
      self.source_x509_ca_pem = self.op.source_x509_ca
8246
      if not self.source_x509_ca_pem:
8247
        raise errors.OpPrereqError("Missing source X509 CA",
8248
                                   errors.ECODE_INVAL)
8249

    
8250
      try:
8251
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8252
                                                    self._cds)
8253
      except OpenSSL.crypto.Error, err:
8254
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8255
                                   (err, ), errors.ECODE_INVAL)
8256

    
8257
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8258
      if errcode is not None:
8259
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8260
                                   errors.ECODE_INVAL)
8261

    
8262
      self.source_x509_ca = cert
8263

    
8264
      src_instance_name = self.op.source_instance_name
8265
      if not src_instance_name:
8266
        raise errors.OpPrereqError("Missing source instance name",
8267
                                   errors.ECODE_INVAL)
8268

    
8269
      self.source_instance_name = \
8270
          netutils.GetHostname(name=src_instance_name).name
8271

    
8272
    else:
8273
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8274
                                 self.op.mode, errors.ECODE_INVAL)
8275

    
8276
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
8581
    """Check prerequisites.
8582

8583
    """
8584
    self._CalculateFileStorageDir()
8585

    
8586
    if self.op.mode == constants.INSTANCE_IMPORT:
8587
      export_info = self._ReadExportInfo()
8588
      self._ReadExportParams(export_info)
8589

    
8590
    if (not self.cfg.GetVGName() and
8591
        self.op.disk_template not in constants.DTS_NOT_LVM):
8592
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8593
                                 " instances", errors.ECODE_STATE)
8594

    
8595
    if self.op.hypervisor is None:
8596
      self.op.hypervisor = self.cfg.GetHypervisorType()
8597

    
8598
    cluster = self.cfg.GetClusterInfo()
8599
    enabled_hvs = cluster.enabled_hypervisors
8600
    if self.op.hypervisor not in enabled_hvs:
8601
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8602
                                 " cluster (%s)" % (self.op.hypervisor,
8603
                                  ",".join(enabled_hvs)),
8604
                                 errors.ECODE_STATE)
8605

    
8606
    # Check tag validity
8607
    for tag in self.op.tags:
8608
      objects.TaggableObject.ValidateTag(tag)
8609

    
8610
    # check hypervisor parameter syntax (locally)
8611
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8612
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8613
                                      self.op.hvparams)
8614
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8615
    hv_type.CheckParameterSyntax(filled_hvp)
8616
    self.hv_full = filled_hvp
8617
    # check that we don't specify global parameters on an instance
8618
    _CheckGlobalHvParams(self.op.hvparams)
8619

    
8620
    # fill and remember the beparams dict
8621
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8622
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8623

    
8624
    # build os parameters
8625
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8626

    
8627
    # now that hvp/bep are in final format, let's reset to defaults,
8628
    # if told to do so
8629
    if self.op.identify_defaults:
8630
      self._RevertToDefaults(cluster)
8631

    
8632
    # NIC buildup
8633
    self.nics = []
8634
    for idx, nic in enumerate(self.op.nics):
8635
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8636
      nic_mode = nic_mode_req
8637
      if nic_mode is None:
8638
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8639

    
8640
      # in routed mode, for the first nic, the default ip is 'auto'
8641
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8642
        default_ip_mode = constants.VALUE_AUTO
8643
      else:
8644
        default_ip_mode = constants.VALUE_NONE
8645

    
8646
      # ip validity checks
8647
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8648
      if ip is None or ip.lower() == constants.VALUE_NONE:
8649
        nic_ip = None
8650
      elif ip.lower() == constants.VALUE_AUTO:
8651
        if not self.op.name_check:
8652
          raise errors.OpPrereqError("IP address set to auto but name checks"
8653
                                     " have been skipped",
8654
                                     errors.ECODE_INVAL)
8655
        nic_ip = self.hostname1.ip
8656
      else:
8657
        if not netutils.IPAddress.IsValid(ip):
8658
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8659
                                     errors.ECODE_INVAL)
8660
        nic_ip = ip
8661

    
8662
      # TODO: check the ip address for uniqueness
8663
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8664
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8665
                                   errors.ECODE_INVAL)
8666

    
8667
      # MAC address verification
8668
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8669
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8670
        mac = utils.NormalizeAndValidateMac(mac)
8671

    
8672
        try:
8673
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8674
        except errors.ReservationError:
8675
          raise errors.OpPrereqError("MAC address %s already in use"
8676
                                     " in cluster" % mac,
8677
                                     errors.ECODE_NOTUNIQUE)
8678

    
8679
      #  Build nic parameters
8680
      link = nic.get(constants.INIC_LINK, None)
8681
      nicparams = {}
8682
      if nic_mode_req:
8683
        nicparams[constants.NIC_MODE] = nic_mode_req
8684
      if link:
8685
        nicparams[constants.NIC_LINK] = link
8686

    
8687
      check_params = cluster.SimpleFillNIC(nicparams)
8688
      objects.NIC.CheckParameterSyntax(check_params)
8689
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
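      # Only explicitly requested overrides (mode/link) are stored in
      # nicparams; omitted values keep tracking the cluster defaults, which
      # SimpleFillNIC merged in above purely for the syntax check.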
8690

    
8691
    # disk checks/pre-build
8692
    default_vg = self.cfg.GetVGName()
8693
    self.disks = []
8694
    for disk in self.op.disks:
8695
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8696
      if mode not in constants.DISK_ACCESS_SET:
8697
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8698
                                   mode, errors.ECODE_INVAL)
8699
      size = disk.get(constants.IDISK_SIZE, None)
8700
      if size is None:
8701
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8702
      try:
8703
        size = int(size)
8704
      except (TypeError, ValueError):
8705
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8706
                                   errors.ECODE_INVAL)
8707

    
8708
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8709
      new_disk = {
8710
        constants.IDISK_SIZE: size,
8711
        constants.IDISK_MODE: mode,
8712
        constants.IDISK_VG: data_vg,
8713
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8714
        }
8715
      if constants.IDISK_ADOPT in disk:
8716
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8717
      self.disks.append(new_disk)
8718

    
8719
    if self.op.mode == constants.INSTANCE_IMPORT:
8720

    
8721
      # Check that the new instance doesn't have less disks than the export
8722
      instance_disks = len(self.disks)
8723
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8724
      if instance_disks < export_disks:
8725
        raise errors.OpPrereqError("Not enough disks to import."
8726
                                   " (instance: %d, export: %d)" %
8727
                                   (instance_disks, export_disks),
8728
                                   errors.ECODE_INVAL)
8729

    
8730
      disk_images = []
8731
      for idx in range(export_disks):
8732
        option = "disk%d_dump" % idx
8733
        if export_info.has_option(constants.INISECT_INS, option):
8734
          # FIXME: are the old os-es, disk sizes, etc. useful?
8735
          export_name = export_info.get(constants.INISECT_INS, option)
8736
          image = utils.PathJoin(self.op.src_path, export_name)
8737
          disk_images.append(image)
8738
        else:
8739
          disk_images.append(False)
8740

    
8741
      self.src_images = disk_images
8742

    
8743
      old_name = export_info.get(constants.INISECT_INS, "name")
8744
      try:
8745
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8746
      except (TypeError, ValueError), err:
8747
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8748
                                   " an integer: %s" % str(err),
8749
                                   errors.ECODE_STATE)
8750
      if self.op.instance_name == old_name:
8751
        for idx, nic in enumerate(self.nics):
8752
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8753
            nic_mac_ini = "nic%d_mac" % idx
8754
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8755

    
8756
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8757

    
8758
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8759
    if self.op.ip_check:
8760
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8761
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8762
                                   (self.check_ip, self.op.instance_name),
8763
                                   errors.ECODE_NOTUNIQUE)
8764

    
8765
    #### mac address generation
8766
    # By generating here the mac address both the allocator and the hooks get
8767
    # the real final mac address rather than the 'auto' or 'generate' value.
8768
    # There is a race condition between the generation and the instance object
8769
    # creation, which means that we know the mac is valid now, but we're not
8770
    # sure it will be when we actually add the instance. If things go bad
8771
    # adding the instance will abort because of a duplicate mac, and the
8772
    # creation job will fail.
8773
    for nic in self.nics:
8774
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8775
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8776

    
8777
    #### allocator run
8778

    
8779
    if self.op.iallocator is not None:
8780
      self._RunAllocator()
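    # At this point the allocator has filled in self.op.pnode (and
    # self.op.snode when two nodes are required), so the node checks below
    # treat them exactly like user-supplied nodes.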
8781

    
8782
    #### node related checks
8783

    
8784
    # check primary node
8785
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8786
    assert self.pnode is not None, \
8787
      "Cannot retrieve locked node %s" % self.op.pnode
8788
    if pnode.offline:
8789
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8790
                                 pnode.name, errors.ECODE_STATE)
8791
    if pnode.drained:
8792
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8793
                                 pnode.name, errors.ECODE_STATE)
8794
    if not pnode.vm_capable:
8795
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8796
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8797

    
8798
    self.secondaries = []
8799

    
8800
    # mirror node verification
8801
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8802
      if self.op.snode == pnode.name:
8803
        raise errors.OpPrereqError("The secondary node cannot be the"
8804
                                   " primary node", errors.ECODE_INVAL)
8805
      _CheckNodeOnline(self, self.op.snode)
8806
      _CheckNodeNotDrained(self, self.op.snode)
8807
      _CheckNodeVmCapable(self, self.op.snode)
8808
      self.secondaries.append(self.op.snode)
8809

    
8810
    nodenames = [pnode.name] + self.secondaries
8811

    
8812
    if not self.adopt_disks:
8813
      # Check lv size requirements, if not adopting
8814
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8815
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8816

    
8817
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8818
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8819
                                disk[constants.IDISK_ADOPT])
8820
                     for disk in self.disks])
8821
      if len(all_lvs) != len(self.disks):
8822
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8823
                                   errors.ECODE_INVAL)
8824
      for lv_name in all_lvs:
8825
        try:
8826
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8827
          # to ReserveLV uses the same syntax
8828
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8829
        except errors.ReservationError:
8830
          raise errors.OpPrereqError("LV named %s used by another instance" %
8831
                                     lv_name, errors.ECODE_NOTUNIQUE)
8832

    
8833
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8834
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8835

    
8836
      node_lvs = self.rpc.call_lv_list([pnode.name],
8837
                                       vg_names.payload.keys())[pnode.name]
8838
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8839
      node_lvs = node_lvs.payload
8840

    
8841
      delta = all_lvs.difference(node_lvs.keys())
8842
      if delta:
8843
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8844
                                   utils.CommaJoin(delta),
8845
                                   errors.ECODE_INVAL)
8846
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8847
      if online_lvs:
8848
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8849
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8850
                                   errors.ECODE_STATE)
8851
      # update the size of disk based on what is found
8852
      for dsk in self.disks:
8853
        dsk[constants.IDISK_SIZE] = \
8854
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8855
                                        dsk[constants.IDISK_ADOPT])][0]))
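      # node_lvs maps "vg/lv" names to the attribute tuples returned by
      # call_lv_list; only index 0 (the size) and index 2 (the "online"
      # flag, checked above) are used here.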
8856

    
8857
    elif self.op.disk_template == constants.DT_BLOCK:
8858
      # Normalize and de-duplicate device paths
8859
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8860
                       for disk in self.disks])
8861
      if len(all_disks) != len(self.disks):
8862
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8863
                                   errors.ECODE_INVAL)
8864
      baddisks = [d for d in all_disks
8865
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8866
      if baddisks:
8867
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8868
                                   " cannot be adopted" %
8869
                                   (", ".join(baddisks),
8870
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8871
                                   errors.ECODE_INVAL)
8872

    
8873
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8874
                                            list(all_disks))[pnode.name]
8875
      node_disks.Raise("Cannot get block device information from node %s" %
8876
                       pnode.name)
8877
      node_disks = node_disks.payload
8878
      delta = all_disks.difference(node_disks.keys())
8879
      if delta:
8880
        raise errors.OpPrereqError("Missing block device(s): %s" %
8881
                                   utils.CommaJoin(delta),
8882
                                   errors.ECODE_INVAL)
8883
      for dsk in self.disks:
8884
        dsk[constants.IDISK_SIZE] = \
8885
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8886

    
8887
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8888

    
8889
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8890
    # check OS parameters (remotely)
8891
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8892

    
8893
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8894

    
8895
    # memory check on primary node
8896
    if self.op.start:
8897
      _CheckNodeFreeMemory(self, self.pnode.name,
8898
                           "creating instance %s" % self.op.instance_name,
8899
                           self.be_full[constants.BE_MEMORY],
8900
                           self.op.hypervisor)
8901

    
8902
    self.dry_run_result = list(nodenames)
8903

    
8904
  def Exec(self, feedback_fn):
8905
    """Create and add the instance to the cluster.
8906

8907
    """
8908
    instance = self.op.instance_name
8909
    pnode_name = self.pnode.name
8910

    
8911
    ht_kind = self.op.hypervisor
8912
    if ht_kind in constants.HTS_REQ_PORT:
8913
      network_port = self.cfg.AllocatePort()
8914
    else:
8915
      network_port = None
8916

    
8917
    disks = _GenerateDiskTemplate(self,
8918
                                  self.op.disk_template,
8919
                                  instance, pnode_name,
8920
                                  self.secondaries,
8921
                                  self.disks,
8922
                                  self.instance_file_storage_dir,
8923
                                  self.op.file_driver,
8924
                                  0,
8925
                                  feedback_fn)
8926

    
8927
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8928
                            primary_node=pnode_name,
8929
                            nics=self.nics, disks=disks,
8930
                            disk_template=self.op.disk_template,
8931
                            admin_up=False,
8932
                            network_port=network_port,
8933
                            beparams=self.op.beparams,
8934
                            hvparams=self.op.hvparams,
8935
                            hypervisor=self.op.hypervisor,
8936
                            osparams=self.op.osparams,
8937
                            )
8938

    
8939
    if self.op.tags:
8940
      for tag in self.op.tags:
8941
        iobj.AddTag(tag)
8942

    
8943
    if self.adopt_disks:
8944
      if self.op.disk_template == constants.DT_PLAIN:
8945
        # rename LVs to the newly-generated names; we need to construct
8946
        # 'fake' LV disks with the old data, plus the new unique_id
8947
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8948
        rename_to = []
8949
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8950
          rename_to.append(t_dsk.logical_id)
8951
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8952
          self.cfg.SetDiskID(t_dsk, pnode_name)
8953
        result = self.rpc.call_blockdev_rename(pnode_name,
8954
                                               zip(tmp_disks, rename_to))
8955
        result.Raise("Failed to rename adopted LVs")
8956
    else:
8957
      feedback_fn("* creating instance disks...")
8958
      try:
8959
        _CreateDisks(self, iobj)
8960
      except errors.OpExecError:
8961
        self.LogWarning("Device creation failed, reverting...")
8962
        try:
8963
          _RemoveDisks(self, iobj)
8964
        finally:
8965
          self.cfg.ReleaseDRBDMinors(instance)
8966
          raise
8967

    
8968
    feedback_fn("adding instance %s to cluster config" % instance)
8969

    
8970
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8971

    
8972
    # Declare that we don't want to remove the instance lock anymore, as we've
8973
    # added the instance to the config
8974
    del self.remove_locks[locking.LEVEL_INSTANCE]
8975

    
8976
    if self.op.mode == constants.INSTANCE_IMPORT:
8977
      # Release unused nodes
8978
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8979
    else:
8980
      # Release all nodes
8981
      _ReleaseLocks(self, locking.LEVEL_NODE)
8982

    
8983
    disk_abort = False
8984
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8985
      feedback_fn("* wiping instance disks...")
8986
      try:
8987
        _WipeDisks(self, iobj)
8988
      except errors.OpExecError, err:
8989
        logging.exception("Wiping disks failed")
8990
        self.LogWarning("Wiping instance disks failed (%s)", err)
8991
        disk_abort = True
8992

    
8993
    if disk_abort:
8994
      # Something is already wrong with the disks, don't do anything else
8995
      pass
8996
    elif self.op.wait_for_sync:
8997
      disk_abort = not _WaitForSync(self, iobj)
8998
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8999
      # make sure the disks are not degraded (still sync-ing is ok)
9000
      feedback_fn("* checking mirrors status")
9001
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9002
    else:
9003
      disk_abort = False
9004

    
9005
    if disk_abort:
9006
      _RemoveDisks(self, iobj)
9007
      self.cfg.RemoveInstance(iobj.name)
9008
      # Make sure the instance lock gets removed
9009
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9010
      raise errors.OpExecError("There are some degraded disks for"
9011
                               " this instance")
9012

    
9013
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9014
      if self.op.mode == constants.INSTANCE_CREATE:
9015
        if not self.op.no_install:
9016
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9017
                        not self.op.wait_for_sync)
9018
          if pause_sync:
9019
            feedback_fn("* pausing disk sync to install instance OS")
9020
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9021
                                                              iobj.disks, True)
9022
            for idx, success in enumerate(result.payload):
9023
              if not success:
9024
                logging.warn("pause-sync of instance %s for disk %d failed",
9025
                             instance, idx)
9026

    
9027
          feedback_fn("* running the instance OS create scripts...")
9028
          # FIXME: pass debug option from opcode to backend
9029
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9030
                                                 self.op.debug_level)
9031
          if pause_sync:
9032
            feedback_fn("* resuming disk sync")
9033
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9034
                                                              iobj.disks, False)
9035
            for idx, success in enumerate(result.payload):
9036
              if not success:
9037
                logging.warn("resume-sync of instance %s for disk %d failed",
9038
                             instance, idx)
9039

    
9040
          result.Raise("Could not add os for instance %s"
9041
                       " on node %s" % (instance, pnode_name))
9042

    
9043
      elif self.op.mode == constants.INSTANCE_IMPORT:
9044
        feedback_fn("* running the instance OS import scripts...")
9045

    
9046
        transfers = []
9047

    
9048
        for idx, image in enumerate(self.src_images):
9049
          if not image:
9050
            continue
9051

    
9052
          # FIXME: pass debug option from opcode to backend
9053
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9054
                                             constants.IEIO_FILE, (image, ),
9055
                                             constants.IEIO_SCRIPT,
9056
                                             (iobj.disks[idx], idx),
9057
                                             None)
9058
          transfers.append(dt)
9059

    
9060
        import_result = \
9061
          masterd.instance.TransferInstanceData(self, feedback_fn,
9062
                                                self.op.src_node, pnode_name,
9063
                                                self.pnode.secondary_ip,
9064
                                                iobj, transfers)
9065
        if not compat.all(import_result):
9066
          self.LogWarning("Some disks for instance %s on node %s were not"
9067
                          " imported successfully" % (instance, pnode_name))
9068

    
9069
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9070
        feedback_fn("* preparing remote import...")
9071
        # The source cluster will stop the instance before attempting to make a
9072
        # connection. In some cases stopping an instance can take a long time,
9073
        # hence the shutdown timeout is added to the connection timeout.
9074
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9075
                           self.op.source_shutdown_timeout)
9076
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
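        # Illustrative example (numbers are not taken from the
        # configuration): a 60s connect timeout combined with a 300s source
        # shutdown timeout makes the master wait up to 360s for the source
        # cluster to connect.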
9077

    
9078
        assert iobj.primary_node == self.pnode.name
9079
        disk_results = \
9080
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9081
                                        self.source_x509_ca,
9082
                                        self._cds, timeouts)
9083
        if not compat.all(disk_results):
9084
          # TODO: Should the instance still be started, even if some disks
9085
          # failed to import (valid for local imports, too)?
9086
          self.LogWarning("Some disks for instance %s on node %s were not"
9087
                          " imported successfully" % (instance, pnode_name))
9088

    
9089
        # Run rename script on newly imported instance
9090
        assert iobj.name == instance
9091
        feedback_fn("Running rename script for %s" % instance)
9092
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9093
                                                   self.source_instance_name,
9094
                                                   self.op.debug_level)
9095
        if result.fail_msg:
9096
          self.LogWarning("Failed to run rename script for %s on node"
9097
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9098

    
9099
      else:
9100
        # also checked in the prereq part
9101
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9102
                                     % self.op.mode)
9103

    
9104
    if self.op.start:
9105
      iobj.admin_up = True
9106
      self.cfg.Update(iobj, feedback_fn)
9107
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9108
      feedback_fn("* starting instance...")
9109
      result = self.rpc.call_instance_start(pnode_name, iobj,
9110
                                            None, None, False)
9111
      result.Raise("Could not start instance")
9112

    
9113
    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
9294
  """Replaces disks for an instance.
9295

9296
  Note: Locking is not within the scope of this class.
9297

9298
  """
9299
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9300
               disks, delay_iallocator, early_release):
9301
    """Initializes this class.
9302

9303
    """
9304
    Tasklet.__init__(self, lu)
9305

    
9306
    # Parameters
9307
    self.instance_name = instance_name
9308
    self.mode = mode
9309
    self.iallocator_name = iallocator_name
9310
    self.remote_node = remote_node
9311
    self.disks = disks
9312
    self.delay_iallocator = delay_iallocator
9313
    self.early_release = early_release
9314

    
9315
    # Runtime data
9316
    self.instance = None
9317
    self.new_node = None
9318
    self.target_node = None
9319
    self.other_node = None
9320
    self.remote_node_info = None
9321
    self.node_secondary_ip = None
9322

    
9323
  @staticmethod
9324
  def CheckArguments(mode, remote_node, iallocator):
9325
    """Helper function for users of this class.
9326

9327
    """
9328
    # check for valid parameter combination
9329
    if mode == constants.REPLACE_DISK_CHG:
9330
      if remote_node is None and iallocator is None:
9331
        raise errors.OpPrereqError("When changing the secondary either an"
9332
                                   " iallocator script must be used or the"
9333
                                   " new node given", errors.ECODE_INVAL)
9334

    
9335
      if remote_node is not None and iallocator is not None:
9336
        raise errors.OpPrereqError("Give either the iallocator or the new"
9337
                                   " secondary, not both", errors.ECODE_INVAL)
9338

    
9339
    elif remote_node is not None or iallocator is not None:
9340
      # Not replacing the secondary
9341
      raise errors.OpPrereqError("The iallocator and new node options can"
9342
                                 " only be used when changing the"
9343
                                 " secondary node", errors.ECODE_INVAL)
9344

    
9345
  @staticmethod
9346
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9347
    """Compute a new secondary node using an IAllocator.
9348

9349
    """
9350
    ial = IAllocator(lu.cfg, lu.rpc,
9351
                     mode=constants.IALLOCATOR_MODE_RELOC,
9352
                     name=instance_name,
9353
                     relocate_from=list(relocate_from))
9354

    
9355
    ial.Run(iallocator_name)
9356

    
9357
    if not ial.success:
9358
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9359
                                 " %s" % (iallocator_name, ial.info),
9360
                                 errors.ECODE_NORES)
9361

    
9362
    if len(ial.result) != ial.required_nodes:
9363
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9364
                                 " of nodes (%s), required %s" %
9365
                                 (iallocator_name,
9366
                                  len(ial.result), ial.required_nodes),
9367
                                 errors.ECODE_FAULT)
9368

    
9369
    remote_node_name = ial.result[0]
9370

    
9371
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9372
               instance_name, remote_node_name)
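    # The chosen node is only a candidate here; _CheckPrereq2 later rejects
    # it if it matches the instance's primary or current secondary node.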
9373

    
9374
    return remote_node_name
9375

    
9376
  def _FindFaultyDisks(self, node_name):
9377
    """Wrapper for L{_FindFaultyInstanceDisks}.
9378

9379
    """
9380
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9381
                                    node_name, True)
9382

    
9383
  def _CheckDisksActivated(self, instance):
9384
    """Checks if the instance disks are activated.
9385

9386
    @param instance: The instance to check disks
9387
    @return: True if they are activated, False otherwise
9388

9389
    """
9390
    nodes = instance.all_nodes
9391

    
9392
    for idx, dev in enumerate(instance.disks):
9393
      for node in nodes:
9394
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9395
        self.cfg.SetDiskID(dev, node)
9396

    
9397
        result = self.rpc.call_blockdev_find(node, dev)
9398

    
9399
        if result.offline:
9400
          continue
9401
        elif result.fail_msg or not result.payload:
9402
          return False
9403

    
9404
    return True
9405

    
9406
  def CheckPrereq(self):
9407
    """Check prerequisites.
9408

9409
    This checks that the instance is in the cluster.
9410

9411
    """
9412
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9413
    assert instance is not None, \
9414
      "Cannot retrieve locked instance %s" % self.instance_name
9415

    
9416
    if instance.disk_template != constants.DT_DRBD8:
9417
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9418
                                 " instances", errors.ECODE_INVAL)
9419

    
9420
    if len(instance.secondary_nodes) != 1:
9421
      raise errors.OpPrereqError("The instance has a strange layout,"
9422
                                 " expected one secondary but found %d" %
9423
                                 len(instance.secondary_nodes),
9424
                                 errors.ECODE_FAULT)
9425

    
9426
    if not self.delay_iallocator:
9427
      self._CheckPrereq2()
9428

    
9429
  def _CheckPrereq2(self):
9430
    """Check prerequisites, second part.
9431

9432
    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation the iallocator would
    otherwise only be called with an unmodified cluster model, not taking
    planned changes into account.
9436

9437
    """
9438
    instance = self.instance
9439
    secondary_node = instance.secondary_nodes[0]
9440

    
9441
    if self.iallocator_name is None:
9442
      remote_node = self.remote_node
9443
    else:
9444
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9445
                                       instance.name, instance.secondary_nodes)
9446

    
9447
    if remote_node is None:
9448
      self.remote_node_info = None
9449
    else:
9450
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9451
             "Remote node '%s' is not locked" % remote_node
9452

    
9453
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9454
      assert self.remote_node_info is not None, \
9455
        "Cannot retrieve locked node %s" % remote_node
9456

    
9457
    if remote_node == self.instance.primary_node:
9458
      raise errors.OpPrereqError("The specified node is the primary node of"
9459
                                 " the instance", errors.ECODE_INVAL)
9460

    
9461
    if remote_node == secondary_node:
9462
      raise errors.OpPrereqError("The specified node is already the"
9463
                                 " secondary node of the instance",
9464
                                 errors.ECODE_INVAL)
9465

    
9466
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9467
                                    constants.REPLACE_DISK_CHG):
9468
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9469
                                 errors.ECODE_INVAL)
9470

    
9471
    if self.mode == constants.REPLACE_DISK_AUTO:
9472
      if not self._CheckDisksActivated(instance):
9473
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9474
                                   " first" % self.instance_name,
9475
                                   errors.ECODE_STATE)
9476
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9477
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9478

    
9479
      if faulty_primary and faulty_secondary:
9480
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9481
                                   " one node and can not be repaired"
9482
                                   " automatically" % self.instance_name,
9483
                                   errors.ECODE_STATE)
9484

    
9485
      if faulty_primary:
9486
        self.disks = faulty_primary
9487
        self.target_node = instance.primary_node
9488
        self.other_node = secondary_node
9489
        check_nodes = [self.target_node, self.other_node]
9490
      elif faulty_secondary:
9491
        self.disks = faulty_secondary
9492
        self.target_node = secondary_node
9493
        self.other_node = instance.primary_node
9494
        check_nodes = [self.target_node, self.other_node]
9495
      else:
9496
        self.disks = []
9497
        check_nodes = []
9498

    
9499
    else:
9500
      # Non-automatic modes
9501
      if self.mode == constants.REPLACE_DISK_PRI:
9502
        self.target_node = instance.primary_node
9503
        self.other_node = secondary_node
9504
        check_nodes = [self.target_node, self.other_node]
9505

    
9506
      elif self.mode == constants.REPLACE_DISK_SEC:
9507
        self.target_node = secondary_node
9508
        self.other_node = instance.primary_node
9509
        check_nodes = [self.target_node, self.other_node]
9510

    
9511
      elif self.mode == constants.REPLACE_DISK_CHG:
9512
        self.new_node = remote_node
9513
        self.other_node = instance.primary_node
9514
        self.target_node = secondary_node
9515
        check_nodes = [self.new_node, self.other_node]
9516

    
9517
        _CheckNodeNotDrained(self.lu, remote_node)
9518
        _CheckNodeVmCapable(self.lu, remote_node)
9519

    
9520
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9521
        assert old_node_info is not None
9522
        if old_node_info.offline and not self.early_release:
9523
          # doesn't make sense to delay the release
9524
          self.early_release = True
9525
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9526
                          " early-release mode", secondary_node)
9527

    
9528
      else:
9529
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9530
                                     self.mode)
9531

    
9532
      # If not specified all disks should be replaced
9533
      if not self.disks:
9534
        self.disks = range(len(self.instance.disks))
9535

    
9536
    for node in check_nodes:
9537
      _CheckNodeOnline(self.lu, node)
9538

    
9539
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9540
                                                          self.other_node,
9541
                                                          self.target_node]
9542
                              if node_name is not None)
9543

    
9544
    # Release unneeded node locks
9545
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9546

    
9547
    # Release any owned node group
9548
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9549
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9550

    
9551
    # Check whether disks are valid
9552
    for disk_idx in self.disks:
9553
      instance.FindDisk(disk_idx)
9554

    
9555
    # Get secondary node IP addresses
9556
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9557
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9558

    
9559
  def Exec(self, feedback_fn):
9560
    """Execute disk replacement.
9561

9562
    This dispatches the disk replacement to the appropriate handler.
9563

9564
    """
9565
    if self.delay_iallocator:
9566
      self._CheckPrereq2()
9567

    
9568
    if __debug__:
9569
      # Verify owned locks before starting operation
9570
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9571
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9572
          ("Incorrect node locks, owning %s, expected %s" %
9573
           (owned_nodes, self.node_secondary_ip.keys()))
9574

    
9575
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9576
      assert list(owned_instances) == [self.instance_name], \
9577
          "Instance '%s' not locked" % self.instance_name
9578

    
9579
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9580
          "Should not own any node group lock at this point"
9581

    
9582
    if not self.disks:
9583
      feedback_fn("No disks need replacement")
9584
      return
9585

    
9586
    feedback_fn("Replacing disk(s) %s for %s" %
9587
                (utils.CommaJoin(self.disks), self.instance.name))
9588

    
9589
    activate_disks = (not self.instance.admin_up)
9590

    
9591
    # Activate the instance disks if we're replacing them on a down instance
9592
    if activate_disks:
9593
      _StartInstanceDisks(self.lu, self.instance, True)
9594

    
9595
    try:
9596
      # Should we replace the secondary node?
9597
      if self.new_node is not None:
9598
        fn = self._ExecDrbd8Secondary
9599
      else:
9600
        fn = self._ExecDrbd8DiskOnly
9601

    
9602
      result = fn(feedback_fn)
9603
    finally:
9604
      # Deactivate the instance disks if we're replacing them on a
9605
      # down instance
9606
      if activate_disks:
9607
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9608

    
9609
    if __debug__:
9610
      # Verify owned locks
9611
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9612
      nodes = frozenset(self.node_secondary_ip)
9613
      assert ((self.early_release and not owned_nodes) or
9614
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9615
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9616
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9617

    
9618
    return result
9619

    
9620
  def _CheckVolumeGroup(self, nodes):
9621
    self.lu.LogInfo("Checking volume groups")
9622

    
9623
    vgname = self.cfg.GetVGName()
9624

    
9625
    # Make sure volume group exists on all involved nodes
9626
    results = self.rpc.call_vg_list(nodes)
9627
    if not results:
9628
      raise errors.OpExecError("Can't list volume groups on the nodes")
9629

    
9630
    for node in nodes:
9631
      res = results[node]
9632
      res.Raise("Error checking node %s" % node)
9633
      if vgname not in res.payload:
9634
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9635
                                 (vgname, node))
9636

    
9637
  def _CheckDisksExistence(self, nodes):
9638
    # Check disk existence
9639
    for idx, dev in enumerate(self.instance.disks):
9640
      if idx not in self.disks:
9641
        continue
9642

    
9643
      for node in nodes:
9644
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9645
        self.cfg.SetDiskID(dev, node)
9646

    
9647
        result = self.rpc.call_blockdev_find(node, dev)
9648

    
9649
        msg = result.fail_msg
9650
        if msg or not result.payload:
9651
          if not msg:
9652
            msg = "disk not found"
9653
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9654
                                   (idx, node, msg))
9655

    
9656
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9657
    for idx, dev in enumerate(self.instance.disks):
9658
      if idx not in self.disks:
9659
        continue
9660

    
9661
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9662
                      (idx, node_name))
9663

    
9664
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9665
                                   ldisk=ldisk):
9666
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9667
                                 " replace disks for instance %s" %
9668
                                 (node_name, self.instance.name))
9669

    
9670
  def _CreateNewStorage(self, node_name):
9671
    """Create new storage on the primary or secondary node.
9672

9673
    This is only used for same-node replaces, not for changing the
9674
    secondary node, hence we don't want to modify the existing disk.
9675

9676
    """
9677
    iv_names = {}
9678

    
9679
    for idx, dev in enumerate(self.instance.disks):
9680
      if idx not in self.disks:
9681
        continue
9682

    
9683
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9684

    
9685
      self.cfg.SetDiskID(dev, node_name)
9686

    
9687
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9688
      names = _GenerateUniqueNames(self.lu, lv_names)
9689

    
9690
      vg_data = dev.children[0].logical_id[0]
9691
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9692
                             logical_id=(vg_data, names[0]))
9693
      vg_meta = dev.children[1].logical_id[0]
9694
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9695
                             logical_id=(vg_meta, names[1]))
9696

    
9697
      new_lvs = [lv_data, lv_meta]
9698
      old_lvs = [child.Copy() for child in dev.children]
9699
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9700

    
9701
      # we pass force_create=True to force the LVM creation
9702
      for new_lv in new_lvs:
9703
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9704
                        _GetInstanceInfoText(self.instance), False)
9705

    
9706
    return iv_names
9707

    
9708
  def _CheckDevices(self, node_name, iv_names):
9709
    for name, (dev, _, _) in iv_names.iteritems():
9710
      self.cfg.SetDiskID(dev, node_name)
9711

    
9712
      result = self.rpc.call_blockdev_find(node_name, dev)
9713

    
9714
      msg = result.fail_msg
9715
      if msg or not result.payload:
9716
        if not msg:
9717
          msg = "disk not found"
9718
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9719
                                 (name, msg))
9720

    
9721
      if result.payload.is_degraded:
9722
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9723

    
9724
  def _RemoveOldStorage(self, node_name, iv_names):
9725
    for name, (_, old_lvs, _) in iv_names.iteritems():
9726
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9727

    
9728
      for lv in old_lvs:
9729
        self.cfg.SetDiskID(lv, node_name)
9730

    
9731
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9732
        if msg:
9733
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9734
                             hint="remove unused LVs manually")
9735

    
9736
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9737
    """Replace a disk on the primary or secondary for DRBD 8.
9738

9739
    The algorithm for replace is quite complicated:
9740

9741
      1. for each disk to be replaced:
9742

9743
        1. create new LVs on the target node with unique names
9744
        1. detach old LVs from the drbd device
9745
        1. rename old LVs to name_replaced.<time_t>
9746
        1. rename new LVs to old LVs
9747
        1. attach the new LVs (with the old names now) to the drbd device
9748

9749
      1. wait for sync across all devices
9750

9751
      1. for each modified disk:
9752

9753
        1. remove old LVs (which have the name name_replaced.<time_t>)
9754

9755
    Failures are not very well handled.
9756

9757
    """
9758
    steps_total = 6
9759

    
9760
    # Step: check device activation
9761
    self.lu.LogStep(1, steps_total, "Check device existence")
9762
    self._CheckDisksExistence([self.other_node, self.target_node])
9763
    self._CheckVolumeGroup([self.target_node, self.other_node])
9764

    
9765
    # Step: check other node consistency
9766
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9767
    self._CheckDisksConsistency(self.other_node,
9768
                                self.other_node == self.instance.primary_node,
9769
                                False)
9770

    
9771
    # Step: create new storage
9772
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9773
    iv_names = self._CreateNewStorage(self.target_node)
9774

    
9775
    # Step: for each lv, detach+rename*2+attach
9776
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9777
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9778
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9779

    
9780
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9781
                                                     old_lvs)
9782
      result.Raise("Can't detach drbd from local storage on node"
9783
                   " %s for device %s" % (self.target_node, dev.iv_name))
9784
      #dev.children = []
9785
      #cfg.Update(instance)
9786

    
9787
      # ok, we created the new LVs, so now we know we have the needed
9788
      # storage; as such, we proceed on the target node to rename
9789
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9790
      # using the assumption that logical_id == physical_id (which in
9791
      # turn is the unique_id on that node)
9792

    
9793
      # FIXME(iustin): use a better name for the replaced LVs
9794
      temp_suffix = int(time.time())
9795
      ren_fn = lambda d, suff: (d.physical_id[0],
9796
                                d.physical_id[1] + "_replaced-%s" % suff)
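      # For illustration (names made up): ren_fn keeps the volume group in
      # physical_id[0] and only extends the LV name, so an LV pair
      # ("xenvg", "abc.disk0_data") becomes
      # ("xenvg", "abc.disk0_data_replaced-1300000000").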

    
9798
      # Build the rename list based on what LVs exist on the node
9799
      rename_old_to_new = []
9800
      for to_ren in old_lvs:
9801
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9802
        if not result.fail_msg and result.payload:
9803
          # device exists
9804
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9805

    
9806
      self.lu.LogInfo("Renaming the old LVs on the target node")
9807
      result = self.rpc.call_blockdev_rename(self.target_node,
9808
                                             rename_old_to_new)
9809
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9810

    
9811
      # Now we rename the new LVs to the old LVs
9812
      self.lu.LogInfo("Renaming the new LVs on the target node")
9813
      rename_new_to_old = [(new, old.physical_id)
9814
                           for old, new in zip(old_lvs, new_lvs)]
9815
      result = self.rpc.call_blockdev_rename(self.target_node,
9816
                                             rename_new_to_old)
9817
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9818

    
9819
      # Intermediate steps of in memory modifications
9820
      for old, new in zip(old_lvs, new_lvs):
9821
        new.logical_id = old.logical_id
9822
        self.cfg.SetDiskID(new, self.target_node)
9823

    
9824
      # We need to modify old_lvs so that removal later removes the
9825
      # right LVs, not the newly added ones; note that old_lvs is a
9826
      # copy here
9827
      for disk in old_lvs:
9828
        disk.logical_id = ren_fn(disk, temp_suffix)
9829
        self.cfg.SetDiskID(disk, self.target_node)
9830

    
9831
      # Now that the new lvs have the old name, we can add them to the device
9832
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9833
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9834
                                                  new_lvs)
9835
      msg = result.fail_msg
9836
      if msg:
9837
        for new_lv in new_lvs:
9838
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9839
                                               new_lv).fail_msg
9840
          if msg2:
9841
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("clean up the unused logical volumes"
                                     " manually"))
9844
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9845

    
9846
    cstep = 5
9847
    if self.early_release:
9848
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9849
      cstep += 1
9850
      self._RemoveOldStorage(self.target_node, iv_names)
9851
      # WARNING: we release both node locks here, do not do other RPCs
9852
      # than WaitForSync to the primary node
9853
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9854
                    names=[self.target_node, self.other_node])
9855

    
9856
    # Wait for sync
9857
    # This can fail as the old devices are degraded and _WaitForSync
9858
    # does a combined result over all disks, so we don't check its return value
9859
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9860
    cstep += 1
9861
    _WaitForSync(self.lu, self.instance)
9862

    
9863
    # Check all devices manually
9864
    self._CheckDevices(self.instance.primary_node, iv_names)
9865

    
9866
    # Step: remove old storage
9867
    if not self.early_release:
9868
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9869
      cstep += 1
9870
      self._RemoveOldStorage(self.target_node, iv_names)

    
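  # _ExecDrbd8DiskOnly above keeps both DRBD peers and only swaps the backing
  # LVs on one node, whereas _ExecDrbd8Secondary below tears the mirror away
  # from the old secondary, rebuilds it on a new node and re-attaches the
  # primary drbds to it.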
  def _ExecDrbd8Secondary(self, feedback_fn):
9873
    """Replace the secondary node for DRBD 8.
9874

9875
    The algorithm for replace is quite complicated:
9876
      - for all disks of the instance:
9877
        - create new LVs on the new node with same names
9878
        - shutdown the drbd device on the old secondary
9879
        - disconnect the drbd network on the primary
9880
        - create the drbd device on the new secondary
9881
        - network attach the drbd on the primary, using an artifice:
9882
          the drbd code for Attach() will connect to the network if it
9883
          finds a device which is connected to the correct local disks but
9884
          not network enabled
9885
      - wait for sync across all devices
9886
      - remove all disks from the old secondary
9887

9888
    Failures are not very well handled.
9889

9890
    """
9891
    steps_total = 6
9892

    
9893
    pnode = self.instance.primary_node
9894

    
9895
    # Step: check device activation
9896
    self.lu.LogStep(1, steps_total, "Check device existence")
9897
    self._CheckDisksExistence([self.instance.primary_node])
9898
    self._CheckVolumeGroup([self.instance.primary_node])
9899

    
9900
    # Step: check other node consistency
9901
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9902
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9903

    
9904
    # Step: create new storage
9905
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9906
    for idx, dev in enumerate(self.instance.disks):
9907
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9908
                      (self.new_node, idx))
9909
      # we pass force_create=True to force LVM creation
9910
      for new_lv in dev.children:
9911
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9912
                        _GetInstanceInfoText(self.instance), False)
9913

    
9914
    # Step 4: drbd minors and drbd setup changes
9915
    # after this, we must manually remove the drbd minors on both the
9916
    # error and the success paths
9917
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9918
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9919
                                         for dev in self.instance.disks],
9920
                                        self.instance.name)
9921
    logging.debug("Allocated minors %r", minors)
9922

    
9923
    iv_names = {}
9924
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9925
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
9926
                      (self.new_node, idx))
9927
      # create new devices on new_node; note that we create two IDs:
9928
      # one without port, so the drbd will be activated without
9929
      # networking information on the new node at this stage, and one
9930
      # with network, for the latter activation in step 4
9931
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9932
      if self.instance.primary_node == o_node1:
9933
        p_minor = o_minor1
9934
      else:
9935
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9936
        p_minor = o_minor2
9937

    
9938
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9939
                      p_minor, new_minor, o_secret)
9940
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9941
                    p_minor, new_minor, o_secret)
9942

    
9943
      iv_names[idx] = (dev, dev.children, new_net_id)
9944
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9945
                    new_net_id)
9946
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9947
                              logical_id=new_alone_id,
9948
                              children=dev.children,
9949
                              size=dev.size)
9950
      try:
9951
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9952
                              _GetInstanceInfoText(self.instance), False)
9953
      except errors.GenericError:
9954
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9955
        raise
9956

    
9957
    # We have new devices, shutdown the drbd on the old secondary
9958
    for idx, dev in enumerate(self.instance.disks):
9959
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9960
      self.cfg.SetDiskID(dev, self.target_node)
9961
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9962
      if msg:
9963
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
9965
                           hint=("Please cleanup this device manually as"
9966
                                 " soon as possible"))
9967

    
9968
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9969
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9970
                                               self.instance.disks)[pnode]
9971

    
9972
    msg = result.fail_msg
9973
    if msg:
9974
      # detaches didn't succeed (unlikely)
9975
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9976
      raise errors.OpExecError("Can't detach the disks from the network on"
9977
                               " old node: %s" % (msg,))
9978

    
9979
    # if we managed to detach at least one, we update all the disks of
9980
    # the instance to point to the new secondary
9981
    self.lu.LogInfo("Updating instance configuration")
9982
    for dev, _, new_logical_id in iv_names.itervalues():
9983
      dev.logical_id = new_logical_id
9984
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9985

    
9986
    self.cfg.Update(self.instance, feedback_fn)
9987

    
9988
    # and now perform the drbd attach
9989
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9990
                    " (standalone => connected)")
9991
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9992
                                            self.new_node],
9993
                                           self.node_secondary_ip,
9994
                                           self.instance.disks,
9995
                                           self.instance.name,
9996
                                           False)
9997
    for to_node, to_result in result.items():
9998
      msg = to_result.fail_msg
9999
      if msg:
10000
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10001
                           to_node, msg,
10002
                           hint=("please do a gnt-instance info to see the"
10003
                                 " status of disks"))
10004
    cstep = 5
10005
    if self.early_release:
10006
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10007
      cstep += 1
10008
      self._RemoveOldStorage(self.target_node, iv_names)
10009
      # WARNING: we release all node locks here, do not do other RPCs
10010
      # than WaitForSync to the primary node
10011
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10012
                    names=[self.instance.primary_node,
10013
                           self.target_node,
10014
                           self.new_node])
10015

    
10016
    # Wait for sync
10017
    # This can fail as the old devices are degraded and _WaitForSync
10018
    # does a combined result over all disks, so we don't check its return value
10019
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10020
    cstep += 1
10021
    _WaitForSync(self.lu, self.instance)
10022

    
10023
    # Check all devices manually
10024
    self._CheckDevices(self.instance.primary_node, iv_names)
10025

    
10026
    # Step: remove old storage
10027
    if not self.early_release:
10028
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10029
      self._RemoveOldStorage(self.target_node, iv_names)

    
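# Illustrative sketch only, not part of the replace-disks code above: the LV
# swap in _ExecDrbd8DiskOnly boils down to two rename passes over a node's
# LV namespace; the old LVs are first moved aside to "<name>_replaced-<ts>"
# and the new LVs then take over the old names.  A plain dict stands in for
# the node's LV names; no RPC or Ganeti object is involved.
def _SketchLvNameSwap(lv_names, old_names, new_names, timestamp):
  """Return a copy of lv_names with the old and new LV names swapped.

  """
  names = dict(lv_names)
  # Pass 1: move the old LVs out of the way (this mirrors ren_fn above)
  for old in old_names:
    names[old + "_replaced-%s" % timestamp] = names.pop(old)
  # Pass 2: give the new LVs the now-free old names
  for old, new in zip(old_names, new_names):
    names[old] = names.pop(new)
  return names

# Example:
#   _SketchLvNameSwap({"a.data": 1, "a.data.new": 1}, ["a.data"],
#                     ["a.data.new"], 1300000000)
#   returns {"a.data_replaced-1300000000": 1, "a.data": 1}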
class LURepairNodeStorage(NoHooksLU):
10033
  """Repairs the volume group on a node.
10034

10035
  """
10036
  REQ_BGL = False
10037

    
10038
  def CheckArguments(self):
10039
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10040

    
10041
    storage_type = self.op.storage_type
10042

    
10043
    if (constants.SO_FIX_CONSISTENCY not in
10044
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10045
      raise errors.OpPrereqError("Storage units of type '%s' cannot be"
10046
                                 " repaired" % storage_type,
10047
                                 errors.ECODE_INVAL)
10048

    
10049
  def ExpandNames(self):
10050
    self.needed_locks = {
10051
      locking.LEVEL_NODE: [self.op.node_name],
10052
      }
10053

    
10054
  def _CheckFaultyDisks(self, instance, node_name):
10055
    """Ensure faulty disks abort the opcode or at least warn."""
10056
    try:
10057
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10058
                                  node_name, True):
10059
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10060
                                   " node '%s'" % (instance.name, node_name),
10061
                                   errors.ECODE_STATE)
10062
    except errors.OpPrereqError, err:
10063
      if self.op.ignore_consistency:
10064
        self.proc.LogWarning(str(err.args[0]))
10065
      else:
10066
        raise
10067

    
10068
  def CheckPrereq(self):
10069
    """Check prerequisites.
10070

10071
    """
10072
    # Check whether any instance on this node has faulty disks
10073
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10074
      if not inst.admin_up:
10075
        continue
10076
      check_nodes = set(inst.all_nodes)
10077
      check_nodes.discard(self.op.node_name)
10078
      for inst_node_name in check_nodes:
10079
        self._CheckFaultyDisks(inst, inst_node_name)
10080

    
10081
  def Exec(self, feedback_fn):
10082
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10083
                (self.op.name, self.op.node_name))
10084

    
10085
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10086
    result = self.rpc.call_storage_execute(self.op.node_name,
10087
                                           self.op.storage_type, st_args,
10088
                                           self.op.name,
10089
                                           constants.SO_FIX_CONSISTENCY)
10090
    result.Raise("Failed to repair storage unit '%s' on %s" %
10091
                 (self.op.name, self.op.node_name))

    
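# Illustrative sketch only: LURepairNodeStorage.CheckArguments validates the
# requested operation against a per-storage-type capability table
# (constants.VALID_STORAGE_OPERATIONS).  The helper below shows the same
# lookup with a plain dict and made-up strings instead of the real constants.
def _SketchCheckStorageOperation(valid_ops, storage_type, operation):
  """Raise ValueError unless operation is allowed for storage_type.

  @type valid_ops: dict
  @param valid_ops: maps a storage type name to a list of allowed operations

  """
  if operation not in valid_ops.get(storage_type, []):
    raise ValueError("Storage units of type '%s' cannot handle '%s'" %
                     (storage_type, operation))

# Example (made-up table), this raises ValueError:
#   _SketchCheckStorageOperation({"lvm-vg": ["fix-consistency"]},
#                                "file", "fix-consistency")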
class LUNodeEvacuate(NoHooksLU):
10095
  """Evacuates instances off a list of nodes.
10096

10097
  """
10098
  REQ_BGL = False
10099

    
10100
  def CheckArguments(self):
10101
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10102

    
10103
  def ExpandNames(self):
10104
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10105

    
10106
    if self.op.remote_node is not None:
10107
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10108
      assert self.op.remote_node
10109

    
10110
      if self.op.remote_node == self.op.node_name:
10111
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10112
                                   " secondary node", errors.ECODE_INVAL)
10113

    
10114
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10115
        raise errors.OpPrereqError("Without the use of an iallocator only"
10116
                                   " secondary instances can be evacuated",
10117
                                   errors.ECODE_INVAL)
10118

    
10119
    # Declare locks
10120
    self.share_locks = _ShareAll()
10121
    self.needed_locks = {
10122
      locking.LEVEL_INSTANCE: [],
10123
      locking.LEVEL_NODEGROUP: [],
10124
      locking.LEVEL_NODE: [],
10125
      }
10126

    
10127
    # Determine nodes (via group) optimistically, needs verification once locks
10128
    # have been acquired
10129
    self.lock_nodes = self._DetermineNodes()
10130

    
10131
  def _DetermineNodes(self):
10132
    """Gets the list of nodes to operate on.
10133

10134
    """
10135
    if self.op.remote_node is None:
10136
      # Iallocator will choose any node(s) in the same group
10137
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10138
    else:
10139
      group_nodes = frozenset([self.op.remote_node])
10140

    
10141
    # Determine nodes to be locked
10142
    return set([self.op.node_name]) | group_nodes
10143

    
10144
  def _DetermineInstances(self):
10145
    """Builds list of instances to operate on.
10146

10147
    """
10148
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10149

    
10150
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10151
      # Primary instances only
10152
      inst_fn = _GetNodePrimaryInstances
10153
      assert self.op.remote_node is None, \
10154
        "Evacuating primary instances requires iallocator"
10155
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10156
      # Secondary instances only
10157
      inst_fn = _GetNodeSecondaryInstances
10158
    else:
10159
      # All instances
10160
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10161
      inst_fn = _GetNodeInstances
10162
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10163
      # per instance
10164
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10165
                                 " interface it is not possible to evacuate"
10166
                                 " all instances at once; specify explicitly"
10167
                                 " whether to evacuate primary or secondary"
10168
                                 " instances",
10169
                                 errors.ECODE_INVAL)
10170

    
10171
    return inst_fn(self.cfg, self.op.node_name)
10172

    
10173
  def DeclareLocks(self, level):
10174
    if level == locking.LEVEL_INSTANCE:
10175
      # Lock instances optimistically, needs verification once node and group
10176
      # locks have been acquired
10177
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10178
        set(i.name for i in self._DetermineInstances())
10179

    
10180
    elif level == locking.LEVEL_NODEGROUP:
10181
      # Lock node groups for all potential target nodes optimistically, needs
10182
      # verification once nodes have been acquired
10183
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10184
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10185

    
10186
    elif level == locking.LEVEL_NODE:
10187
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10188

    
10189
  def CheckPrereq(self):
10190
    # Verify locks
10191
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10192
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10193
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10194

    
10195
    need_nodes = self._DetermineNodes()
10196

    
10197
    if not owned_nodes.issuperset(need_nodes):
10198
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
10201
                                 " operation" %
10202
                                 (self.op.node_name,
10203
                                  utils.CommaJoin(need_nodes),
10204
                                  utils.CommaJoin(owned_nodes)),
10205
                                 errors.ECODE_STATE)
10206

    
10207
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10208
    if owned_groups != wanted_groups:
10209
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10210
                               " current groups are '%s', used to be '%s';"
10211
                               " retry the operation" %
10212
                               (utils.CommaJoin(wanted_groups),
10213
                                utils.CommaJoin(owned_groups)))
10214

    
10215
    # Determine affected instances
10216
    self.instances = self._DetermineInstances()
10217
    self.instance_names = [i.name for i in self.instances]
10218

    
10219
    if set(self.instance_names) != owned_instances:
10220
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10221
                               " were acquired, current instances are '%s',"
10222
                               " used to be '%s'; retry the operation" %
10223
                               (self.op.node_name,
10224
                                utils.CommaJoin(self.instance_names),
10225
                                utils.CommaJoin(owned_instances)))
10226

    
10227
    if self.instance_names:
10228
      self.LogInfo("Evacuating instances from node '%s': %s",
10229
                   self.op.node_name,
10230
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10231
    else:
10232
      self.LogInfo("No instances to evacuate from node '%s'",
10233
                   self.op.node_name)
10234

    
10235
    if self.op.remote_node is not None:
10236
      for i in self.instances:
10237
        if i.primary_node == self.op.remote_node:
10238
          raise errors.OpPrereqError("Node %s is the primary node of"
10239
                                     " instance %s, cannot use it as"
10240
                                     " secondary" %
10241
                                     (self.op.remote_node, i.name),
10242
                                     errors.ECODE_INVAL)
10243

    
10244
  def Exec(self, feedback_fn):
10245
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10246

    
10247
    if not self.instance_names:
10248
      # No instances to evacuate
10249
      jobs = []
10250

    
10251
    elif self.op.iallocator is not None:
10252
      # TODO: Implement relocation to other group
10253
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10254
                       evac_mode=self.op.mode,
10255
                       instances=list(self.instance_names))
10256

    
10257
      ial.Run(self.op.iallocator)
10258

    
10259
      if not ial.success:
10260
        raise errors.OpPrereqError("Can't compute node evacuation using"
10261
                                   " iallocator '%s': %s" %
10262
                                   (self.op.iallocator, ial.info),
10263
                                   errors.ECODE_NORES)
10264

    
10265
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10266

    
10267
    elif self.op.remote_node is not None:
10268
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10269
      jobs = [
10270
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10271
                                        remote_node=self.op.remote_node,
10272
                                        disks=[],
10273
                                        mode=constants.REPLACE_DISK_CHG,
10274
                                        early_release=self.op.early_release)]
10275
        for instance_name in self.instance_names
10276
        ]
10277

    
10278
    else:
10279
      raise errors.ProgrammerError("No iallocator or remote node")
10280

    
10281
    return ResultWithJobs(jobs)

    
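# Illustrative sketch only: with a remote node given, LUNodeEvacuate.Exec
# builds one single-opcode job per instance, i.e. a list of one-element
# lists, which is then wrapped in ResultWithJobs.  Plain dicts stand in for
# opcodes.OpInstanceReplaceDisks below; the keys mirror the opcode fields
# used above and the mode string is a stand-in for the real constant.
def _SketchSecondaryEvacJobs(instance_names, remote_node, early_release):
  """Return the list-of-lists job layout used for secondary evacuation.

  """
  return [
    [{
      "instance_name": instance_name,
      "remote_node": remote_node,
      "disks": [],                    # an empty list means "all disks"
      "mode": "REPLACE_DISK_CHG",     # stand-in for the real constant
      "early_release": early_release,
      }]
    for instance_name in instance_names
    ]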
def _SetOpEarlyRelease(early_release, op):
10285
  """Sets C{early_release} flag on opcodes if available.
10286

10287
  """
10288
  try:
10289
    op.early_release = early_release
10290
  except AttributeError:
10291
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10292

    
10293
  return op
10294

    
10295

    
10296
def _NodeEvacDest(use_nodes, group, nodes):
10297
  """Returns group or nodes depending on caller's choice.
10298

10299
  """
10300
  if use_nodes:
10301
    return utils.CommaJoin(nodes)
10302
  else:
10303
    return group
10304

    
10305

    
10306
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10307
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10308

10309
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10310
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10311

10312
  @type lu: L{LogicalUnit}
10313
  @param lu: Logical unit instance
10314
  @type alloc_result: tuple/list
10315
  @param alloc_result: Result from iallocator
10316
  @type early_release: bool
10317
  @param early_release: Whether to release locks early if possible
10318
  @type use_nodes: bool
10319
  @param use_nodes: Whether to display node names instead of groups
10320

10321
  """
10322
  (moved, failed, jobs) = alloc_result
10323

    
10324
  if failed:
10325
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10326
                                 for (name, reason) in failed)
10327
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10328
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10329

    
10330
  if moved:
10331
    lu.LogInfo("Instances to be moved: %s",
10332
               utils.CommaJoin("%s (to %s)" %
10333
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10334
                               for (name, group, nodes) in moved))
10335

    
10336
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10337
              map(opcodes.OpCode.LoadOpCode, ops))
10338
          for ops in jobs]

    
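# Illustrative sketch only: the alloc_result consumed by _LoadNodeEvacResult
# is a 3-tuple of (moved, failed, jobs).  The helper below unpacks example
# data of that shape without touching opcodes or LU machinery; all names in
# the example are made up.
def _SketchSummarizeNodeEvacResult(alloc_result):
  """Return (moved instance names, jobs) or raise on any failed instance.

  """
  (moved, failed, jobs) = alloc_result
  if failed:
    raise ValueError("Unable to evacuate instances %s" %
                     ", ".join("%s (%s)" % (name, reason)
                               for (name, reason) in failed))
  return ([name for (name, _, _) in moved], jobs)

# Example:
#   _SketchSummarizeNodeEvacResult(
#     ([("inst1", "group1", ["node3"])], [], [[{"OP_ID": "..."}]]))
#   returns (["inst1"], [[{"OP_ID": "..."}]])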
class LUInstanceGrowDisk(LogicalUnit):
10342
  """Grow a disk of an instance.
10343

10344
  """
10345
  HPATH = "disk-grow"
10346
  HTYPE = constants.HTYPE_INSTANCE
10347
  REQ_BGL = False
10348

    
10349
  def ExpandNames(self):
10350
    self._ExpandAndLockInstance()
10351
    self.needed_locks[locking.LEVEL_NODE] = []
10352
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10353

    
10354
  def DeclareLocks(self, level):
10355
    if level == locking.LEVEL_NODE:
10356
      self._LockInstancesNodes()
10357

    
10358
  def BuildHooksEnv(self):
10359
    """Build hooks env.
10360

10361
    This runs on the master, the primary and all the secondaries.
10362

10363
    """
10364
    env = {
10365
      "DISK": self.op.disk,
10366
      "AMOUNT": self.op.amount,
10367
      }
10368
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10369
    return env
10370

    
10371
  def BuildHooksNodes(self):
10372
    """Build hooks nodes.
10373

10374
    """
10375
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10376
    return (nl, nl)
10377

    
10378
  def CheckPrereq(self):
10379
    """Check prerequisites.
10380

10381
    This checks that the instance is in the cluster.
10382

10383
    """
10384
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10385
    assert instance is not None, \
10386
      "Cannot retrieve locked instance %s" % self.op.instance_name
10387
    nodenames = list(instance.all_nodes)
10388
    for node in nodenames:
10389
      _CheckNodeOnline(self, node)
10390

    
10391
    self.instance = instance
10392

    
10393
    if instance.disk_template not in constants.DTS_GROWABLE:
10394
      raise errors.OpPrereqError("Instance's disk layout does not support"
10395
                                 " growing", errors.ECODE_INVAL)
10396

    
10397
    self.disk = instance.FindDisk(self.op.disk)
10398

    
10399
    if instance.disk_template not in (constants.DT_FILE,
10400
                                      constants.DT_SHARED_FILE):
10401
      # TODO: check the free disk space for file, when that feature will be
10402
      # supported
10403
      _CheckNodesFreeDiskPerVG(self, nodenames,
10404
                               self.disk.ComputeGrowth(self.op.amount))
10405

    
10406
  def Exec(self, feedback_fn):
10407
    """Execute disk grow.
10408

10409
    """
10410
    instance = self.instance
10411
    disk = self.disk
10412

    
10413
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10414
    if not disks_ok:
10415
      raise errors.OpExecError("Cannot activate block device to grow")
10416

    
10417
    # First run all grow ops in dry-run mode
10418
    for node in instance.all_nodes:
10419
      self.cfg.SetDiskID(disk, node)
10420
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10421
      result.Raise("Grow request failed to node %s" % node)
10422

    
10423
    # We know that (as far as we can test) operations across different
10424
    # nodes will succeed, time to run it for real
10425
    for node in instance.all_nodes:
10426
      self.cfg.SetDiskID(disk, node)
10427
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10428
      result.Raise("Grow request failed to node %s" % node)
10429

    
10430
      # TODO: Rewrite code to work properly
10431
      # DRBD goes into sync mode for a short amount of time after executing the
10432
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10433
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10434
      # time is a work-around.
10435
      time.sleep(5)
10436

    
10437
    disk.RecordGrow(self.op.amount)
10438
    self.cfg.Update(instance, feedback_fn)
10439
    if self.op.wait_for_sync:
10440
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10441
      if disk_abort:
10442
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10443
                             " status; please check the instance")
10444
      if not instance.admin_up:
10445
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10446
    elif not instance.admin_up:
10447
      self.proc.LogWarning("Not shutting down the disk even though the"
                           " instance is not supposed to be running, because"
                           " wait-for-sync was not requested")

    
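# Illustrative sketch only: LUInstanceGrowDisk.Exec grows a disk in two
# passes, a dry run against every node first and only then the real grow, so
# a node that would reject the request is caught before anything has been
# changed anywhere.  The helper below shows the same two-pass pattern for an
# arbitrary callback; grow_fn is a made-up stand-in for the per-node
# call_blockdev_grow RPC.
def _SketchGrowOnAllNodes(nodes, grow_fn):
  """Call grow_fn(node, dryrun) on all nodes, dry-run first.

  """
  # First pass: nothing is modified yet, so any failure aborts cleanly
  for node in nodes:
    grow_fn(node, True)
  # Second pass: every node accepted the request, now apply it for real
  for node in nodes:
    grow_fn(node, False)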
class LUInstanceQueryData(NoHooksLU):
10453
  """Query runtime instance data.
10454

10455
  """
10456
  REQ_BGL = False
10457

    
10458
  def ExpandNames(self):
10459
    self.needed_locks = {}
10460

    
10461
    # Use locking if requested or when non-static information is wanted
10462
    if not (self.op.static or self.op.use_locking):
10463
      self.LogWarning("Non-static data requested, locks need to be acquired")
10464
      self.op.use_locking = True
10465

    
10466
    if self.op.instances or not self.op.use_locking:
10467
      # Expand instance names right here
10468
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10469
    else:
10470
      # Will use acquired locks
10471
      self.wanted_names = None
10472

    
10473
    if self.op.use_locking:
10474
      self.share_locks = _ShareAll()
10475

    
10476
      if self.wanted_names is None:
10477
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10478
      else:
10479
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10480

    
10481
      self.needed_locks[locking.LEVEL_NODE] = []
10482
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10483

    
10484
  def DeclareLocks(self, level):
10485
    if self.op.use_locking and level == locking.LEVEL_NODE:
10486
      self._LockInstancesNodes()
10487

    
10488
  def CheckPrereq(self):
10489
    """Check prerequisites.
10490

10491
    This only checks the optional instance list against the existing names.
10492

10493
    """
10494
    if self.wanted_names is None:
10495
      assert self.op.use_locking, "Locking was not used"
10496
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10497

    
10498
    self.wanted_instances = \
10499
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10500

    
10501
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10502
    """Returns the status of a block device
10503

10504
    """
10505
    if self.op.static or not node:
10506
      return None
10507

    
10508
    self.cfg.SetDiskID(dev, node)
10509

    
10510
    result = self.rpc.call_blockdev_find(node, dev)
10511
    if result.offline:
10512
      return None
10513

    
10514
    result.Raise("Can't compute disk status for %s" % instance_name)
10515

    
10516
    status = result.payload
10517
    if status is None:
10518
      return None
10519

    
10520
    return (status.dev_path, status.major, status.minor,
10521
            status.sync_percent, status.estimated_time,
10522
            status.is_degraded, status.ldisk_status)
10523

    
10524
  def _ComputeDiskStatus(self, instance, snode, dev):
10525
    """Compute block device status.
10526

10527
    """
10528
    if dev.dev_type in constants.LDS_DRBD:
10529
      # we change the snode then (otherwise we use the one passed in)
10530
      if dev.logical_id[0] == instance.primary_node:
10531
        snode = dev.logical_id[1]
10532
      else:
10533
        snode = dev.logical_id[0]
10534

    
10535
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10536
                                              instance.name, dev)
10537
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10538

    
10539
    if dev.children:
10540
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10541
                                        instance, snode),
10542
                         dev.children)
10543
    else:
10544
      dev_children = []
10545

    
10546
    return {
10547
      "iv_name": dev.iv_name,
10548
      "dev_type": dev.dev_type,
10549
      "logical_id": dev.logical_id,
10550
      "physical_id": dev.physical_id,
10551
      "pstatus": dev_pstatus,
10552
      "sstatus": dev_sstatus,
10553
      "children": dev_children,
10554
      "mode": dev.mode,
10555
      "size": dev.size,
10556
      }
10557

    
10558
  def Exec(self, feedback_fn):
10559
    """Gather and return data"""
10560
    result = {}
10561

    
10562
    cluster = self.cfg.GetClusterInfo()
10563

    
10564
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10565
                                          for i in self.wanted_instances)
10566
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10567
      if self.op.static or pnode.offline:
10568
        remote_state = None
10569
        if pnode.offline:
10570
          self.LogWarning("Primary node %s is marked offline, returning static"
10571
                          " information only for instance %s" %
10572
                          (pnode.name, instance.name))
10573
      else:
10574
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10575
                                                  instance.name,
10576
                                                  instance.hypervisor)
10577
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10578
        remote_info = remote_info.payload
10579
        if remote_info and "state" in remote_info:
10580
          remote_state = "up"
10581
        else:
10582
          remote_state = "down"
10583

    
10584
      if instance.admin_up:
10585
        config_state = "up"
10586
      else:
10587
        config_state = "down"
10588

    
10589
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10590
                  instance.disks)
10591

    
10592
      result[instance.name] = {
10593
        "name": instance.name,
10594
        "config_state": config_state,
10595
        "run_state": remote_state,
10596
        "pnode": instance.primary_node,
10597
        "snodes": instance.secondary_nodes,
10598
        "os": instance.os,
10599
        # this happens to be the same format used for hooks
10600
        "nics": _NICListToTuple(self, instance.nics),
10601
        "disk_template": instance.disk_template,
10602
        "disks": disks,
10603
        "hypervisor": instance.hypervisor,
10604
        "network_port": instance.network_port,
10605
        "hv_instance": instance.hvparams,
10606
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10607
        "be_instance": instance.beparams,
10608
        "be_actual": cluster.FillBE(instance),
10609
        "os_instance": instance.osparams,
10610
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10611
        "serial_no": instance.serial_no,
10612
        "mtime": instance.mtime,
10613
        "ctime": instance.ctime,
10614
        "uuid": instance.uuid,
10615
        }
10616

    
10617
    return result

    
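# Illustrative sketch only: _ComputeDiskStatus above recurses into
# dev.children (for example the two LVs backing a DRBD device) and returns
# one nested dict per device.  The helper below shows the same recursion
# over a plain {"name": ..., "children": [...]} tree, with no RPC calls.
def _SketchWalkDiskTree(disk, depth=0):
  """Return (name, depth) pairs for a nested disk description.

  """
  entries = [(disk["name"], depth)]
  for child in disk.get("children", []):
    entries.extend(_SketchWalkDiskTree(child, depth + 1))
  return entries

# Example:
#   _SketchWalkDiskTree({"name": "drbd0",
#                        "children": [{"name": "data-lv"},
#                                     {"name": "meta-lv"}]})
#   returns [("drbd0", 0), ("data-lv", 1), ("meta-lv", 1)]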
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
10622

10623
  """
10624
  HPATH = "instance-modify"
10625
  HTYPE = constants.HTYPE_INSTANCE
10626
  REQ_BGL = False
10627

    
10628
  def CheckArguments(self):
10629
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10630
            self.op.hvparams or self.op.beparams or self.op.os_name):
10631
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10632

    
10633
    if self.op.hvparams:
10634
      _CheckGlobalHvParams(self.op.hvparams)
10635

    
10636
    # Disk validation
10637
    disk_addremove = 0
10638
    for disk_op, disk_dict in self.op.disks:
10639
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10640
      if disk_op == constants.DDM_REMOVE:
10641
        disk_addremove += 1
10642
        continue
10643
      elif disk_op == constants.DDM_ADD:
10644
        disk_addremove += 1
10645
      else:
10646
        if not isinstance(disk_op, int):
10647
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10648
        if not isinstance(disk_dict, dict):
10649
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10650
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10651

    
10652
      if disk_op == constants.DDM_ADD:
10653
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10654
        if mode not in constants.DISK_ACCESS_SET:
10655
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10656
                                     errors.ECODE_INVAL)
10657
        size = disk_dict.get(constants.IDISK_SIZE, None)
10658
        if size is None:
10659
          raise errors.OpPrereqError("Required disk parameter size missing",
10660
                                     errors.ECODE_INVAL)
10661
        try:
10662
          size = int(size)
10663
        except (TypeError, ValueError), err:
10664
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10665
                                     str(err), errors.ECODE_INVAL)
10666
        disk_dict[constants.IDISK_SIZE] = size
10667
      else:
10668
        # modification of disk
10669
        if constants.IDISK_SIZE in disk_dict:
10670
          raise errors.OpPrereqError("Disk size change not possible, use"
10671
                                     " grow-disk", errors.ECODE_INVAL)
10672

    
10673
    if disk_addremove > 1:
10674
      raise errors.OpPrereqError("Only one disk add or remove operation"
10675
                                 " supported at a time", errors.ECODE_INVAL)
10676

    
10677
    if self.op.disks and self.op.disk_template is not None:
10678
      raise errors.OpPrereqError("Disk template conversion and other disk"
10679
                                 " changes not supported at the same time",
10680
                                 errors.ECODE_INVAL)
10681

    
10682
    if (self.op.disk_template and
10683
        self.op.disk_template in constants.DTS_INT_MIRROR and
10684
        self.op.remote_node is None):
10685
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10686
                                 " one requires specifying a secondary node",
10687
                                 errors.ECODE_INVAL)
10688

    
10689
    # NIC validation
10690
    nic_addremove = 0
10691
    for nic_op, nic_dict in self.op.nics:
10692
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10693
      if nic_op == constants.DDM_REMOVE:
10694
        nic_addremove += 1
10695
        continue
10696
      elif nic_op == constants.DDM_ADD:
10697
        nic_addremove += 1
10698
      else:
10699
        if not isinstance(nic_op, int):
10700
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10701
        if not isinstance(nic_dict, dict):
10702
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10703
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10704

    
10705
      # nic_dict should be a dict
10706
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10707
      if nic_ip is not None:
10708
        if nic_ip.lower() == constants.VALUE_NONE:
10709
          nic_dict[constants.INIC_IP] = None
10710
        else:
10711
          if not netutils.IPAddress.IsValid(nic_ip):
10712
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10713
                                       errors.ECODE_INVAL)
10714

    
10715
      nic_bridge = nic_dict.get("bridge", None)
10716
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10717
      if nic_bridge and nic_link:
10718
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10719
                                   " at the same time", errors.ECODE_INVAL)
10720
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10721
        nic_dict["bridge"] = None
10722
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10723
        nic_dict[constants.INIC_LINK] = None
10724

    
10725
      if nic_op == constants.DDM_ADD:
10726
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10727
        if nic_mac is None:
10728
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10729

    
10730
      if constants.INIC_MAC in nic_dict:
10731
        nic_mac = nic_dict[constants.INIC_MAC]
10732
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10733
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10734

    
10735
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10736
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10737
                                     " modifying an existing nic",
10738
                                     errors.ECODE_INVAL)
10739

    
10740
    if nic_addremove > 1:
10741
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10742
                                 " supported at a time", errors.ECODE_INVAL)
10743

    
10744
  def ExpandNames(self):
10745
    self._ExpandAndLockInstance()
10746
    self.needed_locks[locking.LEVEL_NODE] = []
10747
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10748

    
10749
  def DeclareLocks(self, level):
10750
    if level == locking.LEVEL_NODE:
10751
      self._LockInstancesNodes()
10752
      if self.op.disk_template and self.op.remote_node:
10753
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10754
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10755

    
10756
  def BuildHooksEnv(self):
10757
    """Build hooks env.
10758

10759
    This runs on the master, primary and secondaries.
10760

10761
    """
10762
    args = dict()
10763
    if constants.BE_MEMORY in self.be_new:
10764
      args["memory"] = self.be_new[constants.BE_MEMORY]
10765
    if constants.BE_VCPUS in self.be_new:
10766
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10767
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10768
    # information at all.
10769
    if self.op.nics:
10770
      args["nics"] = []
10771
      nic_override = dict(self.op.nics)
10772
      for idx, nic in enumerate(self.instance.nics):
10773
        if idx in nic_override:
10774
          this_nic_override = nic_override[idx]
10775
        else:
10776
          this_nic_override = {}
10777
        if constants.INIC_IP in this_nic_override:
10778
          ip = this_nic_override[constants.INIC_IP]
10779
        else:
10780
          ip = nic.ip
10781
        if constants.INIC_MAC in this_nic_override:
10782
          mac = this_nic_override[constants.INIC_MAC]
10783
        else:
10784
          mac = nic.mac
10785
        if idx in self.nic_pnew:
10786
          nicparams = self.nic_pnew[idx]
10787
        else:
10788
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10789
        mode = nicparams[constants.NIC_MODE]
10790
        link = nicparams[constants.NIC_LINK]
10791
        args["nics"].append((ip, mac, mode, link))
10792
      if constants.DDM_ADD in nic_override:
10793
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10794
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10795
        nicparams = self.nic_pnew[constants.DDM_ADD]
10796
        mode = nicparams[constants.NIC_MODE]
10797
        link = nicparams[constants.NIC_LINK]
10798
        args["nics"].append((ip, mac, mode, link))
10799
      elif constants.DDM_REMOVE in nic_override:
10800
        del args["nics"][-1]
10801

    
10802
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10803
    if self.op.disk_template:
10804
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10805

    
10806
    return env
10807

    
10808
  def BuildHooksNodes(self):
10809
    """Build hooks nodes.
10810

10811
    """
10812
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10813
    return (nl, nl)
10814

    
10815
  def CheckPrereq(self):
10816
    """Check prerequisites.
10817

10818
    This only checks the instance list against the existing names.
10819

10820
    """
10821
    # checking the new params on the primary/secondary nodes
10822

    
10823
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10824
    cluster = self.cluster = self.cfg.GetClusterInfo()
10825
    assert self.instance is not None, \
10826
      "Cannot retrieve locked instance %s" % self.op.instance_name
10827
    pnode = instance.primary_node
10828
    nodelist = list(instance.all_nodes)
10829

    
10830
    # OS change
10831
    if self.op.os_name and not self.op.force:
10832
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10833
                      self.op.force_variant)
10834
      instance_os = self.op.os_name
10835
    else:
10836
      instance_os = instance.os
10837

    
10838
    if self.op.disk_template:
10839
      if instance.disk_template == self.op.disk_template:
10840
        raise errors.OpPrereqError("Instance already has disk template %s" %
10841
                                   instance.disk_template, errors.ECODE_INVAL)
10842

    
10843
      if (instance.disk_template,
10844
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10845
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10846
                                   " %s to %s" % (instance.disk_template,
10847
                                                  self.op.disk_template),
10848
                                   errors.ECODE_INVAL)
10849
      _CheckInstanceDown(self, instance, "cannot change disk template")
10850
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10851
        if self.op.remote_node == pnode:
10852
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10853
                                     " as the primary node of the instance" %
10854
                                     self.op.remote_node, errors.ECODE_STATE)
10855
        _CheckNodeOnline(self, self.op.remote_node)
10856
        _CheckNodeNotDrained(self, self.op.remote_node)
10857
        # FIXME: here we assume that the old instance type is DT_PLAIN
10858
        assert instance.disk_template == constants.DT_PLAIN
10859
        disks = [{constants.IDISK_SIZE: d.size,
10860
                  constants.IDISK_VG: d.logical_id[0]}
10861
                 for d in instance.disks]
10862
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10863
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10864

    
10865
    # hvparams processing
10866
    if self.op.hvparams:
10867
      hv_type = instance.hypervisor
10868
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10869
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10870
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10871

    
10872
      # local check
10873
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10874
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10875
      self.hv_new = hv_new # the new actual values
10876
      self.hv_inst = i_hvdict # the new dict (without defaults)
10877
    else:
10878
      self.hv_new = self.hv_inst = {}
10879

    
10880
    # beparams processing
10881
    if self.op.beparams:
10882
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10883
                                   use_none=True)
10884
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10885
      be_new = cluster.SimpleFillBE(i_bedict)
10886
      self.be_new = be_new # the new actual values
10887
      self.be_inst = i_bedict # the new dict (without defaults)
10888
    else:
10889
      self.be_new = self.be_inst = {}
10890
    be_old = cluster.FillBE(instance)
10891

    
10892
    # osparams processing
10893
    if self.op.osparams:
10894
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10895
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10896
      self.os_inst = i_osdict # the new dict (without defaults)
10897
    else:
10898
      self.os_inst = {}
10899

    
10900
    self.warn = []
10901

    
10902
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10903
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10904
      mem_check_list = [pnode]
10905
      if be_new[constants.BE_AUTO_BALANCE]:
10906
        # either we changed auto_balance to yes or it was from before
10907
        mem_check_list.extend(instance.secondary_nodes)
10908
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10909
                                                  instance.hypervisor)
10910
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10911
                                         instance.hypervisor)
10912
      pninfo = nodeinfo[pnode]
10913
      msg = pninfo.fail_msg
10914
      if msg:
10915
        # Assume the primary node is unreachable and go ahead
10916
        self.warn.append("Can't get info from primary node %s: %s" %
10917
                         (pnode, msg))
10918
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10919
        self.warn.append("Node data from primary node %s doesn't contain"
10920
                         " free memory information" % pnode)
10921
      elif instance_info.fail_msg:
10922
        self.warn.append("Can't get instance runtime information: %s" %
10923
                        instance_info.fail_msg)
10924
      else:
10925
        if instance_info.payload:
10926
          current_mem = int(instance_info.payload["memory"])
10927
        else:
10928
          # Assume instance not running
10929
          # (there is a slight race condition here, but it's not very probable,
10930
          # and we have no other way to check)
10931
          current_mem = 0
10932
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10933
                    pninfo.payload["memory_free"])
10934
        if miss_mem > 0:
10935
          raise errors.OpPrereqError("This change will prevent the instance"
10936
                                     " from starting, due to %d MB of memory"
10937
                                     " missing on its primary node" % miss_mem,
10938
                                     errors.ECODE_NORES)
10939

    
10940
      if be_new[constants.BE_AUTO_BALANCE]:
10941
        for node, nres in nodeinfo.items():
10942
          if node not in instance.secondary_nodes:
10943
            continue
10944
          nres.Raise("Can't get info from secondary node %s" % node,
10945
                     prereq=True, ecode=errors.ECODE_STATE)
10946
          if not isinstance(nres.payload.get("memory_free", None), int):
10947
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10948
                                       " memory information" % node,
10949
                                       errors.ECODE_STATE)
10950
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10951
            raise errors.OpPrereqError("This change will prevent the instance"
10952
                                       " from failover to its secondary node"
10953
                                       " %s, due to not enough memory" % node,
10954
                                       errors.ECODE_STATE)
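    # Worked example (hypothetical numbers, for illustration only): raising
    # BE_MEMORY to 2048 MB while the instance currently uses 512 MB and the
    # primary node reports 1024 MB free gives
    #   miss_mem = 2048 - 512 - 1024 = 512 MB
    # so the change is refused; with auto_balance the new size must also fit
    # into the free memory reported by every secondary node.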
10955

    
10956
    # NIC processing
10957
    self.nic_pnew = {}
10958
    self.nic_pinst = {}
10959
    for nic_op, nic_dict in self.op.nics:
10960
      if nic_op == constants.DDM_REMOVE:
10961
        if not instance.nics:
10962
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10963
                                     errors.ECODE_INVAL)
10964
        continue
10965
      if nic_op != constants.DDM_ADD:
10966
        # an existing nic
10967
        if not instance.nics:
10968
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10969
                                     " no NICs" % nic_op,
10970
                                     errors.ECODE_INVAL)
10971
        if nic_op < 0 or nic_op >= len(instance.nics):
10972
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10973
                                     " are 0 to %d" %
10974
                                     (nic_op, len(instance.nics) - 1),
10975
                                     errors.ECODE_INVAL)
10976
        old_nic_params = instance.nics[nic_op].nicparams
10977
        old_nic_ip = instance.nics[nic_op].ip
10978
      else:
10979
        old_nic_params = {}
10980
        old_nic_ip = None
10981

    
10982
      update_params_dict = dict([(key, nic_dict[key])
10983
                                 for key in constants.NICS_PARAMETERS
10984
                                 if key in nic_dict])
10985

    
10986
      if "bridge" in nic_dict:
10987
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10988

    
10989
      new_nic_params = _GetUpdatedParams(old_nic_params,
10990
                                         update_params_dict)
10991
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10992
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10993
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10994
      self.nic_pinst[nic_op] = new_nic_params
10995
      self.nic_pnew[nic_op] = new_filled_nic_params
10996
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10997

    
10998
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10999
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11000
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11001
        if msg:
11002
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11003
          if self.op.force:
11004
            self.warn.append(msg)
11005
          else:
11006
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11007
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11008
        if constants.INIC_IP in nic_dict:
11009
          nic_ip = nic_dict[constants.INIC_IP]
11010
        else:
11011
          nic_ip = old_nic_ip
11012
        if nic_ip is None:
11013
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11014
                                     " on a routed nic", errors.ECODE_INVAL)
11015
      if constants.INIC_MAC in nic_dict:
11016
        nic_mac = nic_dict[constants.INIC_MAC]
11017
        if nic_mac is None:
11018
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11019
                                     errors.ECODE_INVAL)
11020
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11021
          # otherwise generate the mac
11022
          nic_dict[constants.INIC_MAC] = \
11023
            self.cfg.GenerateMAC(self.proc.GetECId())
11024
        else:
11025
          # or validate/reserve the current one
11026
          try:
11027
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11028
          except errors.ReservationError:
11029
            raise errors.OpPrereqError("MAC address %s already in use"
11030
                                       " in cluster" % nic_mac,
11031
                                       errors.ECODE_NOTUNIQUE)
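      # Illustrative note (hypothetical value): a modification such as
      #   (0, {constants.INIC_MAC: constants.VALUE_GENERATE})
      # makes the branch above substitute a freshly generated MAC, while an
      # explicit address like "aa:00:00:12:34:56" is merely reserved and is
      # rejected if it is already in use in the cluster.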
11032

    
11033
    # DISK processing
11034
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11035
      raise errors.OpPrereqError("Disk operations not supported for"
11036
                                 " diskless instances",
11037
                                 errors.ECODE_INVAL)
11038
    for disk_op, _ in self.op.disks:
11039
      if disk_op == constants.DDM_REMOVE:
11040
        if len(instance.disks) == 1:
11041
          raise errors.OpPrereqError("Cannot remove the last disk of"
11042
                                     " an instance", errors.ECODE_INVAL)
11043
        _CheckInstanceDown(self, instance, "cannot remove disks")
11044

    
11045
      if (disk_op == constants.DDM_ADD and
11046
          len(instance.disks) >= constants.MAX_DISKS):
11047
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11048
                                   " add more" % constants.MAX_DISKS,
11049
                                   errors.ECODE_STATE)
11050
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11051
        # an existing disk
11052
        if disk_op < 0 or disk_op >= len(instance.disks):
11053
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11054
                                     " are 0 to %d" %
11055
                                     (disk_op, len(instance.disks)),
11056
                                     errors.ECODE_INVAL)
11057

    
11058
    return
11059

    
11060
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please clean up manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # these are DRBD disks, return their ports to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

  def Exec(self, feedback_fn):
11158
    """Modifies an instance.
11159

11160
    All parameters take effect only at the next restart of the instance.
11161

11162
    """
11163
    # Process here the warnings from CheckPrereq, as we don't have a
11164
    # feedback_fn there.
11165
    for warn in self.warn:
11166
      feedback_fn("WARNING: %s" % warn)
11167

    
11168
    result = []
11169
    instance = self.instance
11170
    # disk changes
11171
    for disk_op, disk_dict in self.op.disks:
11172
      if disk_op == constants.DDM_REMOVE:
11173
        # remove the last disk
11174
        device = instance.disks.pop()
11175
        device_idx = len(instance.disks)
11176
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11177
          self.cfg.SetDiskID(disk, node)
11178
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11179
          if msg:
11180
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11181
                            " continuing anyway", device_idx, node, msg)
11182
        result.append(("disk/%d" % device_idx, "remove"))
11183

    
11184
        # if this is a DRBD disk, return its port to the pool
11185
        if device.dev_type in constants.LDS_DRBD:
11186
          tcp_port = device.logical_id[2]
11187
          self.cfg.AddTcpUdpPort(tcp_port)
11188
      elif disk_op == constants.DDM_ADD:
11189
        # add a new disk
11190
        if instance.disk_template in (constants.DT_FILE,
11191
                                        constants.DT_SHARED_FILE):
11192
          file_driver, file_path = instance.disks[0].logical_id
11193
          file_path = os.path.dirname(file_path)
11194
        else:
11195
          file_driver = file_path = None
11196
        disk_idx_base = len(instance.disks)
11197
        new_disk = _GenerateDiskTemplate(self,
11198
                                         instance.disk_template,
11199
                                         instance.name, instance.primary_node,
11200
                                         instance.secondary_nodes,
11201
                                         [disk_dict],
11202
                                         file_path,
11203
                                         file_driver,
11204
                                         disk_idx_base, feedback_fn)[0]
11205
        instance.disks.append(new_disk)
11206
        info = _GetInstanceInfoText(instance)
11207

    
11208
        logging.info("Creating volume %s for instance %s",
11209
                     new_disk.iv_name, instance.name)
11210
        # Note: this needs to be kept in sync with _CreateDisks
11211
        #HARDCODE
11212
        for node in instance.all_nodes:
11213
          f_create = node == instance.primary_node
11214
          try:
11215
            _CreateBlockDev(self, node, instance, new_disk,
11216
                            f_create, info, f_create)
11217
          except errors.OpExecError, err:
11218
            self.LogWarning("Failed to create volume %s (%s) on"
11219
                            " node %s: %s",
11220
                            new_disk.iv_name, new_disk, node, err)
11221
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11222
                       (new_disk.size, new_disk.mode)))
11223
      else:
11224
        # change a given disk
11225
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11226
        result.append(("disk.mode/%d" % disk_op,
11227
                       disk_dict[constants.IDISK_MODE]))
11228

    
11229
    if self.op.disk_template:
11230
      r_shut = _ShutdownInstanceDisks(self, instance)
11231
      if not r_shut:
11232
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11233
                                 " proceed with disk template conversion")
11234
      mode = (instance.disk_template, self.op.disk_template)
11235
      try:
11236
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11237
      except:
11238
        self.cfg.ReleaseDRBDMinors(instance.name)
11239
        raise
11240
      result.append(("disk_template", self.op.disk_template))
11241

    
11242
    # NIC changes
11243
    for nic_op, nic_dict in self.op.nics:
11244
      if nic_op == constants.DDM_REMOVE:
11245
        # remove the last nic
11246
        del instance.nics[-1]
11247
        result.append(("nic.%d" % len(instance.nics), "remove"))
11248
      elif nic_op == constants.DDM_ADD:
11249
        # mac and bridge should be set, by now
11250
        mac = nic_dict[constants.INIC_MAC]
11251
        ip = nic_dict.get(constants.INIC_IP, None)
11252
        nicparams = self.nic_pinst[constants.DDM_ADD]
11253
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11254
        instance.nics.append(new_nic)
11255
        result.append(("nic.%d" % (len(instance.nics) - 1),
11256
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11257
                       (new_nic.mac, new_nic.ip,
11258
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11259
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11260
                       )))
11261
      else:
11262
        for key in (constants.INIC_MAC, constants.INIC_IP):
11263
          if key in nic_dict:
11264
            setattr(instance.nics[nic_op], key, nic_dict[key])
11265
        if nic_op in self.nic_pinst:
11266
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11267
        for key, val in nic_dict.iteritems():
11268
          result.append(("nic.%s/%d" % (key, nic_op), val))
11269

    
11270
    # hvparams changes
11271
    if self.op.hvparams:
11272
      instance.hvparams = self.hv_inst
11273
      for key, val in self.op.hvparams.iteritems():
11274
        result.append(("hv/%s" % key, val))
11275

    
11276
    # beparams changes
11277
    if self.op.beparams:
11278
      instance.beparams = self.be_inst
11279
      for key, val in self.op.beparams.iteritems():
11280
        result.append(("be/%s" % key, val))
11281

    
11282
    # OS change
11283
    if self.op.os_name:
11284
      instance.os = self.op.os_name
11285

    
11286
    # osparams changes
11287
    if self.op.osparams:
11288
      instance.osparams = self.os_inst
11289
      for key, val in self.op.osparams.iteritems():
11290
        result.append(("os/%s" % key, val))
11291

    
11292
    self.cfg.Update(instance, feedback_fn)
11293

    
11294
    return result
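    # Illustrative sketch of the value returned above (hypothetical entries):
    #   [("disk/1", "add:size=2048,mode=rw"), ("nic.ip/0", "198.51.100.7"),
    #    ("be/memory", 512)]
    # i.e. a list of (parameter, new value) pairs describing the changes made.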
11295

    
11296
  _DISK_CONVERSIONS = {
11297
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11298
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11299
    }
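  # Descriptive note: Exec() dispatches a template change through this table,
  #   self._DISK_CONVERSIONS[(instance.disk_template, self.op.disk_template)]
  # so only the plain<->drbd conversions listed here are supported;
  # CheckPrereq rejects any other combination beforehand.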
11300

    
11301

    
11302
class LUInstanceChangeGroup(LogicalUnit):
11303
  HPATH = "instance-change-group"
11304
  HTYPE = constants.HTYPE_INSTANCE
11305
  REQ_BGL = False
11306

    
11307
  def ExpandNames(self):
11308
    self.share_locks = _ShareAll()
11309
    self.needed_locks = {
11310
      locking.LEVEL_NODEGROUP: [],
11311
      locking.LEVEL_NODE: [],
11312
      }
11313

    
11314
    self._ExpandAndLockInstance()
11315

    
11316
    if self.op.target_groups:
11317
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11318
                                  self.op.target_groups)
11319
    else:
11320
      self.req_target_uuids = None
11321

    
11322
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11323

    
11324
  def DeclareLocks(self, level):
11325
    if level == locking.LEVEL_NODEGROUP:
11326
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11327

    
11328
      if self.req_target_uuids:
11329
        lock_groups = set(self.req_target_uuids)
11330

    
11331
        # Lock all groups used by instance optimistically; this requires going
11332
        # via the node before it's locked, requiring verification later on
11333
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11334
        lock_groups.update(instance_groups)
11335
      else:
11336
        # No target groups, need to lock all of them
11337
        lock_groups = locking.ALL_SET
11338

    
11339
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11340

    
11341
    elif level == locking.LEVEL_NODE:
11342
      if self.req_target_uuids:
11343
        # Lock all nodes used by instances
11344
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11345
        self._LockInstancesNodes()
11346

    
11347
        # Lock all nodes in all potential target groups
11348
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11349
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11350
        member_nodes = [node_name
11351
                        for group in lock_groups
11352
                        for node_name in self.cfg.GetNodeGroup(group).members]
11353
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11354
      else:
11355
        # Lock all nodes as all groups are potential targets
11356
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11357

    
11358
  def CheckPrereq(self):
11359
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11360
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11361
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11362

    
11363
    assert (self.req_target_uuids is None or
11364
            owned_groups.issuperset(self.req_target_uuids))
11365
    assert owned_instances == set([self.op.instance_name])
11366

    
11367
    # Get instance information
11368
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11369

    
11370
    # Check if node groups for locked instance are still correct
11371
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11372
      ("Instance %s's nodes changed while we kept the lock" %
11373
       self.op.instance_name)
11374

    
11375
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11376
                                           owned_groups)
11377

    
11378
    if self.req_target_uuids:
11379
      # User requested specific target groups
11380
      self.target_uuids = frozenset(self.req_target_uuids)
11381
    else:
11382
      # All groups except those used by the instance are potential targets
11383
      self.target_uuids = owned_groups - inst_groups
11384

    
11385
    conflicting_groups = self.target_uuids & inst_groups
11386
    if conflicting_groups:
11387
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11388
                                 " used by the instance '%s'" %
11389
                                 (utils.CommaJoin(conflicting_groups),
11390
                                  self.op.instance_name),
11391
                                 errors.ECODE_INVAL)
11392

    
11393
    if not self.target_uuids:
11394
      raise errors.OpPrereqError("There are no possible target groups",
11395
                                 errors.ECODE_INVAL)
11396

    
11397
  def BuildHooksEnv(self):
11398
    """Build hooks env.
11399

11400
    """
11401
    assert self.target_uuids
11402

    
11403
    env = {
11404
      "TARGET_GROUPS": " ".join(self.target_uuids),
11405
      }
11406

    
11407
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11408

    
11409
    return env
11410

    
11411
  def BuildHooksNodes(self):
11412
    """Build hooks nodes.
11413

11414
    """
11415
    mn = self.cfg.GetMasterNode()
11416
    return ([mn], [mn])
11417

    
11418
  def Exec(self, feedback_fn):
11419
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11420

    
11421
    assert instances == [self.op.instance_name], "Instance not locked"
11422

    
11423
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11424
                     instances=instances, target_groups=list(self.target_uuids))
11425

    
11426
    ial.Run(self.op.iallocator)
11427

    
11428
    if not ial.success:
11429
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11430
                                 " instance '%s' using iallocator '%s': %s" %
11431
                                 (self.op.instance_name, self.op.iallocator,
11432
                                  ial.info),
11433
                                 errors.ECODE_NORES)
11434

    
11435
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11436

    
11437
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11438
                 " instance '%s'", len(jobs), self.op.instance_name)
11439

    
11440
    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
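    # Example shape of the return value (hypothetical names): nodes that
    # answered map to their export list, nodes that failed map to False:
    #   {"node1.example.com": ["inst1.example.com"],
    #    "node2.example.com": False}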


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
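    # Descriptive note: for local exports there is nothing to prepare, hence
    # the None above; for remote exports the dictionary returned earlier is
    # meant to be fed back into the export opcode (see LUBackupExport, which
    # consumes x509_key_name and destination_x509_ca).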
11528

    
11529

    
11530
class LUBackupExport(LogicalUnit):
11531
  """Export an instance to an image in the cluster.
11532

11533
  """
11534
  HPATH = "instance-export"
11535
  HTYPE = constants.HTYPE_INSTANCE
11536
  REQ_BGL = False
11537

    
11538
  def CheckArguments(self):
11539
    """Check the arguments.
11540

11541
    """
11542
    self.x509_key_name = self.op.x509_key_name
11543
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11544

    
11545
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11546
      if not self.x509_key_name:
11547
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11548
                                   errors.ECODE_INVAL)
11549

    
11550
      if not self.dest_x509_ca_pem:
11551
        raise errors.OpPrereqError("Missing destination X509 CA",
11552
                                   errors.ECODE_INVAL)
11553

    
11554
  def ExpandNames(self):
11555
    self._ExpandAndLockInstance()
11556

    
11557
    # Lock all nodes for local exports
11558
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11559
      # FIXME: lock only instance primary and destination node
11560
      #
11561
      # Sad but true, for now we have to lock all nodes, as we don't know where
11562
      # the previous export might be, and in this LU we search for it and
11563
      # remove it from its current node. In the future we could fix this by:
11564
      #  - making a tasklet to search (share-lock all), then create the
11565
      #    new one, then one to remove, after
11566
      #  - removing the removal operation altogether
11567
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11568

    
11569
  def DeclareLocks(self, level):
11570
    """Last minute lock declaration."""
11571
    # All nodes are locked anyway, so nothing to do here.
11572

    
11573
  def BuildHooksEnv(self):
11574
    """Build hooks env.
11575

11576
    This will run on the master, primary node and target node.
11577

11578
    """
11579
    env = {
11580
      "EXPORT_MODE": self.op.mode,
11581
      "EXPORT_NODE": self.op.target_node,
11582
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11583
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11584
      # TODO: Generic function for boolean env variables
11585
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11586
      }
11587

    
11588
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11589

    
11590
    return env
11591

    
11592
  def BuildHooksNodes(self):
11593
    """Build hooks nodes.
11594

11595
    """
11596
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11597

    
11598
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11599
      nl.append(self.op.target_node)
11600

    
11601
    return (nl, nl)
11602

    
11603
  def CheckPrereq(self):
11604
    """Check prerequisites.
11605

11606
    This checks that the instance and node names are valid.
11607

11608
    """
11609
    instance_name = self.op.instance_name
11610

    
11611
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11612
    assert self.instance is not None, \
11613
          "Cannot retrieve locked instance %s" % self.op.instance_name
11614
    _CheckNodeOnline(self, self.instance.primary_node)
11615

    
11616
    if (self.op.remove_instance and self.instance.admin_up and
11617
        not self.op.shutdown):
11618
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11619
                                 " down before")
11620

    
11621
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11622
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11623
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11624
      assert self.dst_node is not None
11625

    
11626
      _CheckNodeOnline(self, self.dst_node.name)
11627
      _CheckNodeNotDrained(self, self.dst_node.name)
11628

    
11629
      self._cds = None
11630
      self.dest_disk_info = None
11631
      self.dest_x509_ca = None
11632

    
11633
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11634
      self.dst_node = None
11635

    
11636
      if len(self.op.target_node) != len(self.instance.disks):
11637
        raise errors.OpPrereqError(("Received destination information for %s"
11638
                                    " disks, but instance %s has %s disks") %
11639
                                   (len(self.op.target_node), instance_name,
11640
                                    len(self.instance.disks)),
11641
                                   errors.ECODE_INVAL)
11642

    
11643
      cds = _GetClusterDomainSecret()
11644

    
11645
      # Check X509 key name
11646
      try:
11647
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11648
      except (TypeError, ValueError), err:
11649
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11650

    
11651
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11652
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11653
                                   errors.ECODE_INVAL)
11654

    
11655
      # Load and verify CA
11656
      try:
11657
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11658
      except OpenSSL.crypto.Error, err:
11659
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11660
                                   (err, ), errors.ECODE_INVAL)
11661

    
11662
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11663
      if errcode is not None:
11664
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11665
                                   (msg, ), errors.ECODE_INVAL)
11666

    
11667
      self.dest_x509_ca = cert
11668

    
11669
      # Verify target information
11670
      disk_info = []
11671
      for idx, disk_data in enumerate(self.op.target_node):
11672
        try:
11673
          (host, port, magic) = \
11674
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11675
        except errors.GenericError, err:
11676
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11677
                                     (idx, err), errors.ECODE_INVAL)
11678

    
11679
        disk_info.append((host, port, magic))
11680

    
11681
      assert len(disk_info) == len(self.op.target_node)
11682
      self.dest_disk_info = disk_info
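      # Illustrative sketch (hypothetical values): dest_disk_info holds one
      # (host, port, magic) tuple per instance disk, e.g.
      #   [("198.51.100.12", 11000, "magicA"),
      #    ("198.51.100.12", 11001, "magicB")]
      # as decoded from the opcode's target_node entries above.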
11683

    
11684
    else:
11685
      raise errors.ProgrammerError("Unhandled export mode %r" %
11686
                                   self.op.mode)
11687

    
11688
    # instance disk type verification
11689
    # TODO: Implement export support for file-based disks
11690
    for disk in self.instance.disks:
11691
      if disk.dev_type == constants.LD_FILE:
11692
        raise errors.OpPrereqError("Export not supported for instances with"
11693
                                   " file-based disks", errors.ECODE_INVAL)
11694

    
11695
  def _CleanupExports(self, feedback_fn):
11696
    """Removes exports of current instance from all other nodes.
11697

11698
    If an instance in a cluster with nodes A..D was exported to node C, its
11699
    exports will be removed from the nodes A, B and D.
11700

11701
    """
11702
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11703

    
11704
    nodelist = self.cfg.GetNodeList()
11705
    nodelist.remove(self.dst_node.name)
11706

    
11707
    # on one-node clusters nodelist will be empty after the removal
11708
    # if we proceed the backup would be removed because OpBackupQuery
11709
    # substitutes an empty list with the full cluster node list.
11710
    iname = self.instance.name
11711
    if nodelist:
11712
      feedback_fn("Removing old exports for instance %s" % iname)
11713
      exportlist = self.rpc.call_export_list(nodelist)
11714
      for node in exportlist:
11715
        if exportlist[node].fail_msg:
11716
          continue
11717
        if iname in exportlist[node].payload:
11718
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11719
          if msg:
11720
            self.LogWarning("Could not remove older export for instance %s"
11721
                            " on node %s: %s", iname, node, msg)
11722

    
11723
  def Exec(self, feedback_fn):
11724
    """Export an instance to an image in the cluster.
11725

11726
    """
11727
    assert self.op.mode in constants.EXPORT_MODES
11728

    
11729
    instance = self.instance
11730
    src_node = instance.primary_node
11731

    
11732
    if self.op.shutdown:
11733
      # shutdown the instance, but not the disks
11734
      feedback_fn("Shutting down instance %s" % instance.name)
11735
      result = self.rpc.call_instance_shutdown(src_node, instance,
11736
                                               self.op.shutdown_timeout)
11737
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11738
      result.Raise("Could not shutdown instance %s on"
11739
                   " node %s" % (instance.name, src_node))
11740

    
11741
    # set the disks ID correctly since call_instance_start needs the
11742
    # correct drbd minor to create the symlinks
11743
    for disk in instance.disks:
11744
      self.cfg.SetDiskID(disk, src_node)
11745

    
11746
    activate_disks = (not instance.admin_up)
11747

    
11748
    if activate_disks:
11749
      # Activate the instance disks if we are exporting a stopped instance
11750
      feedback_fn("Activating disks for %s" % instance.name)
11751
      _StartInstanceDisks(self, instance, None)
11752

    
11753
    try:
11754
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11755
                                                     instance)
11756

    
11757
      helper.CreateSnapshots()
11758
      try:
11759
        if (self.op.shutdown and instance.admin_up and
11760
            not self.op.remove_instance):
11761
          assert not activate_disks
11762
          feedback_fn("Starting instance %s" % instance.name)
11763
          result = self.rpc.call_instance_start(src_node, instance,
11764
                                                None, None, False)
11765
          msg = result.fail_msg
11766
          if msg:
11767
            feedback_fn("Failed to start instance: %s" % msg)
11768
            _ShutdownInstanceDisks(self, instance)
11769
            raise errors.OpExecError("Could not start instance: %s" % msg)
11770

    
11771
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11772
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11773
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11774
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11775
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11776

    
11777
          (key_name, _, _) = self.x509_key_name
11778

    
11779
          dest_ca_pem = \
11780
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11781
                                            self.dest_x509_ca)
11782

    
11783
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11784
                                                     key_name, dest_ca_pem,
11785
                                                     timeouts)
11786
      finally:
11787
        helper.Cleanup()
11788

    
11789
      # Check for backwards compatibility
11790
      assert len(dresults) == len(instance.disks)
11791
      assert compat.all(isinstance(i, bool) for i in dresults), \
11792
             "Not all results are boolean: %r" % dresults
11793

    
11794
    finally:
11795
      if activate_disks:
11796
        feedback_fn("Deactivating disks for %s" % instance.name)
11797
        _ShutdownInstanceDisks(self, instance)
11798

    
11799
    if not (compat.all(dresults) and fin_resu):
11800
      failures = []
11801
      if not fin_resu:
11802
        failures.append("export finalization")
11803
      if not compat.all(dresults):
11804
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11805
                               if not dsk)
11806
        failures.append("disk export: disk(s) %s" % fdsk)
11807

    
11808
      raise errors.OpExecError("Export failed, errors in %s" %
11809
                               utils.CommaJoin(failures))
11810

    
11811
    # At this point, the export was successful, we can cleanup/finish
11812

    
11813
    # Remove instance if requested
11814
    if self.op.remove_instance:
11815
      feedback_fn("Removing instance %s" % instance.name)
11816
      _RemoveInstance(self, feedback_fn, instance,
11817
                      self.op.ignore_remove_failures)
11818

    
11819
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11820
      self._CleanupExports(feedback_fn)
11821

    
11822
    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
11937
  """Logical unit for assigning nodes to groups.
11938

11939
  """
11940
  REQ_BGL = False
11941

    
11942
  def ExpandNames(self):
11943
    # These raise errors.OpPrereqError on their own:
11944
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11945
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11946

    
11947
    # We want to lock all the affected nodes and groups. We have readily
11948
    # available the list of nodes, and the *destination* group. To gather the
11949
    # list of "source" groups, we need to fetch node information later on.
11950
    self.needed_locks = {
11951
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11952
      locking.LEVEL_NODE: self.op.nodes,
11953
      }
11954

    
11955
  def DeclareLocks(self, level):
11956
    if level == locking.LEVEL_NODEGROUP:
11957
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11958

    
11959
      # Try to get all affected nodes' groups without having the group or node
11960
      # lock yet. Needs verification later in the code flow.
11961
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11962

    
11963
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11964

    
11965
  def CheckPrereq(self):
11966
    """Check prerequisites.
11967

11968
    """
11969
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11970
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11971
            frozenset(self.op.nodes))
11972

    
11973
    expected_locks = (set([self.group_uuid]) |
11974
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11975
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11976
    if actual_locks != expected_locks:
11977
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11978
                               " current groups are '%s', used to be '%s'" %
11979
                               (utils.CommaJoin(expected_locks),
11980
                                utils.CommaJoin(actual_locks)))
11981

    
11982
    self.node_data = self.cfg.GetAllNodesInfo()
11983
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11984
    instance_data = self.cfg.GetAllInstancesInfo()
11985

    
11986
    if self.group is None:
11987
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11988
                               (self.op.group_name, self.group_uuid))
11989

    
11990
    (new_splits, previous_splits) = \
11991
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11992
                                             for node in self.op.nodes],
11993
                                            self.node_data, instance_data)
11994

    
11995
    if new_splits:
11996
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11997

    
11998
      if not self.op.force:
11999
        raise errors.OpExecError("The following instances get split by this"
12000
                                 " change and --force was not given: %s" %
12001
                                 fmt_new_splits)
12002
      else:
12003
        self.LogWarning("This operation will split the following instances: %s",
12004
                        fmt_new_splits)
12005

    
12006
        if previous_splits:
12007
          self.LogWarning("In addition, these already-split instances continue"
12008
                          " to be split across groups: %s",
12009
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12010

    
12011
  def Exec(self, feedback_fn):
12012
    """Assign nodes to a new group.
12013

12014
    """
12015
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12016

    
12017
    self.cfg.AssignGroupNodes(mods)
12018

    
12019
  @staticmethod
12020
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12021
    """Check for split instances after a node assignment.
12022

12023
    This method considers a series of node assignments as an atomic operation,
12024
    and returns information about split instances after applying the set of
12025
    changes.
12026

12027
    In particular, it returns information about newly split instances, and
12028
    instances that were already split, and remain so after the change.
12029

12030
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12031
    considered.
12032

12033
    @type changes: list of (node_name, new_group_uuid) pairs.
12034
    @param changes: list of node assignments to consider.
12035
    @param node_data: a dict with data for all nodes
12036
    @param instance_data: a dict with all instances to consider
12037
    @rtype: a two-tuple
12038
    @return: a list of instances that were previously okay and result split as a
12039
      consequence of this change, and a list of instances that were previously
12040
      split and this change does not fix.
12041

12042
    """
12043
    changed_nodes = dict((node, group) for node, group in changes
12044
                         if node_data[node].group != group)
12045

    
12046
    all_split_instances = set()
12047
    previously_split_instances = set()
12048

    
12049
    def InstanceNodes(instance):
12050
      return [instance.primary_node] + list(instance.secondary_nodes)
12051

    
12052
    for inst in instance_data.values():
12053
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12054
        continue
12055

    
12056
      instance_nodes = InstanceNodes(inst)
12057

    
12058
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12059
        previously_split_instances.add(inst.name)
12060

    
12061
      if len(set(changed_nodes.get(node, node_data[node].group)
12062
                 for node in instance_nodes)) > 1:
12063
        all_split_instances.add(inst.name)
12064

    
12065
    return (list(all_split_instances - previously_split_instances),
12066
            list(previously_split_instances & all_split_instances))
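    # Worked example (hypothetical names, for illustration only): nodes n1 and
    # n2 both sit in group g1 and host a DRBD instance. Assigning only n2 to
    # group g2 puts that instance in the first returned list (newly split);
    # had n1 and n2 already been in different groups and the assignment not
    # reunited them, it would appear in the second list instead.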


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
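
  # Shape sketch (hypothetical UUIDs and names, not real cluster data): when
  # both GQ_NODE and GQ_INST are requested, the two mappings passed to
  # query.GroupQueryData look like
  #
  #   group_to_nodes     = {"uuid-g1": ["node1", "node2"], "uuid-g2": []}
  #   group_to_instances = {"uuid-g1": ["inst1"], "uuid-g2": []}
  #
  # i.e. instances are attributed to the group of their primary node only.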


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
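
  # Result sketch (illustrative values only): when only ndparams are changed,
  # Exec() returns a list of (parameter, new value) pairs such as
  #
  #   [("ndparams", "{'oob_program': '/bin/true'}")]
  #
  # which the caller can use to report what was actually modified.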


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
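
  # Sketch of the data handed back (hypothetical instance names): "jobs" is a
  # list of job definitions, each itself a list of opcodes to be submitted as
  # one job, e.g.
  #
  #   jobs = [
  #     [opcodes.OpInstanceMigrate(instance_name="inst1")],
  #     [opcodes.OpInstanceFailover(instance_name="inst2")],
  #     ]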


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tags matching the pattern, with their object paths.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
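
  # Result sketch (hypothetical names and tags): searching for the pattern
  # "^web" yields (path, tag) pairs such as
  #
  #   [("/cluster", "webfarm"),
  #    ("/instances/inst1.example.com", "webserver")]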


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
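
  # Shape sketch (hypothetical UUIDs, names and policies): the "nodegroups"
  # entry of the allocator input looks like
  #
  #   {"uuid-of-group1": {"name": "default", "alloc_policy": "preferred"},
  #    "uuid-of-group2": {"name": "backup", "alloc_policy": "last_resort"}}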

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @return: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
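
  # Result shape sketch for node-evacuation/group-change results (hypothetical
  # names): a value accepted by _NEVAC_RESULT is a three-element list of
  # (moved, failed, job sets), e.g.
  #
  #   [[["inst1", "target-group-uuid", ["node3", "node4"]]],   # moved
  #    [["inst2", "instance not relocatable"]],                # failed
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]]]                   # opcode jobs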

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
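
  # How _MODE_DATA ties together (sketch): each mode maps to (request builder,
  # required request keys with their ht checks, result check). For
  # IALLOCATOR_MODE_ALLOC the builder is _AddNewInstance, so the "request"
  # section of self.in_data ends up roughly like (hypothetical values):
  #
  #   {"type": "allocate", "name": "inst1.example.com", "memory": 512,
  #    "vcpus": 1, "disks": [{"size": 1024, "mode": "rw"}],
  #    "disk_template": "drbd", "os": "debootstrap", "tags": [], "nics": [],
  #    "hypervisor": "xen-pvm"}
  #
  # plus "required_nodes" and "disk_space_total", which _AddNewInstance fills
  # in itself before the keydata validation in _BuildInputData runs.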

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process the results and, if successful, save them in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
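
  # Usage sketch (hypothetical data): given
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}   # "uuid-b" is unknown
  #
  # _NodesToGroups(node2group, groups, ["node1", "node2", "node9"]) returns
  # ["default", "uuid-b"]: known groups map to their name, a missing group
  # entry falls back to the UUID, and unknown nodes are skipped.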


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
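

# Usage sketch (illustrative, with hypothetical "names"/"fields" variables):
# callers look up the query class for a resource and instantiate it the same
# way LUGroupQuery.CheckArguments does, e.g.
#
#   impl_cls = _GetQueryImplementation(constants.QR_GROUP)   # -> _GroupQuery
#   gq = impl_cls(qlang.MakeSimpleFilter("name", names), fields, False)
#
# an unknown resource name raises OpPrereqError with ECODE_INVAL.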