
root / lib / cmdlib.py @ c50452c3


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
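# Editor's sketch (not part of the original module): an LU's Exec method can
# hand follow-up jobs back to the processor like this; the extra keyword value
# is a hypothetical example of an additional return value.
#
#   def Exec(self, feedback_fn):
#     ...
#     return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
#                           submitted="config verification")
#
# mcpu.Processor._ProcessResult then submits the jobs from the C{jobs}
# attribute and includes the resulting job IDs, together with the keyword
# values, in the opcode result.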
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
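  # Editor's sketch (hypothetical, not part of the original file): the rules
  # above translate into roughly this skeleton for a minimal, hook-less,
  # concurrent LU (NoHooksLU is defined further down in this module):
  #
  #   class LUDemoNoop(NoHooksLU):
  #     REQ_BGL = False
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}
  #
  #     def CheckPrereq(self):
  #       pass
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("demo LU: nothing to do")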
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
137
    self.LogStep = processor.LogStep # pylint: disable=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensuring
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separately is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possibly
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods no longer need to worry about missing parameters.
167

168
    """
169
    pass
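  # Editor's sketch (hypothetical): a derived LU could override CheckArguments
  # for purely syntactic checks, for example:
  #
  #   def CheckArguments(self):
  #     if self.op.count < 0:        # "count" is an assumed opcode slot
  #       raise errors.OpPrereqError("count must not be negative",
  #                                  errors.ECODE_INVAL)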
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used to send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the "unused argument" and "could
324
    # be a function" warnings
325
    # pylint: disable=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
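  # Editor's sketch (hypothetical): an instance-level LU would normally call
  # the helper above from ExpandNames and ask for the node locks to be
  # recalculated once the instance lock is held:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE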
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to lock only some instances' nodes, or
359
    only primary or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we really have been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
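# Editor's sketch (hypothetical example): a minimal tasklet and how an LU
# would install it; the check and the work shown here are placeholders only.
#
#   class _DemoTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       if self.cfg.GetInstanceInfo(self.instance_name) is None:
#         raise errors.OpPrereqError("Instance %s unknown" %
#                                    self.instance_name, errors.ECODE_NOENT)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance_name)
#
# The owning LU would then set, e.g. in ExpandNames:
#   self.tasklets = [_DemoTasklet(self, self.op.instance_name)]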
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
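# Editor's note: an LU that only reads cluster state can combine this helper
# with its lock declaration, e.g. (sketch):
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}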
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @type use_default: boolean
672
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @type use_none: boolean
675
  @param use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
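# Editor's note: a small worked example of the merge semantics above (the
# values are made up):
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   => {"b": 2, "c": 3}    # "a" is reset to its default by deleting the key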
692

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
698
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
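# Editor's sketch (hypothetical): after narrowing its working set, an LU can
# drop the node locks it no longer needs while keeping one it still uses
# ("node_name" is an assumed opcode slot):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# Calling it with neither "names" nor "keep" releases every lock held at the
# given level.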
737

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
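# Editor's note: the resulting mapping has this shape (node names and volume
# names are illustrative only):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}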
750

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
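# Editor's note: for a single-NIC, single-disk instance the dictionary built
# above contains keys such as (illustrative, not exhaustive):
#
#   OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
#   INSTANCE_OS_TYPE, INSTANCE_STATUS, INSTANCE_MEMORY, INSTANCE_VCPUS,
#   INSTANCE_DISK_TEMPLATE, INSTANCE_HYPERVISOR, INSTANCE_NIC_COUNT,
#   INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
#   INSTANCE_NIC0_LINK, INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE,
#   INSTANCE_DISK0_MODE, INSTANCE_TAGS
#
# plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and hypervisor
# parameter; the hooks runner prefixes every key with "GANETI_".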
1028

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max by one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_stop_master(master, False)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @param instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436
  TCLUSTER = "cluster"
1437
  TNODE = "node"
1438
  TINSTANCE = "instance"
1439

    
1440
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452
  ENODEDRBD = (TNODE, "ENODEDRBD")
1453
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456
  ENODEHV = (TNODE, "ENODEHV")
1457
  ENODELVM = (TNODE, "ENODELVM")
1458
  ENODEN1 = (TNODE, "ENODEN1")
1459
  ENODENET = (TNODE, "ENODENET")
1460
  ENODEOS = (TNODE, "ENODEOS")
1461
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463
  ENODERPC = (TNODE, "ENODERPC")
1464
  ENODESSH = (TNODE, "ENODESSH")
1465
  ENODEVERSION = (TNODE, "ENODEVERSION")
1466
  ENODESETUP = (TNODE, "ENODESETUP")
1467
  ENODETIME = (TNODE, "ENODETIME")
1468
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469

    
1470
  ETYPE_FIELD = "code"
1471
  ETYPE_ERROR = "ERROR"
1472
  ETYPE_WARNING = "WARNING"
1473

    
1474
  def _Error(self, ecode, item, msg, *args, **kwargs):
1475
    """Format an error message.
1476

1477
    Based on the opcode's error_codes parameter, either format a
1478
    parseable error code, or a simpler error string.
1479

1480
    This must be called only from Exec and functions called from Exec.
1481

1482
    """
1483
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484
    itype, etxt = ecode
1485
    # first complete the msg
1486
    if args:
1487
      msg = msg % args
1488
    # then format the whole message
1489
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491
    else:
1492
      if item:
1493
        item = " " + item
1494
      else:
1495
        item = ""
1496
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497
    # and finally report it via the feedback_fn
1498
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1499

    
1500
  def _ErrorIf(self, cond, *args, **kwargs):
1501
    """Log an error message if the passed condition is True.
1502

1503
    """
1504
    cond = (bool(cond)
1505
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1506
    if cond:
1507
      self._Error(*args, **kwargs)
1508
    # do not mark the operation as failed for WARN-only cases
1509
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510
      self.bad = self.bad or cond
1511

    
1512

    
1513
class LUClusterVerify(NoHooksLU):
1514
  """Submits all jobs necessary to verify the cluster.
1515

1516
  """
1517
  REQ_BGL = False
1518

    
1519
  def ExpandNames(self):
1520
    self.needed_locks = {}
1521

    
1522
  def Exec(self, feedback_fn):
1523
    jobs = []
1524

    
1525
    if self.op.group_name:
1526
      groups = [self.op.group_name]
1527
      depends_fn = lambda: None
1528
    else:
1529
      groups = self.cfg.GetNodeGroupList()
1530

    
1531
      # Verify global configuration
1532
      jobs.append([opcodes.OpClusterVerifyConfig()])
1533

    
1534
      # Always depend on global verification
1535
      depends_fn = lambda: [(-len(jobs), [])]
1536

    
1537
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538
                                              depends=depends_fn())]
1539
                for group in groups)
1540

    
1541
    # Fix up all parameters
1542
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1543
      op.debug_simulate_errors = self.op.debug_simulate_errors
1544
      op.verbose = self.op.verbose
1545
      op.error_codes = self.op.error_codes
1546
      try:
1547
        op.skip_checks = self.op.skip_checks
1548
      except AttributeError:
1549
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1550

    
1551
    return ResultWithJobs(jobs)
1552

    
1553

    
1554
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555
  """Verifies the cluster config.
1556

1557
  """
1558
  REQ_BGL = True
1559

    
1560
  def _VerifyHVP(self, hvp_data):
1561
    """Verifies locally the syntax of the hypervisor parameters.
1562

1563
    """
1564
    for item, hv_name, hv_params in hvp_data:
1565
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566
             (item, hv_name))
1567
      try:
1568
        hv_class = hypervisor.GetHypervisor(hv_name)
1569
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570
        hv_class.CheckParameterSyntax(hv_params)
1571
      except errors.GenericError, err:
1572
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573

    
1574
  def ExpandNames(self):
1575
    # Information can be safely retrieved as the BGL is acquired in exclusive
1576
    # mode
1577
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579
    self.all_node_info = self.cfg.GetAllNodesInfo()
1580
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581
    self.needed_locks = {}
1582

    
1583
  def Exec(self, feedback_fn):
1584
    """Verify integrity of cluster, performing various test on nodes.
1585

1586
    """
1587
    self.bad = False
1588
    self._feedback_fn = feedback_fn
1589

    
1590
    feedback_fn("* Verifying cluster config")
1591

    
1592
    for msg in self.cfg.VerifyConfig():
1593
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1594

    
1595
    feedback_fn("* Verifying cluster certificate files")
1596

    
1597
    for cert_filename in constants.ALL_CERT_FILES:
1598
      (errcode, msg) = _VerifyCertificate(cert_filename)
1599
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1600

    
1601
    feedback_fn("* Verifying hypervisor parameters")
1602

    
1603
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604
                                                self.all_inst_info.values()))
1605

    
1606
    feedback_fn("* Verifying all nodes belong to an existing group")
1607

    
1608
    # We do this verification here because, should this bogus circumstance
1609
    # occur, it would never be caught by VerifyGroup, which only acts on
1610
    # nodes/instances reachable from existing node groups.
1611

    
1612
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1613
                         if node.group not in self.all_group_info)
1614

    
1615
    dangling_instances = {}
1616
    no_node_instances = []
1617

    
1618
    for inst in self.all_inst_info.values():
1619
      if inst.primary_node in dangling_nodes:
1620
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621
      elif inst.primary_node not in self.all_node_info:
1622
        no_node_instances.append(inst.name)
1623

    
1624
    pretty_dangling = [
1625
        "%s (%s)" %
1626
        (node.name,
1627
         utils.CommaJoin(dangling_instances.get(node.name,
1628
                                                ["no instances"])))
1629
        for node in dangling_nodes]
1630

    
1631
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632
                  "the following nodes (and their instances) belong to a non"
1633
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1634

    
1635
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636
                  "the following instances have a non-existing primary-node:"
1637
                  " %s", utils.CommaJoin(no_node_instances))
1638

    
1639
    return not self.bad
1640

    
1641

    
1642
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643
  """Verifies the status of a node group.
1644

1645
  """
1646
  HPATH = "cluster-verify"
1647
  HTYPE = constants.HTYPE_CLUSTER
1648
  REQ_BGL = False
1649

    
1650
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1651

    
1652
  class NodeImage(object):
1653
    """A class representing the logical and physical status of a node.
1654

1655
    @type name: string
1656
    @ivar name: the node name to which this object refers
1657
    @ivar volumes: a structure as returned from
1658
        L{ganeti.backend.GetVolumeList} (runtime)
1659
    @ivar instances: a list of running instances (runtime)
1660
    @ivar pinst: list of configured primary instances (config)
1661
    @ivar sinst: list of configured secondary instances (config)
1662
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1663
        instances for which this node is secondary (config)
1664
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1665
    @ivar dfree: free disk, as reported by the node (runtime)
1666
    @ivar offline: the offline status (config)
1667
    @type rpc_fail: boolean
1668
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669
        not whether the individual keys were correct) (runtime)
1670
    @type lvm_fail: boolean
1671
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672
    @type hyp_fail: boolean
1673
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1674
    @type ghost: boolean
1675
    @ivar ghost: whether this is a known node or not (config)
1676
    @type os_fail: boolean
1677
    @ivar os_fail: whether the RPC call didn't return valid OS data
1678
    @type oslist: list
1679
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680
    @type vm_capable: boolean
1681
    @ivar vm_capable: whether the node can host instances
1682

1683
    """
1684
    def __init__(self, offline=False, name=None, vm_capable=True):
1685
      self.name = name
1686
      self.volumes = {}
1687
      self.instances = []
1688
      self.pinst = []
1689
      self.sinst = []
1690
      self.sbp = {}
1691
      self.mfree = 0
1692
      self.dfree = 0
1693
      self.offline = offline
1694
      self.vm_capable = vm_capable
1695
      self.rpc_fail = False
1696
      self.lvm_fail = False
1697
      self.hyp_fail = False
1698
      self.ghost = False
1699
      self.os_fail = False
1700
      self.oslist = {}
1701

    
1702
  def ExpandNames(self):
1703
    # This raises errors.OpPrereqError on its own:
1704
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1705

    
1706
    # Get instances in node group; this is unsafe and needs verification later
1707
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1708

    
1709
    self.needed_locks = {
1710
      locking.LEVEL_INSTANCE: inst_names,
1711
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1712
      locking.LEVEL_NODE: [],
1713
      }
1714

    
1715
    self.share_locks = _ShareAll()
1716

    
1717
  def DeclareLocks(self, level):
1718
    if level == locking.LEVEL_NODE:
1719
      # Get members of node group; this is unsafe and needs verification later
1720
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1721

    
1722
      all_inst_info = self.cfg.GetAllInstancesInfo()
1723

    
1724
      # In Exec(), we warn about mirrored instances that have primary and
1725
      # secondary living in separate node groups. To fully verify that
1726
      # volumes for these instances are healthy, we will need to do an
1727
      # extra call to their secondaries. We ensure here those nodes will
1728
      # be locked.
1729
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730
        # Important: access only the instances whose lock is owned
1731
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732
          nodes.update(all_inst_info[inst].secondary_nodes)
1733

    
1734
      self.needed_locks[locking.LEVEL_NODE] = nodes
1735

    
1736
  def CheckPrereq(self):
1737
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1739

    
1740
    group_nodes = set(self.group_info.members)
1741
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1742

    
1743
    unlocked_nodes = \
1744
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1745

    
1746
    unlocked_instances = \
1747
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1748

    
1749
    if unlocked_nodes:
1750
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751
                                 utils.CommaJoin(unlocked_nodes))
1752

    
1753
    if unlocked_instances:
1754
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1755
                                 utils.CommaJoin(unlocked_instances))
1756

    
1757
    self.all_node_info = self.cfg.GetAllNodesInfo()
1758
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1759

    
1760
    self.my_node_names = utils.NiceSort(group_nodes)
1761
    self.my_inst_names = utils.NiceSort(group_instances)
1762

    
1763
    self.my_node_info = dict((name, self.all_node_info[name])
1764
                             for name in self.my_node_names)
1765

    
1766
    self.my_inst_info = dict((name, self.all_inst_info[name])
1767
                             for name in self.my_inst_names)
1768

    
1769
    # We detect here the nodes that will need the extra RPC calls for verifying
1770
    # split LV volumes; they should be locked.
1771
    extra_lv_nodes = set()
1772

    
1773
    for inst in self.my_inst_info.values():
1774
      if inst.disk_template in constants.DTS_INT_MIRROR:
1775
        group = self.my_node_info[inst.primary_node].group
1776
        for nname in inst.secondary_nodes:
1777
          if self.all_node_info[nname].group != group:
1778
            extra_lv_nodes.add(nname)
1779

    
1780
    unlocked_lv_nodes = \
1781
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1782

    
1783
    if unlocked_lv_nodes:
1784
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1785
                                 utils.CommaJoin(unlocked_lv_nodes))
1786
    self.extra_lv_nodes = list(extra_lv_nodes)
1787

    
1788
  def _VerifyNode(self, ninfo, nresult):
1789
    """Perform some basic validation on data returned from a node.
1790

1791
      - check the result data structure is well formed and has all the
1792
        mandatory fields
1793
      - check ganeti version
1794

1795
    @type ninfo: L{objects.Node}
1796
    @param ninfo: the node to check
1797
    @param nresult: the results from the node
1798
    @rtype: boolean
1799
    @return: whether overall this call was successful (and we can expect
1800
         reasonable values in the response)
1801

1802
    """
1803
    node = ninfo.name
1804
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1805

    
1806
    # main result, nresult should be a non-empty dict
1807
    test = not nresult or not isinstance(nresult, dict)
1808
    _ErrorIf(test, self.ENODERPC, node,
1809
                  "unable to verify node: no data returned")
1810
    if test:
1811
      return False
1812

    
1813
    # compares ganeti version
1814
    local_version = constants.PROTOCOL_VERSION
1815
    remote_version = nresult.get("version", None)
1816
    test = not (remote_version and
1817
                isinstance(remote_version, (list, tuple)) and
1818
                len(remote_version) == 2)
1819
    _ErrorIf(test, self.ENODERPC, node,
1820
             "connection to node returned invalid data")
1821
    if test:
1822
      return False
1823

    
1824
    test = local_version != remote_version[0]
1825
    _ErrorIf(test, self.ENODEVERSION, node,
1826
             "incompatible protocol versions: master %s,"
1827
             " node %s", local_version, remote_version[0])
1828
    if test:
1829
      return False
1830

    
1831
    # node seems compatible, we can actually try to look into its results
1832

    
1833
    # full package version
1834
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835
                  self.ENODEVERSION, node,
1836
                  "software version mismatch: master %s, node %s",
1837
                  constants.RELEASE_VERSION, remote_version[1],
1838
                  code=self.ETYPE_WARNING)
1839

    
1840
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1842
      for hv_name, hv_result in hyp_result.iteritems():
1843
        test = hv_result is not None
1844
        _ErrorIf(test, self.ENODEHV, node,
1845
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1846

    
1847
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848
    if ninfo.vm_capable and isinstance(hvp_result, list):
1849
      for item, hv_name, hv_result in hvp_result:
1850
        _ErrorIf(True, self.ENODEHV, node,
1851
                 "hypervisor %s parameter verify failure (source %s): %s",
1852
                 hv_name, item, hv_result)
1853

    
1854
    test = nresult.get(constants.NV_NODESETUP,
1855
                       ["Missing NODESETUP results"])
1856
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1857
             "; ".join(test))
1858

    
1859
    return True
1860

    
1861
  def _VerifyNodeTime(self, ninfo, nresult,
1862
                      nvinfo_starttime, nvinfo_endtime):
1863
    """Check the node time.
1864

1865
    @type ninfo: L{objects.Node}
1866
    @param ninfo: the node to check
1867
    @param nresult: the remote results for the node
1868
    @param nvinfo_starttime: the start time of the RPC call
1869
    @param nvinfo_endtime: the end time of the RPC call
1870

1871
    """
1872
    node = ninfo.name
1873
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1874

    
1875
    ntime = nresult.get(constants.NV_TIME, None)
1876
    try:
1877
      ntime_merged = utils.MergeTime(ntime)
1878
    except (ValueError, TypeError):
1879
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1880
      return
1881

    
1882
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1886
    else:
1887
      ntime_diff = None
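    # Illustrative example: assuming constants.NODE_MAX_CLOCK_SKEW were 150
    # seconds, a node whose merged time is 200 seconds before nvinfo_starttime
    # would yield ntime_diff == "200.0s" and be flagged below, while a node
    # within the +/- 150 second window is left alone.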
1888

    
1889
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890
             "Node time diverges by at least %s from master node time",
1891
             ntime_diff)
1892

    
1893
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894
    """Check the node LVM results.
1895

1896
    @type ninfo: L{objects.Node}
1897
    @param ninfo: the node to check
1898
    @param nresult: the remote results for the node
1899
    @param vg_name: the configured VG name
1900

1901
    """
1902
    if vg_name is None:
1903
      return
1904

    
1905
    node = ninfo.name
1906
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1907

    
1908
    # checks vg existence and size > 20G
1909
    vglist = nresult.get(constants.NV_VGLIST, None)
1910
    test = not vglist
1911
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1912
    if not test:
1913
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914
                                            constants.MIN_VG_SIZE)
1915
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1916

    
1917
    # check pv names
1918
    pvlist = nresult.get(constants.NV_PVLIST, None)
1919
    test = pvlist is None
1920
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1921
    if not test:
1922
      # check that ':' is not present in PV names, since it's a
1923
      # special character for lvcreate (denotes the range of PEs to
1924
      # use on the PV)
1925
      for _, pvname, owner_vg in pvlist:
1926
        test = ":" in pvname
1927
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928
                 " '%s' of VG '%s'", pvname, owner_vg)
1929

    
1930
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931
    """Check the node bridges.
1932

1933
    @type ninfo: L{objects.Node}
1934
    @param ninfo: the node to check
1935
    @param nresult: the remote results for the node
1936
    @param bridges: the expected list of bridges
1937

1938
    """
1939
    if not bridges:
1940
      return
1941

    
1942
    node = ninfo.name
1943
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1944

    
1945
    missing = nresult.get(constants.NV_BRIDGES, None)
1946
    test = not isinstance(missing, list)
1947
    _ErrorIf(test, self.ENODENET, node,
1948
             "did not return valid bridge information")
1949
    if not test:
1950
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951
               utils.CommaJoin(sorted(missing)))
1952

    
1953
  def _VerifyNodeNetwork(self, ninfo, nresult):
1954
    """Check the node network connectivity results.
1955

1956
    @type ninfo: L{objects.Node}
1957
    @param ninfo: the node to check
1958
    @param nresult: the remote results for the node
1959

1960
    """
1961
    node = ninfo.name
1962
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1963

    
1964
    test = constants.NV_NODELIST not in nresult
1965
    _ErrorIf(test, self.ENODESSH, node,
1966
             "node hasn't returned node ssh connectivity data")
1967
    if not test:
1968
      if nresult[constants.NV_NODELIST]:
1969
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970
          _ErrorIf(True, self.ENODESSH, node,
1971
                   "ssh communication with node '%s': %s", a_node, a_msg)
1972

    
1973
    test = constants.NV_NODENETTEST not in nresult
1974
    _ErrorIf(test, self.ENODENET, node,
1975
             "node hasn't returned node tcp connectivity data")
1976
    if not test:
1977
      if nresult[constants.NV_NODENETTEST]:
1978
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979
        for anode in nlist:
1980
          _ErrorIf(True, self.ENODENET, node,
1981
                   "tcp communication with node '%s': %s",
1982
                   anode, nresult[constants.NV_NODENETTEST][anode])
1983

    
1984
    test = constants.NV_MASTERIP not in nresult
1985
    _ErrorIf(test, self.ENODENET, node,
1986
             "node hasn't returned node master IP reachability data")
1987
    if not test:
1988
      if not nresult[constants.NV_MASTERIP]:
1989
        if node == self.master_node:
1990
          msg = "the master node cannot reach the master IP (not configured?)"
1991
        else:
1992
          msg = "cannot reach the master IP"
1993
        _ErrorIf(True, self.ENODENET, node, msg)
1994

    
1995
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1996
                      diskstatus):
1997
    """Verify an instance.
1998

1999
    This function checks to see if the required block devices are
2000
    available on the instance's node.
2001

2002
    """
2003
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004
    node_current = instanceconfig.primary_node
2005

    
2006
    node_vol_should = {}
2007
    instanceconfig.MapLVsByNode(node_vol_should)
2008

    
2009
    for node in node_vol_should:
2010
      n_img = node_image[node]
2011
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012
        # ignore missing volumes on offline or broken nodes
2013
        continue
2014
      for volume in node_vol_should[node]:
2015
        test = volume not in n_img.volumes
2016
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017
                 "volume %s missing on node %s", volume, node)
2018

    
2019
    if instanceconfig.admin_up:
2020
      pri_img = node_image[node_current]
2021
      test = instance not in pri_img.instances and not pri_img.offline
2022
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023
               "instance not running on its primary node %s",
2024
               node_current)
2025

    
2026
    diskdata = [(nname, success, status, idx)
2027
                for (nname, disks) in diskstatus.items()
2028
                for idx, (success, status) in enumerate(disks)]
2029

    
2030
    for nname, success, bdev_status, idx in diskdata:
2031
      # the 'ghost node' construction in Exec() ensures that we have a
2032
      # node here
2033
      snode = node_image[nname]
2034
      bad_snode = snode.ghost or snode.offline
2035
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036
               self.EINSTANCEFAULTYDISK, instance,
2037
               "couldn't retrieve status for disk/%s on %s: %s",
2038
               idx, nname, bdev_status)
2039
      _ErrorIf((instanceconfig.admin_up and success and
2040
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2041
               self.EINSTANCEFAULTYDISK, instance,
2042
               "disk/%s on %s is faulty", idx, nname)
2043

    
2044
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045
    """Verify if there are any unknown volumes in the cluster.
2046

2047
    The .os, .swap and backup volumes are ignored. All other volumes are
2048
    reported as unknown.
2049

2050
    @type reserved: L{ganeti.utils.FieldSet}
2051
    @param reserved: a FieldSet of reserved volume names
2052

2053
    """
2054
    for node, n_img in node_image.items():
2055
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056
        # skip non-healthy nodes
2057
        continue
2058
      for volume in n_img.volumes:
2059
        test = ((node not in node_vol_should or
2060
                volume not in node_vol_should[node]) and
2061
                not reserved.Matches(volume))
2062
        self._ErrorIf(test, self.ENODEORPHANLV, node,
2063
                      "volume %s is unknown", volume)
2064

    
2065
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066
    """Verify N+1 Memory Resilience.
2067

2068
    Check that if one single node dies we can still start all the
2069
    instances it was primary for.
2070

2071
    """
2072
    cluster_info = self.cfg.GetClusterInfo()
2073
    for node, n_img in node_image.items():
2074
      # This code checks that every node which is now listed as
2075
      # secondary has enough memory to host all instances it is
2076
      # supposed to should a single other node in the cluster fail.
2077
      # FIXME: not ready for failover to an arbitrary node
2078
      # FIXME: does not support file-backed instances
2079
      # WARNING: we currently take into account down instances as well
2080
      # as up ones, considering that even if they're down someone
2081
      # might want to start them even in the event of a node failure.
2082
      if n_img.offline:
2083
        # we're skipping offline nodes from the N+1 warning, since
2084
        # most likely we don't have good memory information from them;
2085
        # we already list instances living on such nodes, and that's
2086
        # enough warning
2087
        continue
2088
      for prinode, instances in n_img.sbp.items():
2089
        needed_mem = 0
2090
        for instance in instances:
2091
          bep = cluster_info.FillBE(instance_cfg[instance])
2092
          if bep[constants.BE_AUTO_BALANCE]:
2093
            needed_mem += bep[constants.BE_MEMORY]
2094
        test = n_img.mfree < needed_mem
2095
        self._ErrorIf(test, self.ENODEN1, node,
2096
                      "not enough memory to accomodate instance failovers"
2097
                      " should node %s fail (%dMiB needed, %dMiB available)",
2098
                      prinode, needed_mem, n_img.mfree)
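    # Illustrative example: if this node is secondary for two auto-balanced
    # instances sharing the same primary, with BE_MEMORY of 1024 and 2048,
    # it needs at least 3072 MiB free, otherwise the ENODEN1 error above is
    # reported.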
2099

    
2100
  @classmethod
2101
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102
                   (files_all, files_all_opt, files_mc, files_vm)):
2103
    """Verifies file checksums collected from all nodes.
2104

2105
    @param errorif: Callback for reporting errors
2106
    @param nodeinfo: List of L{objects.Node} objects
2107
    @param master_node: Name of master node
2108
    @param all_nvinfo: RPC results
2109

2110
    """
2111
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113
           "Found file listed in more than one file list"
2114

    
2115
    # Define functions determining which nodes to consider for a file
2116
    files2nodefn = [
2117
      (files_all, None),
2118
      (files_all_opt, None),
2119
      (files_mc, lambda node: (node.master_candidate or
2120
                               node.name == master_node)),
2121
      (files_vm, lambda node: node.vm_capable),
2122
      ]
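    # In short: files_all and files_all_opt are checked on every node,
    # files_mc only on master candidates (plus the master itself) and
    # files_vm only on vm_capable nodes.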
2123

    
2124
    # Build mapping from filename to list of nodes which should have the file
2125
    nodefiles = {}
2126
    for (files, fn) in files2nodefn:
2127
      if fn is None:
2128
        filenodes = nodeinfo
2129
      else:
2130
        filenodes = filter(fn, nodeinfo)
2131
      nodefiles.update((filename,
2132
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2133
                       for filename in files)
2134

    
2135
    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2136

    
2137
    fileinfo = dict((filename, {}) for filename in nodefiles)
2138
    ignore_nodes = set()
2139

    
2140
    for node in nodeinfo:
2141
      if node.offline:
2142
        ignore_nodes.add(node.name)
2143
        continue
2144

    
2145
      nresult = all_nvinfo[node.name]
2146

    
2147
      if nresult.fail_msg or not nresult.payload:
2148
        node_files = None
2149
      else:
2150
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2151

    
2152
      test = not (node_files and isinstance(node_files, dict))
2153
      errorif(test, cls.ENODEFILECHECK, node.name,
2154
              "Node did not return file checksum data")
2155
      if test:
2156
        ignore_nodes.add(node.name)
2157
        continue
2158

    
2159
      # Build per-checksum mapping from filename to nodes having it
2160
      for (filename, checksum) in node_files.items():
2161
        assert filename in nodefiles
2162
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2163

    
2164
    for (filename, checksums) in fileinfo.items():
2165
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2166

    
2167
      # Nodes having the file
2168
      with_file = frozenset(node_name
2169
                            for nodes in fileinfo[filename].values()
2170
                            for node_name in nodes) - ignore_nodes
2171

    
2172
      expected_nodes = nodefiles[filename] - ignore_nodes
2173

    
2174
      # Nodes missing file
2175
      missing_file = expected_nodes - with_file
2176

    
2177
      if filename in files_all_opt:
2178
        # All or no nodes
2179
        errorif(missing_file and missing_file != expected_nodes,
2180
                cls.ECLUSTERFILECHECK, None,
2181
                "File %s is optional, but it must exist on all or no"
2182
                " nodes (not found on %s)",
2183
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2184
      else:
2185
        # Non-optional files
2186
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187
                "File %s is missing from node(s) %s", filename,
2188
                utils.CommaJoin(utils.NiceSort(missing_file)))
2189

    
2190
        # Warn if a node has a file it shouldn't
2191
        unexpected = with_file - expected_nodes
2192
        errorif(unexpected,
2193
                cls.ECLUSTERFILECHECK, None,
2194
                "File %s should not exist on node(s) %s",
2195
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2196

    
2197
      # See if there are multiple versions of the file
2198
      test = len(checksums) > 1
2199
      if test:
2200
        variants = ["variant %s on %s" %
2201
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202
                    for (idx, (checksum, nodes)) in
2203
                      enumerate(sorted(checksums.items()))]
2204
      else:
2205
        variants = []
2206

    
2207
      errorif(test, cls.ECLUSTERFILECHECK, None,
2208
              "File %s found with %s different checksums (%s)",
2209
              filename, len(checksums), "; ".join(variants))
2210

    
2211
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2212
                      drbd_map):
2213
    """Verifies and the node DRBD status.
2214

2215
    @type ninfo: L{objects.Node}
2216
    @param ninfo: the node to check
2217
    @param nresult: the remote results for the node
2218
    @param instanceinfo: the dict of instances
2219
    @param drbd_helper: the configured DRBD usermode helper
2220
    @param drbd_map: the DRBD map as returned by
2221
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2222

2223
    """
2224
    node = ninfo.name
2225
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2226

    
2227
    if drbd_helper:
2228
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229
      test = (helper_result is None)
2230
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231
               "no drbd usermode helper returned")
2232
      if helper_result:
2233
        status, payload = helper_result
2234
        test = not status
2235
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236
                 "drbd usermode helper check unsuccessful: %s", payload)
2237
        test = status and (payload != drbd_helper)
2238
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239
                 "wrong drbd usermode helper: %s", payload)
2240

    
2241
    # compute the DRBD minors
2242
    node_drbd = {}
2243
    for minor, instance in drbd_map[node].items():
2244
      test = instance not in instanceinfo
2245
      _ErrorIf(test, self.ECLUSTERCFG, None,
2246
               "ghost instance '%s' in temporary DRBD map", instance)
2247
        # ghost instance should not be running, but otherwise we
2248
        # don't give double warnings (both ghost instance and
2249
        # unallocated minor in use)
2250
      if test:
2251
        node_drbd[minor] = (instance, False)
2252
      else:
2253
        instance = instanceinfo[instance]
2254
        node_drbd[minor] = (instance.name, instance.admin_up)
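    # node_drbd now maps every expected minor to an (instance name, should be
    # active) pair, e.g. (illustrative) {0: ("inst1.example.com", True)}.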
2255

    
2256
    # and now check them
2257
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258
    test = not isinstance(used_minors, (tuple, list))
2259
    _ErrorIf(test, self.ENODEDRBD, node,
2260
             "cannot parse drbd status file: %s", str(used_minors))
2261
    if test:
2262
      # we cannot check drbd status
2263
      return
2264

    
2265
    for minor, (iname, must_exist) in node_drbd.items():
2266
      test = minor not in used_minors and must_exist
2267
      _ErrorIf(test, self.ENODEDRBD, node,
2268
               "drbd minor %d of instance %s is not active", minor, iname)
2269
    for minor in used_minors:
2270
      test = minor not in node_drbd
2271
      _ErrorIf(test, self.ENODEDRBD, node,
2272
               "unallocated drbd minor %d is in use", minor)
2273

    
2274
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275
    """Builds the node OS structures.
2276

2277
    @type ninfo: L{objects.Node}
2278
    @param ninfo: the node to check
2279
    @param nresult: the remote results for the node
2280
    @param nimg: the node image object
2281

2282
    """
2283
    node = ninfo.name
2284
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2285

    
2286
    remote_os = nresult.get(constants.NV_OSLIST, None)
2287
    test = (not isinstance(remote_os, list) or
2288
            not compat.all(isinstance(v, list) and len(v) == 7
2289
                           for v in remote_os))
2290

    
2291
    _ErrorIf(test, self.ENODEOS, node,
2292
             "node hasn't returned valid OS data")
2293

    
2294
    nimg.os_fail = test
2295

    
2296
    if test:
2297
      return
2298

    
2299
    os_dict = {}
2300

    
2301
    for (name, os_path, status, diagnose,
2302
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2303

    
2304
      if name not in os_dict:
2305
        os_dict[name] = []
2306

    
2307
      # parameters is a list of lists instead of list of tuples due to
2308
      # JSON lacking a real tuple type, fix it:
2309
      parameters = [tuple(v) for v in parameters]
2310
      os_dict[name].append((os_path, status, diagnose,
2311
                            set(variants), set(parameters), set(api_ver)))
2312

    
2313
    nimg.oslist = os_dict
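    # (illustrative) nimg.oslist now looks like
    #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                     set(["default"]), set(), set([20]))]}
    # with one tuple per path under which an OS of that name was found.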
2314

    
2315
  def _VerifyNodeOS(self, ninfo, nimg, base):
2316
    """Verifies the node OS list.
2317

2318
    @type ninfo: L{objects.Node}
2319
    @param ninfo: the node to check
2320
    @param nimg: the node image object
2321
    @param base: the 'template' node we match against (e.g. from the master)
2322

2323
    """
2324
    node = ninfo.name
2325
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2326

    
2327
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2328

    
2329
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330
    for os_name, os_data in nimg.oslist.items():
2331
      assert os_data, "Empty OS status for OS %s?!" % os_name
2332
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333
      _ErrorIf(not f_status, self.ENODEOS, node,
2334
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2337
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2338
      # comparisons with the 'base' image
2339
      test = os_name not in base.oslist
2340
      _ErrorIf(test, self.ENODEOS, node,
2341
               "Extra OS %s not present on reference node (%s)",
2342
               os_name, base.name)
2343
      if test:
2344
        continue
2345
      assert base.oslist[os_name], "Base node has empty OS status?"
2346
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2347
      if not b_status:
2348
        # base OS is invalid, skipping
2349
        continue
2350
      for kind, a, b in [("API version", f_api, b_api),
2351
                         ("variants list", f_var, b_var),
2352
                         ("parameters", beautify_params(f_param),
2353
                          beautify_params(b_param))]:
2354
        _ErrorIf(a != b, self.ENODEOS, node,
2355
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356
                 kind, os_name, base.name,
2357
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2358

    
2359
    # check any missing OSes
2360
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361
    _ErrorIf(missing, self.ENODEOS, node,
2362
             "OSes present on reference node %s but missing on this node: %s",
2363
             base.name, utils.CommaJoin(missing))
2364

    
2365
  def _VerifyOob(self, ninfo, nresult):
2366
    """Verifies out of band functionality of a node.
2367

2368
    @type ninfo: L{objects.Node}
2369
    @param ninfo: the node to check
2370
    @param nresult: the remote results for the node
2371

2372
    """
2373
    node = ninfo.name
2374
    # We just have to verify the paths on master and/or master candidates
2375
    # as the oob helper is invoked on the master
2376
    if ((ninfo.master_candidate or ninfo.master_capable) and
2377
        constants.NV_OOB_PATHS in nresult):
2378
      for path_result in nresult[constants.NV_OOB_PATHS]:
2379
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2380

    
2381
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382
    """Verifies and updates the node volume data.
2383

2384
    This function will update a L{NodeImage}'s internal structures
2385
    with data from the remote call.
2386

2387
    @type ninfo: L{objects.Node}
2388
    @param ninfo: the node to check
2389
    @param nresult: the remote results for the node
2390
    @param nimg: the node image object
2391
    @param vg_name: the configured VG name
2392

2393
    """
2394
    node = ninfo.name
2395
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2396

    
2397
    nimg.lvm_fail = True
2398
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2399
    if vg_name is None:
2400
      pass
2401
    elif isinstance(lvdata, basestring):
2402
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403
               utils.SafeEncode(lvdata))
2404
    elif not isinstance(lvdata, dict):
2405
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2406
    else:
2407
      nimg.volumes = lvdata
2408
      nimg.lvm_fail = False
2409

    
2410
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411
    """Verifies and updates the node instance list.
2412

2413
    If the listing was successful, then updates this node's instance
2414
    list. Otherwise, it marks the RPC call as failed for the instance
2415
    list key.
2416

2417
    @type ninfo: L{objects.Node}
2418
    @param ninfo: the node to check
2419
    @param nresult: the remote results for the node
2420
    @param nimg: the node image object
2421

2422
    """
2423
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2424
    test = not isinstance(idata, list)
2425
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2427
    if test:
2428
      nimg.hyp_fail = True
2429
    else:
2430
      nimg.instances = idata
2431

    
2432
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433
    """Verifies and computes a node information map
2434

2435
    @type ninfo: L{objects.Node}
2436
    @param ninfo: the node to check
2437
    @param nresult: the remote results for the node
2438
    @param nimg: the node image object
2439
    @param vg_name: the configured VG name
2440

2441
    """
2442
    node = ninfo.name
2443
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444

    
2445
    # try to read free memory (from the hypervisor)
2446
    hv_info = nresult.get(constants.NV_HVINFO, None)
2447
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2449
    if not test:
2450
      try:
2451
        nimg.mfree = int(hv_info["memory_free"])
2452
      except (ValueError, TypeError):
2453
        _ErrorIf(True, self.ENODERPC, node,
2454
                 "node returned invalid nodeinfo, check hypervisor")
2455

    
2456
    # FIXME: devise a free space model for file based instances as well
2457
    if vg_name is not None:
2458
      test = (constants.NV_VGLIST not in nresult or
2459
              vg_name not in nresult[constants.NV_VGLIST])
2460
      _ErrorIf(test, self.ENODELVM, node,
2461
               "node didn't return data for the volume group '%s'"
2462
               " - it is either missing or broken", vg_name)
2463
      if not test:
2464
        try:
2465
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466
        except (ValueError, TypeError):
2467
          _ErrorIf(True, self.ENODERPC, node,
2468
                   "node returned invalid LVM info, check LVM status")
2469

    
2470
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471
    """Gets per-disk status information for all instances.
2472

2473
    @type nodelist: list of strings
2474
    @param nodelist: Node names
2475
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node image objects
2477
    @type instanceinfo: dict of (name, L{objects.Instance})
2478
    @param instanceinfo: Instance objects
2479
    @rtype: {instance: {node: [(success, payload)]}}
2480
    @return: a dictionary of per-instance dictionaries with nodes as
2481
        keys and disk information as values; the disk information is a
2482
        list of tuples (success, payload)
2483

2484
    """
2485
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2486

    
2487
    node_disks = {}
2488
    node_disks_devonly = {}
2489
    diskless_instances = set()
2490
    diskless = constants.DT_DISKLESS
2491

    
2492
    for nname in nodelist:
2493
      node_instances = list(itertools.chain(node_image[nname].pinst,
2494
                                            node_image[nname].sinst))
2495
      diskless_instances.update(inst for inst in node_instances
2496
                                if instanceinfo[inst].disk_template == diskless)
2497
      disks = [(inst, disk)
2498
               for inst in node_instances
2499
               for disk in instanceinfo[inst].disks]
2500

    
2501
      if not disks:
2502
        # No need to collect data
2503
        continue
2504

    
2505
      node_disks[nname] = disks
2506

    
2507
      # Creating copies as SetDiskID below will modify the objects and that can
2508
      # lead to incorrect data returned from nodes
2509
      devonly = [dev.Copy() for (_, dev) in disks]
2510

    
2511
      for dev in devonly:
2512
        self.cfg.SetDiskID(dev, nname)
2513

    
2514
      node_disks_devonly[nname] = devonly
2515

    
2516
    assert len(node_disks) == len(node_disks_devonly)
2517

    
2518
    # Collect data from all nodes with disks
2519
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2520
                                                          node_disks_devonly)
2521

    
2522
    assert len(result) == len(node_disks)
2523

    
2524
    instdisk = {}
2525

    
2526
    for (nname, nres) in result.items():
2527
      disks = node_disks[nname]
2528

    
2529
      if nres.offline:
2530
        # No data from this node
2531
        data = len(disks) * [(False, "node offline")]
2532
      else:
2533
        msg = nres.fail_msg
2534
        _ErrorIf(msg, self.ENODERPC, nname,
2535
                 "while getting disk information: %s", msg)
2536
        if msg:
2537
          # No data from this node
2538
          data = len(disks) * [(False, msg)]
2539
        else:
2540
          data = []
2541
          for idx, i in enumerate(nres.payload):
2542
            if isinstance(i, (tuple, list)) and len(i) == 2:
2543
              data.append(i)
2544
            else:
2545
              logging.warning("Invalid result from node %s, entry %d: %s",
2546
                              nname, idx, i)
2547
              data.append((False, "Invalid result from the remote node"))
2548

    
2549
      for ((inst, _), status) in zip(disks, data):
2550
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2551

    
2552
    # Add empty entries for diskless instances.
2553
    for inst in diskless_instances:
2554
      assert inst not in instdisk
2555
      instdisk[inst] = {}
2556

    
2557
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559
                      compat.all(isinstance(s, (tuple, list)) and
2560
                                 len(s) == 2 for s in statuses)
2561
                      for inst, nnames in instdisk.items()
2562
                      for nname, statuses in nnames.items())
2563
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2564

    
2565
    return instdisk
2566

    
2567
  @staticmethod
2568
  def _SshNodeSelector(group_uuid, all_nodes):
2569
    """Create endless iterators for all potential SSH check hosts.
2570

2571
    """
2572
    nodes = [node for node in all_nodes
2573
             if (node.group != group_uuid and
2574
                 not node.offline)]
2575
    keyfunc = operator.attrgetter("group")
2576

    
2577
    return map(itertools.cycle,
2578
               [sorted(map(operator.attrgetter("name"), names))
2579
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2580
                                                  keyfunc)])
2581

    
2582
  @classmethod
2583
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584
    """Choose which nodes should talk to which other nodes.
2585

2586
    We will make nodes contact all nodes in their group, and one node from
2587
    every other group.
2588

2589
    @warning: This algorithm has a known issue if one node group is much
2590
      smaller than others (e.g. just one node). In such a case all other
2591
      nodes will talk to the single node.
2592

2593
    """
2594
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2596

    
2597
    return (online_nodes,
2598
            dict((name, sorted([i.next() for i in sel]))
2599
                 for name in online_nodes))
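    # Illustrative example (node names invented): for online group nodes
    # ["n1", "n2"] and two foreign groups cycling over ["a1"] and
    # ["b1", "b2"], this returns roughly
    #   (["n1", "n2"], {"n1": ["a1", "b1"], "n2": ["a1", "b2"]})
    # so each node checks one representative of every other group.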
2600

    
2601
  def BuildHooksEnv(self):
2602
    """Build hooks env.
2603

2604
    Cluster-Verify hooks are only run in the post phase; their failure is
    logged in the verify output and makes the verification fail.
2606

2607
    """
2608
    env = {
2609
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2610
      }
2611

    
2612
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613
               for node in self.my_node_info.values())
2614

    
2615
    return env
2616

    
2617
  def BuildHooksNodes(self):
2618
    """Build hooks nodes.
2619

2620
    """
2621
    return ([], self.my_node_names)
2622

    
2623
  def Exec(self, feedback_fn):
2624
    """Verify integrity of the node group, performing various test on nodes.
2625

2626
    """
2627
    # This method has too many local variables. pylint: disable=R0914
2628
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2629

    
2630
    if not self.my_node_names:
2631
      # empty node group
2632
      feedback_fn("* Empty node group, skipping verification")
2633
      return True
2634

    
2635
    self.bad = False
2636
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637
    verbose = self.op.verbose
2638
    self._feedback_fn = feedback_fn
2639

    
2640
    vg_name = self.cfg.GetVGName()
2641
    drbd_helper = self.cfg.GetDRBDHelper()
2642
    cluster = self.cfg.GetClusterInfo()
2643
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644
    hypervisors = cluster.enabled_hypervisors
2645
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2646

    
2647
    i_non_redundant = [] # Non redundant instances
2648
    i_non_a_balanced = [] # Non auto-balanced instances
2649
    n_offline = 0 # Count of offline nodes
2650
    n_drained = 0 # Count of nodes being drained
2651
    node_vol_should = {}
2652

    
2653
    # FIXME: verify OS list
2654

    
2655
    # File verification
2656
    filemap = _ComputeAncillaryFiles(cluster, False)
2657

    
2658
    # do local checksums
2659
    master_node = self.master_node = self.cfg.GetMasterNode()
2660
    master_ip = self.cfg.GetMasterIP()
2661

    
2662
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2663

    
2664
    node_verify_param = {
2665
      constants.NV_FILELIST:
2666
        utils.UniqueSequence(filename
2667
                             for files in filemap
2668
                             for filename in files),
2669
      constants.NV_NODELIST:
2670
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671
                                  self.all_node_info.values()),
2672
      constants.NV_HYPERVISOR: hypervisors,
2673
      constants.NV_HVPARAMS:
2674
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676
                                 for node in node_data_list
2677
                                 if not node.offline],
2678
      constants.NV_INSTANCELIST: hypervisors,
2679
      constants.NV_VERSION: None,
2680
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681
      constants.NV_NODESETUP: None,
2682
      constants.NV_TIME: None,
2683
      constants.NV_MASTERIP: (master_node, master_ip),
2684
      constants.NV_OSLIST: None,
2685
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2686
      }
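    # node_verify_param is the single request dictionary passed to the
    # node_verify RPC below; each NV_* key enables one class of checks on
    # the nodes.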
2687

    
2688
    if vg_name is not None:
2689
      node_verify_param[constants.NV_VGLIST] = None
2690
      node_verify_param[constants.NV_LVLIST] = vg_name
2691
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2692
      node_verify_param[constants.NV_DRBDLIST] = None
2693

    
2694
    if drbd_helper:
2695
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2696

    
2697
    # bridge checks
2698
    # FIXME: this needs to be changed per node-group, not cluster-wide
2699
    bridges = set()
2700
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702
      bridges.add(default_nicpp[constants.NIC_LINK])
2703
    for instance in self.my_inst_info.values():
2704
      for nic in instance.nics:
2705
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707
          bridges.add(full_nic[constants.NIC_LINK])
2708

    
2709
    if bridges:
2710
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2711

    
2712
    # Build our expected cluster state
2713
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2714
                                                 name=node.name,
2715
                                                 vm_capable=node.vm_capable))
2716
                      for node in node_data_list)
2717

    
2718
    # Gather OOB paths
2719
    oob_paths = []
2720
    for node in self.all_node_info.values():
2721
      path = _SupportsOob(self.cfg, node)
2722
      if path and path not in oob_paths:
2723
        oob_paths.append(path)
2724

    
2725
    if oob_paths:
2726
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2727

    
2728
    for instance in self.my_inst_names:
2729
      inst_config = self.my_inst_info[instance]
2730

    
2731
      for nname in inst_config.all_nodes:
2732
        if nname not in node_image:
2733
          gnode = self.NodeImage(name=nname)
2734
          gnode.ghost = (nname not in self.all_node_info)
2735
          node_image[nname] = gnode
2736

    
2737
      inst_config.MapLVsByNode(node_vol_should)
2738

    
2739
      pnode = inst_config.primary_node
2740
      node_image[pnode].pinst.append(instance)
2741

    
2742
      for snode in inst_config.secondary_nodes:
2743
        nimg = node_image[snode]
2744
        nimg.sinst.append(instance)
2745
        if pnode not in nimg.sbp:
2746
          nimg.sbp[pnode] = []
2747
        nimg.sbp[pnode].append(instance)
2748

    
2749
    # At this point, we have the in-memory data structures complete,
2750
    # except for the runtime information, which we'll gather next
2751

    
2752
    # Due to the way our RPC system works, exact response times cannot be
2753
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754
    # time before and after executing the request, we can at least have a time
2755
    # window.
2756
    nvinfo_starttime = time.time()
2757
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2758
                                           node_verify_param,
2759
                                           self.cfg.GetClusterName())
2760
    nvinfo_endtime = time.time()
2761

    
2762
    if self.extra_lv_nodes and vg_name is not None:
2763
      extra_lv_nvinfo = \
2764
          self.rpc.call_node_verify(self.extra_lv_nodes,
2765
                                    {constants.NV_LVLIST: vg_name},
2766
                                    self.cfg.GetClusterName())
2767
    else:
2768
      extra_lv_nvinfo = {}
2769

    
2770
    all_drbd_map = self.cfg.ComputeDRBDMap()
2771

    
2772
    feedback_fn("* Gathering disk information (%s nodes)" %
2773
                len(self.my_node_names))
2774
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2775
                                     self.my_inst_info)
2776

    
2777
    feedback_fn("* Verifying configuration file consistency")
2778

    
2779
    # If not all nodes are being checked, we need to make sure the master node
2780
    # and a non-checked vm_capable node are in the list.
2781
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2782
    if absent_nodes:
2783
      vf_nvinfo = all_nvinfo.copy()
2784
      vf_node_info = list(self.my_node_info.values())
2785
      additional_nodes = []
2786
      if master_node not in self.my_node_info:
2787
        additional_nodes.append(master_node)
2788
        vf_node_info.append(self.all_node_info[master_node])
2789
      # Add the first vm_capable node we find which is not included
2790
      for node in absent_nodes:
2791
        nodeinfo = self.all_node_info[node]
2792
        if nodeinfo.vm_capable and not nodeinfo.offline:
2793
          additional_nodes.append(node)
2794
          vf_node_info.append(self.all_node_info[node])
2795
          break
2796
      key = constants.NV_FILELIST
2797
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798
                                                 {key: node_verify_param[key]},
2799
                                                 self.cfg.GetClusterName()))
2800
    else:
2801
      vf_nvinfo = all_nvinfo
2802
      vf_node_info = self.my_node_info.values()
2803

    
2804
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2805

    
2806
    feedback_fn("* Verifying node status")
2807

    
2808
    refos_img = None
2809

    
2810
    for node_i in node_data_list:
2811
      node = node_i.name
2812
      nimg = node_image[node]
2813

    
2814
      if node_i.offline:
2815
        if verbose:
2816
          feedback_fn("* Skipping offline node %s" % (node,))
2817
        n_offline += 1
2818
        continue
2819

    
2820
      if node == master_node:
2821
        ntype = "master"
2822
      elif node_i.master_candidate:
2823
        ntype = "master candidate"
2824
      elif node_i.drained:
2825
        ntype = "drained"
2826
        n_drained += 1
2827
      else:
2828
        ntype = "regular"
2829
      if verbose:
2830
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2831

    
2832
      msg = all_nvinfo[node].fail_msg
2833
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2834
      if msg:
2835
        nimg.rpc_fail = True
2836
        continue
2837

    
2838
      nresult = all_nvinfo[node].payload
2839

    
2840
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2841
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842
      self._VerifyNodeNetwork(node_i, nresult)
2843
      self._VerifyOob(node_i, nresult)
2844

    
2845
      if nimg.vm_capable:
2846
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2847
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2848
                             all_drbd_map)
2849

    
2850
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851
        self._UpdateNodeInstances(node_i, nresult, nimg)
2852
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853
        self._UpdateNodeOS(node_i, nresult, nimg)
2854

    
2855
        if not nimg.os_fail:
2856
          if refos_img is None:
2857
            refos_img = nimg
2858
          self._VerifyNodeOS(node_i, nimg, refos_img)
2859
        self._VerifyNodeBridges(node_i, nresult, bridges)
2860

    
2861
        # Check whether all running instances are primary for the node. (This
2862
        # can no longer be done from _VerifyInstance below, since some of the
2863
        # wrong instances could be from other node groups.)
2864
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2865

    
2866
        for inst in non_primary_inst:
2867
          test = inst in self.all_inst_info
2868
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869
                   "instance should not run on node %s", node_i.name)
2870
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871
                   "node is running unknown instance %s", inst)
2872

    
2873
    for node, result in extra_lv_nvinfo.items():
2874
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875
                              node_image[node], vg_name)
2876

    
2877
    feedback_fn("* Verifying instance status")
2878
    for instance in self.my_inst_names:
2879
      if verbose:
2880
        feedback_fn("* Verifying instance %s" % instance)
2881
      inst_config = self.my_inst_info[instance]
2882
      self._VerifyInstance(instance, inst_config, node_image,
2883
                           instdisk[instance])
2884
      inst_nodes_offline = []
2885

    
2886
      pnode = inst_config.primary_node
2887
      pnode_img = node_image[pnode]
2888
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889
               self.ENODERPC, pnode, "instance %s, connection to"
2890
               " primary node failed", instance)
2891

    
2892
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893
               self.EINSTANCEBADNODE, instance,
2894
               "instance is marked as running and lives on offline node %s",
2895
               inst_config.primary_node)
2896

    
2897
      # If the instance is non-redundant we cannot survive losing its primary
2898
      # node, so we are not N+1 compliant. On the other hand we have no disk
2899
      # templates with more than one secondary so that situation is not well
2900
      # supported either.
2901
      # FIXME: does not support file-backed instances
2902
      if not inst_config.secondary_nodes:
2903
        i_non_redundant.append(instance)
2904

    
2905
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906
               instance, "instance has multiple secondary nodes: %s",
2907
               utils.CommaJoin(inst_config.secondary_nodes),
2908
               code=self.ETYPE_WARNING)
2909

    
2910
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911
        pnode = inst_config.primary_node
2912
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913
        instance_groups = {}
2914

    
2915
        for node in instance_nodes:
2916
          instance_groups.setdefault(self.all_node_info[node].group,
2917
                                     []).append(node)
2918

    
2919
        pretty_list = [
2920
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921
          # Sort so that we always list the primary node first.
2922
          for group, nodes in sorted(instance_groups.items(),
2923
                                     key=lambda (_, nodes): pnode in nodes,
2924
                                     reverse=True)]
2925

    
2926
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927
                      instance, "instance has primary and secondary nodes in"
2928
                      " different groups: %s", utils.CommaJoin(pretty_list),
2929
                      code=self.ETYPE_WARNING)
2930

    
2931
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932
        i_non_a_balanced.append(instance)
2933

    
2934
      for snode in inst_config.secondary_nodes:
2935
        s_img = node_image[snode]
2936
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937
                 "instance %s, connection to secondary node failed", instance)
2938

    
2939
        if s_img.offline:
2940
          inst_nodes_offline.append(snode)
2941

    
2942
      # warn that the instance lives on offline nodes
2943
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944
               "instance has offline secondary node(s) %s",
2945
               utils.CommaJoin(inst_nodes_offline))
2946
      # ... or ghost/non-vm_capable nodes
2947
      for node in inst_config.all_nodes:
2948
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949
                 "instance lives on ghost node %s", node)
2950
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951
                 instance, "instance lives on non-vm_capable node %s", node)
2952

    
2953
    feedback_fn("* Verifying orphan volumes")
2954
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2955

    
2956
    # We will get spurious "unknown volume" warnings if any node of this group
2957
    # is secondary for an instance whose primary is in another group. To avoid
2958
    # them, we find these instances and add their volumes to node_vol_should.
2959
    for inst in self.all_inst_info.values():
2960
      for secondary in inst.secondary_nodes:
2961
        if (secondary in self.my_node_info
2962
            and inst.name not in self.my_inst_info):
2963
          inst.MapLVsByNode(node_vol_should)
2964
          break
2965

    
2966
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2967

    
2968
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969
      feedback_fn("* Verifying N+1 Memory redundancy")
2970
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2971

    
2972
    feedback_fn("* Other Notes")
2973
    if i_non_redundant:
2974
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2975
                  % len(i_non_redundant))
2976

    
2977
    if i_non_a_balanced:
2978
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2979
                  % len(i_non_a_balanced))
2980

    
2981
    if n_offline:
2982
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2983

    
2984
    if n_drained:
2985
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2986

    
2987
    return not self.bad
2988

    
2989
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990
    """Analyze the post-hooks' result
2991

2992
    This method analyses the hook result, handles it, and sends some
2993
    nicely-formatted feedback back to the user.
2994

2995
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997
    @param hooks_results: the results of the multi-node hooks rpc call
2998
    @param feedback_fn: function used send feedback back to the caller
2999
    @param lu_result: previous Exec result
3000
    @return: the new Exec result, based on the previous result
3001
        and hook results
3002

3003
    """
3004
    # We only really run POST phase hooks, only for non-empty groups,
3005
    # and are only interested in their results
3006
    if not self.my_node_names:
3007
      # empty node group
3008
      pass
3009
    elif phase == constants.HOOKS_PHASE_POST:
3010
      # Used to change hooks' output to proper indentation
3011
      feedback_fn("* Hooks Results")
3012
      assert hooks_results, "invalid result from hooks"
3013

    
3014
      for node_name in hooks_results:
3015
        res = hooks_results[node_name]
3016
        msg = res.fail_msg
3017
        test = msg and not res.offline
3018
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019
                      "Communication failure in hooks execution: %s", msg)
3020
        if res.offline or msg:
3021
          # No need to investigate payload if node is offline or gave
3022
          # an error.
3023
          continue
3024
        for script, hkr, output in res.payload:
3025
          test = hkr == constants.HKR_FAIL
3026
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027
                        "Script %s failed, output:", script)
3028
          if test:
3029
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3030
            feedback_fn("%s" % output)
3031
            lu_result = False
3032

    
3033
    return lu_result
3034

    
3035

    
3036
class LUClusterVerifyDisks(NoHooksLU):
3037
  """Verifies the cluster disks status.
3038

3039
  """
3040
  REQ_BGL = False
3041

    
3042
  def ExpandNames(self):
3043
    self.share_locks = _ShareAll()
3044
    self.needed_locks = {
3045
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3046
      }
3047

    
3048
  def Exec(self, feedback_fn):
3049
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3050

    
3051
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053
                           for group in group_names])
3054

    
3055

    
3056
class LUGroupVerifyDisks(NoHooksLU):
3057
  """Verifies the status of all disks in a node group.
3058

3059
  """
3060
  REQ_BGL = False
3061

    
3062
  def ExpandNames(self):
3063
    # Raises errors.OpPrereqError on its own if group can't be found
3064
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3065

    
3066
    self.share_locks = _ShareAll()
3067
    self.needed_locks = {
3068
      locking.LEVEL_INSTANCE: [],
3069
      locking.LEVEL_NODEGROUP: [],
3070
      locking.LEVEL_NODE: [],
3071
      }
3072

    
3073
  def DeclareLocks(self, level):
3074
    if level == locking.LEVEL_INSTANCE:
3075
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3076

    
3077
      # Lock instances optimistically, needs verification once node and group
3078
      # locks have been acquired
3079
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3080
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3081

    
3082
    elif level == locking.LEVEL_NODEGROUP:
3083
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3084

    
3085
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086
        set([self.group_uuid] +
3087
            # Lock all groups used by instances optimistically; this requires
3088
            # going via the node before it's locked, requiring verification
3089
            # later on
3090
            [group_uuid
3091
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3093

    
3094
    elif level == locking.LEVEL_NODE:
3095
      # This will only lock the nodes in the group to be verified which contain
3096
      # actual instances
3097
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098
      self._LockInstancesNodes()
3099

    
3100
      # Lock all nodes in group to be verified
3101
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3104

    
3105
  def CheckPrereq(self):
3106
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3109

    
3110
    assert self.group_uuid in owned_groups
3111

    
3112
    # Check if locked instances are still correct
3113
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3114

    
3115
    # Get instance information
3116
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3117

    
3118
    # Check if node groups for locked instances are still correct
3119
    for (instance_name, inst) in self.instances.items():
3120
      assert owned_nodes.issuperset(inst.all_nodes), \
3121
        "Instance %s's nodes changed while we kept the lock" % instance_name
3122

    
3123
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3124
                                             owned_groups)
3125

    
3126
      assert self.group_uuid in inst_groups, \
3127
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3128

    
3129
  def Exec(self, feedback_fn):
3130
    """Verify integrity of cluster disks.
3131

3132
    @rtype: tuple of three items
3133
    @return: a tuple of (dict of node-to-node_error, list of instances
3134
        which need activate-disks, dict of instance: (node, volume) for
3135
        missing volumes
3136

3137
    """
3138
    res_nodes = {}
3139
    res_instances = set()
3140
    res_missing = {}
3141

    
3142
    nv_dict = _MapInstanceDisksToNodes([inst
3143
                                        for inst in self.instances.values()
3144
                                        if inst.admin_up])
3145

    
3146
    if nv_dict:
3147
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148
                             set(self.cfg.GetVmCapableNodeList()))
3149

    
3150
      node_lvs = self.rpc.call_lv_list(nodes, [])
3151

    
3152
      for (node, node_res) in node_lvs.items():
3153
        if node_res.offline:
3154
          continue
3155

    
3156
        msg = node_res.fail_msg
3157
        if msg:
3158
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159
          res_nodes[node] = msg
3160
          continue
3161

    
3162
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3163
          inst = nv_dict.pop((node, lv_name), None)
3164
          if not (lv_online or inst is None):
3165
            res_instances.add(inst)
3166

    
3167
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3168
      # better
3169
      for key, inst in nv_dict.iteritems():
3170
        res_missing.setdefault(inst, []).append(list(key))
3171

    
3172
    return (res_nodes, list(res_instances), res_missing)
3173

    
3174

    
3175
class LUClusterRepairDiskSizes(NoHooksLU):
3176
  """Verifies the cluster disks sizes.
3177

3178
  """
3179
  REQ_BGL = False
3180

    
3181
  def ExpandNames(self):
3182
    if self.op.instances:
3183
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184
      self.needed_locks = {
3185
        locking.LEVEL_NODE: [],
3186
        locking.LEVEL_INSTANCE: self.wanted_names,
3187
        }
3188
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3189
    else:
3190
      self.wanted_names = None
3191
      self.needed_locks = {
3192
        locking.LEVEL_NODE: locking.ALL_SET,
3193
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3194
        }
3195
    self.share_locks = {
3196
      locking.LEVEL_NODE: 1,
3197
      locking.LEVEL_INSTANCE: 0,
3198
      }
3199

    
3200
  def DeclareLocks(self, level):
3201
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3202
      self._LockInstancesNodes(primary_only=True)
3203

    
3204
  def CheckPrereq(self):
3205
    """Check prerequisites.
3206

3207
    This only checks the optional instance list against the existing names.
3208

3209
    """
3210
    if self.wanted_names is None:
3211
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3212

    
3213
    self.wanted_instances = \
3214
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3215

    
3216
  def _EnsureChildSizes(self, disk):
3217
    """Ensure children of the disk have the needed disk size.
3218

3219
    This is valid mainly for DRBD8 and fixes an issue where the
3220
    children have smaller disk size.
3221

3222
    @param disk: an L{ganeti.objects.Disk} object
3223

3224
    """
3225
    if disk.dev_type == constants.LD_DRBD8:
3226
      assert disk.children, "Empty children for DRBD8?"
3227
      fchild = disk.children[0]
3228
      mismatch = fchild.size < disk.size
3229
      if mismatch:
3230
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3231
                     fchild.size, disk.size)
3232
        fchild.size = disk.size
3233

    
3234
      # and we recurse on this child only, not on the metadev
3235
      return self._EnsureChildSizes(fchild) or mismatch
3236
    else:
3237
      return False
3238

    
3239
  def Exec(self, feedback_fn):
3240
    """Verify the size of cluster disks.
3241

3242
    """
3243
    # TODO: check child disks too
3244
    # TODO: check differences in size between primary/secondary nodes
3245
    per_node_disks = {}
3246
    for instance in self.wanted_instances:
3247
      pnode = instance.primary_node
3248
      if pnode not in per_node_disks:
3249
        per_node_disks[pnode] = []
3250
      for idx, disk in enumerate(instance.disks):
3251
        per_node_disks[pnode].append((instance, idx, disk))
3252

    
3253
    changed = []
3254
    for node, dskl in per_node_disks.items():
3255
      newl = [v[2].Copy() for v in dskl]
3256
      for dsk in newl:
3257
        self.cfg.SetDiskID(dsk, node)
3258
      result = self.rpc.call_blockdev_getsize(node, newl)
3259
      if result.fail_msg:
3260
        self.LogWarning("Failure in blockdev_getsize call to node"
3261
                        " %s, ignoring", node)
3262
        continue
3263
      if len(result.payload) != len(dskl):
3264
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3265
                        " result.payload=%s", node, len(dskl), result.payload)
3266
        self.LogWarning("Invalid result from node %s, ignoring node results",
3267
                        node)
3268
        continue
3269
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3270
        if size is None:
3271
          self.LogWarning("Disk %d of instance %s did not return size"
3272
                          " information, ignoring", idx, instance.name)
3273
          continue
3274
        if not isinstance(size, (int, long)):
3275
          self.LogWarning("Disk %d of instance %s did not return valid"
3276
                          " size information, ignoring", idx, instance.name)
3277
          continue
3278
        size = size >> 20
3279
        if size != disk.size:
3280
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3281
                       " correcting: recorded %d, actual %d", idx,
3282
                       instance.name, disk.size, size)
3283
          disk.size = size
3284
          self.cfg.Update(instance, feedback_fn)
3285
          changed.append((instance.name, idx, size))
3286
        if self._EnsureChildSizes(disk):
3287
          self.cfg.Update(instance, feedback_fn)
3288
          changed.append((instance.name, idx, disk.size))
3289
    return changed
3290

    
3291

    
3292
class LUClusterRename(LogicalUnit):
3293
  """Rename the cluster.
3294

3295
  """
3296
  HPATH = "cluster-rename"
3297
  HTYPE = constants.HTYPE_CLUSTER
3298

    
3299
  def BuildHooksEnv(self):
3300
    """Build hooks env.
3301

3302
    """
3303
    return {
3304
      "OP_TARGET": self.cfg.GetClusterName(),
3305
      "NEW_NAME": self.op.name,
3306
      }
3307

    
3308
  def BuildHooksNodes(self):
3309
    """Build hooks nodes.
3310

3311
    """
3312
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3313

    
3314
  def CheckPrereq(self):
3315
    """Verify that the passed name is a valid one.
3316

3317
    """
3318
    hostname = netutils.GetHostname(name=self.op.name,
3319
                                    family=self.cfg.GetPrimaryIPFamily())
3320

    
3321
    new_name = hostname.name
3322
    self.ip = new_ip = hostname.ip
3323
    old_name = self.cfg.GetClusterName()
3324
    old_ip = self.cfg.GetMasterIP()
3325
    if new_name == old_name and new_ip == old_ip:
3326
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3327
                                 " cluster has changed",
3328
                                 errors.ECODE_INVAL)
3329
    if new_ip != old_ip:
3330
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3331
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3332
                                   " reachable on the network" %
3333
                                   new_ip, errors.ECODE_NOTUNIQUE)
3334

    
3335
    self.op.name = new_name
3336

    
3337
  def Exec(self, feedback_fn):
3338
    """Rename the cluster.
3339

3340
    """
3341
    clustername = self.op.name
3342
    ip = self.ip
3343

    
3344
    # shutdown the master IP
3345
    master = self.cfg.GetMasterNode()
3346
    result = self.rpc.call_node_stop_master(master, False)
3347
    result.Raise("Could not disable the master role")
3348

    
3349
    try:
3350
      cluster = self.cfg.GetClusterInfo()
3351
      cluster.cluster_name = clustername
3352
      cluster.master_ip = ip
3353
      self.cfg.Update(cluster, feedback_fn)
3354

    
3355
      # update the known hosts file
3356
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3357
      node_list = self.cfg.GetOnlineNodeList()
3358
      try:
3359
        node_list.remove(master)
3360
      except ValueError:
3361
        pass
3362
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3363
    finally:
3364
      result = self.rpc.call_node_start_master(master, False, False)
3365
      msg = result.fail_msg
3366
      if msg:
3367
        self.LogWarning("Could not re-enable the master role on"
3368
                        " the master, please restart manually: %s", msg)
3369

    
3370
    return clustername
3371

    
3372

    
3373
class LUClusterSetParams(LogicalUnit):
3374
  """Change the parameters of the cluster.
3375

3376
  """
3377
  HPATH = "cluster-modify"
3378
  HTYPE = constants.HTYPE_CLUSTER
3379
  REQ_BGL = False
3380

    
3381
  def CheckArguments(self):
3382
    """Check parameters
3383

3384
    """
3385
    if self.op.uid_pool:
3386
      uidpool.CheckUidPool(self.op.uid_pool)
3387

    
3388
    if self.op.add_uids:
3389
      uidpool.CheckUidPool(self.op.add_uids)
3390

    
3391
    if self.op.remove_uids:
3392
      uidpool.CheckUidPool(self.op.remove_uids)
3393

    
3394
  def ExpandNames(self):
3395
    # FIXME: in the future maybe other cluster params won't require checking on
3396
    # all nodes to be modified.
3397
    self.needed_locks = {
3398
      locking.LEVEL_NODE: locking.ALL_SET,
3399
    }
3400
    self.share_locks[locking.LEVEL_NODE] = 1
3401

    
3402
  def BuildHooksEnv(self):
3403
    """Build hooks env.
3404

3405
    """
3406
    return {
3407
      "OP_TARGET": self.cfg.GetClusterName(),
3408
      "NEW_VG_NAME": self.op.vg_name,
3409
      }
3410

    
3411
  def BuildHooksNodes(self):
3412
    """Build hooks nodes.
3413

3414
    """
3415
    mn = self.cfg.GetMasterNode()
3416
    return ([mn], [mn])
3417

    
3418
  def CheckPrereq(self):
3419
    """Check prerequisites.
3420

3421
    This checks whether the given params don't conflict and
3422
    if the given volume group is valid.
3423

3424
    """
3425
    if self.op.vg_name is not None and not self.op.vg_name:
3426
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3427
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3428
                                   " instances exist", errors.ECODE_INVAL)
3429

    
3430
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3431
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3432
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3433
                                   " drbd-based instances exist",
3434
                                   errors.ECODE_INVAL)
3435

    
3436
    node_list = self.owned_locks(locking.LEVEL_NODE)
3437

    
3438
    # if vg_name not None, checks given volume group on all nodes
3439
    if self.op.vg_name:
3440
      vglist = self.rpc.call_vg_list(node_list)
3441
      for node in node_list:
3442
        msg = vglist[node].fail_msg
3443
        if msg:
3444
          # ignoring down node
3445
          self.LogWarning("Error while gathering data on node %s"
3446
                          " (ignoring node): %s", node, msg)
3447
          continue
3448
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3449
                                              self.op.vg_name,
3450
                                              constants.MIN_VG_SIZE)
3451
        if vgstatus:
3452
          raise errors.OpPrereqError("Error on node '%s': %s" %
3453
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3454

    
3455
    if self.op.drbd_helper:
3456
      # checks given drbd helper on all nodes
3457
      helpers = self.rpc.call_drbd_helper(node_list)
3458
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3459
        if ninfo.offline:
3460
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3461
          continue
3462
        msg = helpers[node].fail_msg
3463
        if msg:
3464
          raise errors.OpPrereqError("Error checking drbd helper on node"
3465
                                     " '%s': %s" % (node, msg),
3466
                                     errors.ECODE_ENVIRON)
3467
        node_helper = helpers[node].payload
3468
        if node_helper != self.op.drbd_helper:
3469
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3470
                                     (node, node_helper), errors.ECODE_ENVIRON)
3471

    
3472
    self.cluster = cluster = self.cfg.GetClusterInfo()
3473
    # validate params changes
3474
    if self.op.beparams:
3475
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3476
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3477

    
3478
    if self.op.ndparams:
3479
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3480
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3481

    
3482
      # TODO: we need a more general way to handle resetting
3483
      # cluster-level parameters to default values
3484
      if self.new_ndparams["oob_program"] == "":
3485
        self.new_ndparams["oob_program"] = \
3486
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3487

    
3488
    if self.op.nicparams:
3489
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3490
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3491
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3492
      nic_errors = []
3493

    
3494
      # check all instances for consistency
3495
      for instance in self.cfg.GetAllInstancesInfo().values():
3496
        for nic_idx, nic in enumerate(instance.nics):
3497
          params_copy = copy.deepcopy(nic.nicparams)
3498
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3499

    
3500
          # check parameter syntax
3501
          try:
3502
            objects.NIC.CheckParameterSyntax(params_filled)
3503
          except errors.ConfigurationError, err:
3504
            nic_errors.append("Instance %s, nic/%d: %s" %
3505
                              (instance.name, nic_idx, err))
3506

    
3507
          # if we're moving instances to routed, check that they have an ip
3508
          target_mode = params_filled[constants.NIC_MODE]
3509
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3510
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3511
                              " address" % (instance.name, nic_idx))
3512
      if nic_errors:
3513
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3514
                                   "\n".join(nic_errors))
3515

    
3516
    # hypervisor list/parameters
3517
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3518
    if self.op.hvparams:
3519
      for hv_name, hv_dict in self.op.hvparams.items():
3520
        if hv_name not in self.new_hvparams:
3521
          self.new_hvparams[hv_name] = hv_dict
3522
        else:
3523
          self.new_hvparams[hv_name].update(hv_dict)
3524

    
3525
    # os hypervisor parameters
3526
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3527
    if self.op.os_hvp:
3528
      for os_name, hvs in self.op.os_hvp.items():
3529
        if os_name not in self.new_os_hvp:
3530
          self.new_os_hvp[os_name] = hvs
3531
        else:
3532
          for hv_name, hv_dict in hvs.items():
3533
            if hv_name not in self.new_os_hvp[os_name]:
3534
              self.new_os_hvp[os_name][hv_name] = hv_dict
3535
            else:
3536
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3537

    
3538
    # os parameters
3539
    self.new_osp = objects.FillDict(cluster.osparams, {})
3540
    if self.op.osparams:
3541
      for os_name, osp in self.op.osparams.items():
3542
        if os_name not in self.new_osp:
3543
          self.new_osp[os_name] = {}
3544

    
3545
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3546
                                                  use_none=True)
3547

    
3548
        if not self.new_osp[os_name]:
3549
          # we removed all parameters
3550
          del self.new_osp[os_name]
3551
        else:
3552
          # check the parameter validity (remote check)
3553
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3554
                         os_name, self.new_osp[os_name])
3555

    
3556
    # changes to the hypervisor list
3557
    if self.op.enabled_hypervisors is not None:
3558
      self.hv_list = self.op.enabled_hypervisors
3559
      for hv in self.hv_list:
3560
        # if the hypervisor doesn't already exist in the cluster
3561
        # hvparams, we initialize it to empty, and then (in both
3562
        # cases) we make sure to fill the defaults, as we might not
3563
        # have a complete defaults list if the hypervisor wasn't
3564
        # enabled before
3565
        if hv not in new_hvp:
3566
          new_hvp[hv] = {}
3567
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3568
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3569
    else:
3570
      self.hv_list = cluster.enabled_hypervisors
3571

    
3572
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3573
      # either the enabled list has changed, or the parameters have, validate
3574
      for hv_name, hv_params in self.new_hvparams.items():
3575
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3576
            (self.op.enabled_hypervisors and
3577
             hv_name in self.op.enabled_hypervisors)):
3578
          # either this is a new hypervisor, or its parameters have changed
3579
          hv_class = hypervisor.GetHypervisor(hv_name)
3580
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3581
          hv_class.CheckParameterSyntax(hv_params)
3582
          _CheckHVParams(self, node_list, hv_name, hv_params)
3583

    
3584
    if self.op.os_hvp:
3585
      # no need to check any newly-enabled hypervisors, since the
3586
      # defaults have already been checked in the above code-block
3587
      for os_name, os_hvp in self.new_os_hvp.items():
3588
        for hv_name, hv_params in os_hvp.items():
3589
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3590
          # we need to fill in the new os_hvp on top of the actual hv_p
3591
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3592
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3593
          hv_class = hypervisor.GetHypervisor(hv_name)
3594
          hv_class.CheckParameterSyntax(new_osp)
3595
          _CheckHVParams(self, node_list, hv_name, new_osp)
3596

    
3597
    if self.op.default_iallocator:
3598
      alloc_script = utils.FindFile(self.op.default_iallocator,
3599
                                    constants.IALLOCATOR_SEARCH_PATH,
3600
                                    os.path.isfile)
3601
      if alloc_script is None:
3602
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3603
                                   " specified" % self.op.default_iallocator,
3604
                                   errors.ECODE_INVAL)
3605

    
3606
  def Exec(self, feedback_fn):
3607
    """Change the parameters of the cluster.
3608

3609
    """
3610
    if self.op.vg_name is not None:
3611
      new_volume = self.op.vg_name
3612
      if not new_volume:
3613
        new_volume = None
3614
      if new_volume != self.cfg.GetVGName():
3615
        self.cfg.SetVGName(new_volume)
3616
      else:
3617
        feedback_fn("Cluster LVM configuration already in desired"
3618
                    " state, not changing")
3619
    if self.op.drbd_helper is not None:
3620
      new_helper = self.op.drbd_helper
3621
      if not new_helper:
3622
        new_helper = None
3623
      if new_helper != self.cfg.GetDRBDHelper():
3624
        self.cfg.SetDRBDHelper(new_helper)
3625
      else:
3626
        feedback_fn("Cluster DRBD helper already in desired state,"
3627
                    " not changing")
3628
    if self.op.hvparams:
3629
      self.cluster.hvparams = self.new_hvparams
3630
    if self.op.os_hvp:
3631
      self.cluster.os_hvp = self.new_os_hvp
3632
    if self.op.enabled_hypervisors is not None:
3633
      self.cluster.hvparams = self.new_hvparams
3634
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3635
    if self.op.beparams:
3636
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3637
    if self.op.nicparams:
3638
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3639
    if self.op.osparams:
3640
      self.cluster.osparams = self.new_osp
3641
    if self.op.ndparams:
3642
      self.cluster.ndparams = self.new_ndparams
3643

    
3644
    if self.op.candidate_pool_size is not None:
3645
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3646
      # we need to update the pool size here, otherwise the save will fail
3647
      _AdjustCandidatePool(self, [])
3648

    
3649
    if self.op.maintain_node_health is not None:
3650
      self.cluster.maintain_node_health = self.op.maintain_node_health
3651

    
3652
    if self.op.prealloc_wipe_disks is not None:
3653
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3654

    
3655
    if self.op.add_uids is not None:
3656
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3657

    
3658
    if self.op.remove_uids is not None:
3659
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3660

    
3661
    if self.op.uid_pool is not None:
3662
      self.cluster.uid_pool = self.op.uid_pool
3663

    
3664
    if self.op.default_iallocator is not None:
3665
      self.cluster.default_iallocator = self.op.default_iallocator
3666

    
3667
    if self.op.reserved_lvs is not None:
3668
      self.cluster.reserved_lvs = self.op.reserved_lvs
3669

    
3670
    def helper_os(aname, mods, desc):
3671
      desc += " OS list"
3672
      lst = getattr(self.cluster, aname)
3673
      for key, val in mods:
3674
        if key == constants.DDM_ADD:
3675
          if val in lst:
3676
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3677
          else:
3678
            lst.append(val)
3679
        elif key == constants.DDM_REMOVE:
3680
          if val in lst:
3681
            lst.remove(val)
3682
          else:
3683
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3684
        else:
3685
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3686

    
3687
    if self.op.hidden_os:
3688
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3689

    
3690
    if self.op.blacklisted_os:
3691
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3692

    
3693
    if self.op.master_netdev:
3694
      master = self.cfg.GetMasterNode()
3695
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3696
                  self.cluster.master_netdev)
3697
      result = self.rpc.call_node_stop_master(master, False)
3698
      result.Raise("Could not disable the master ip")
3699
      feedback_fn("Changing master_netdev from %s to %s" %
3700
                  (self.cluster.master_netdev, self.op.master_netdev))
3701
      self.cluster.master_netdev = self.op.master_netdev
3702

    
3703
    self.cfg.Update(self.cluster, feedback_fn)
3704

    
3705
    if self.op.master_netdev:
3706
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3707
                  self.op.master_netdev)
3708
      result = self.rpc.call_node_start_master(master, False, False)
3709
      if result.fail_msg:
3710
        self.LogWarning("Could not re-enable the master ip on"
3711
                        " the master, please restart manually: %s",
3712
                        result.fail_msg)
3713

    
3714

    
3715
def _UploadHelper(lu, nodes, fname):
3716
  """Helper for uploading a file and showing warnings.
3717

3718
  """
3719
  if os.path.exists(fname):
3720
    result = lu.rpc.call_upload_file(nodes, fname)
3721
    for to_node, to_result in result.items():
3722
      msg = to_result.fail_msg
3723
      if msg:
3724
        msg = ("Copy of file %s to node %s failed: %s" %
3725
               (fname, to_node, msg))
3726
        lu.proc.LogWarning(msg)
3727

    
3728

    
3729
def _ComputeAncillaryFiles(cluster, redist):
3730
  """Compute files external to Ganeti which need to be consistent.
3731

3732
  @type redist: boolean
3733
  @param redist: Whether to include files which need to be redistributed
3734

3735
  """
3736
  # Compute files for all nodes
3737
  files_all = set([
3738
    constants.SSH_KNOWN_HOSTS_FILE,
3739
    constants.CONFD_HMAC_KEY,
3740
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3741
    ])
3742

    
3743
  if not redist:
3744
    files_all.update(constants.ALL_CERT_FILES)
3745
    files_all.update(ssconf.SimpleStore().GetFileList())
3746
  else:
3747
    # we need to ship at least the RAPI certificate
3748
    files_all.add(constants.RAPI_CERT_FILE)
3749

    
3750
  if cluster.modify_etc_hosts:
3751
    files_all.add(constants.ETC_HOSTS)
3752

    
3753
  # Files which must either exist on all nodes or on none
3754
  files_all_opt = set([
3755
    constants.RAPI_USERS_FILE,
3756
    ])
3757

    
3758
  # Files which should only be on master candidates
3759
  files_mc = set()
3760
  if not redist:
3761
    files_mc.add(constants.CLUSTER_CONF_FILE)
3762

    
3763
  # Files which should only be on VM-capable nodes
3764
  files_vm = set(filename
3765
    for hv_name in cluster.enabled_hypervisors
3766
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3767

    
3768
  # Filenames must be unique
3769
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3770
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3771
         "Found file listed in more than one file list"
3772

    
3773
  return (files_all, files_all_opt, files_mc, files_vm)
3774

    
3775

    
3776
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3777
  """Distribute additional files which are part of the cluster configuration.
3778

3779
  ConfigWriter takes care of distributing the config and ssconf files, but
3780
  there are more files which should be distributed to all nodes. This function
3781
  makes sure those are copied.
3782

3783
  @param lu: calling logical unit
3784
  @param additional_nodes: list of nodes not in the config to distribute to
3785
  @type additional_vm: boolean
3786
  @param additional_vm: whether the additional nodes are vm-capable or not
3787

3788
  """
3789
  # Gather target nodes
3790
  cluster = lu.cfg.GetClusterInfo()
3791
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3792

    
3793
  online_nodes = lu.cfg.GetOnlineNodeList()
3794
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3795

    
3796
  if additional_nodes is not None:
3797
    online_nodes.extend(additional_nodes)
3798
    if additional_vm:
3799
      vm_nodes.extend(additional_nodes)
3800

    
3801
  # Never distribute to master node
3802
  for nodelist in [online_nodes, vm_nodes]:
3803
    if master_info.name in nodelist:
3804
      nodelist.remove(master_info.name)
3805

    
3806
  # Gather file lists
3807
  (files_all, files_all_opt, files_mc, files_vm) = \
3808
    _ComputeAncillaryFiles(cluster, True)
3809

    
3810
  # Never re-distribute configuration file from here
3811
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3812
              constants.CLUSTER_CONF_FILE in files_vm)
3813
  assert not files_mc, "Master candidates not handled in this function"
3814

    
3815
  filemap = [
3816
    (online_nodes, files_all),
3817
    (online_nodes, files_all_opt),
3818
    (vm_nodes, files_vm),
3819
    ]
3820

    
3821
  # Upload the files
3822
  for (node_list, files) in filemap:
3823
    for fname in files:
3824
      _UploadHelper(lu, node_list, fname)
3825

    
3826

    
3827
class LUClusterRedistConf(NoHooksLU):
3828
  """Force the redistribution of cluster configuration.
3829

3830
  This is a very simple LU.
3831

3832
  """
3833
  REQ_BGL = False
3834

    
3835
  def ExpandNames(self):
3836
    self.needed_locks = {
3837
      locking.LEVEL_NODE: locking.ALL_SET,
3838
    }
3839
    self.share_locks[locking.LEVEL_NODE] = 1
3840

    
3841
  def Exec(self, feedback_fn):
3842
    """Redistribute the configuration.
3843

3844
    """
3845
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3846
    _RedistributeAncillaryFiles(self)
3847

    
3848

    
3849
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3850
  """Sleep and poll for an instance's disk to sync.
3851

3852
  """
3853
  if not instance.disks or disks is not None and not disks:
3854
    return True
3855

    
3856
  disks = _ExpandCheckDisks(instance, disks)
3857

    
3858
  if not oneshot:
3859
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3860

    
3861
  node = instance.primary_node
3862

    
3863
  for dev in disks:
3864
    lu.cfg.SetDiskID(dev, node)
3865

    
3866
  # TODO: Convert to utils.Retry
3867

    
3868
  retries = 0
3869
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3870
  while True:
3871
    max_time = 0
3872
    done = True
3873
    cumul_degraded = False
3874
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3875
    msg = rstats.fail_msg
3876
    if msg:
3877
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3878
      retries += 1
3879
      if retries >= 10:
3880
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3881
                                 " aborting." % node)
3882
      time.sleep(6)
3883
      continue
3884
    rstats = rstats.payload
3885
    retries = 0
3886
    for i, mstat in enumerate(rstats):
3887
      if mstat is None:
3888
        lu.LogWarning("Can't compute data for node %s/%s",
3889
                           node, disks[i].iv_name)
3890
        continue
3891

    
3892
      cumul_degraded = (cumul_degraded or
3893
                        (mstat.is_degraded and mstat.sync_percent is None))
3894
      if mstat.sync_percent is not None:
3895
        done = False
3896
        if mstat.estimated_time is not None:
3897
          rem_time = ("%s remaining (estimated)" %
3898
                      utils.FormatSeconds(mstat.estimated_time))
3899
          max_time = mstat.estimated_time
3900
        else:
3901
          rem_time = "no time estimate"
3902
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3903
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3904

    
3905
    # if we're done but degraded, let's do a few small retries, to
3906
    # make sure we see a stable and not transient situation; therefore
3907
    # we force restart of the loop
3908
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3909
      logging.info("Degraded disks found, %d retries left", degr_retries)
3910
      degr_retries -= 1
3911
      time.sleep(1)
3912
      continue
3913

    
3914
    if done or oneshot:
3915
      break
3916

    
3917
    time.sleep(min(60, max_time))
3918

    
3919
  if done:
3920
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3921
  return not cumul_degraded
3922

    
3923

    
3924
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3925
  """Check that mirrors are not degraded.
3926

3927
  The ldisk parameter, if True, will change the test from the
3928
  is_degraded attribute (which represents overall non-ok status for
3929
  the device(s)) to the ldisk (representing the local storage status).
3930

3931
  """
3932
  lu.cfg.SetDiskID(dev, node)
3933

    
3934
  result = True
3935

    
3936
  if on_primary or dev.AssembleOnSecondary():
3937
    rstats = lu.rpc.call_blockdev_find(node, dev)
3938
    msg = rstats.fail_msg
3939
    if msg:
3940
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3941
      result = False
3942
    elif not rstats.payload:
3943
      lu.LogWarning("Can't find disk on node %s", node)
3944
      result = False
3945
    else:
3946
      if ldisk:
3947
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3948
      else:
3949
        result = result and not rstats.payload.is_degraded
3950

    
3951
  if dev.children:
3952
    for child in dev.children:
3953
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3954

    
3955
  return result
3956

    
3957

    
3958
class LUOobCommand(NoHooksLU):
3959
  """Logical unit for OOB handling.
3960

3961
  """
3962
  REG_BGL = False
3963
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3964

    
3965
  def ExpandNames(self):
3966
    """Gather locks we need.
3967

3968
    """
3969
    if self.op.node_names:
3970
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3971
      lock_names = self.op.node_names
3972
    else:
3973
      lock_names = locking.ALL_SET
3974

    
3975
    self.needed_locks = {
3976
      locking.LEVEL_NODE: lock_names,
3977
      }
3978

    
3979
  def CheckPrereq(self):
3980
    """Check prerequisites.
3981

3982
    This checks:
3983
     - the node exists in the configuration
3984
     - OOB is supported
3985

3986
    Any errors are signaled by raising errors.OpPrereqError.
3987

3988
    """
3989
    self.nodes = []
3990
    self.master_node = self.cfg.GetMasterNode()
3991

    
3992
    assert self.op.power_delay >= 0.0
3993

    
3994
    if self.op.node_names:
3995
      if (self.op.command in self._SKIP_MASTER and
3996
          self.master_node in self.op.node_names):
3997
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3998
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3999

    
4000
        if master_oob_handler:
4001
          additional_text = ("run '%s %s %s' if you want to operate on the"
4002
                             " master regardless") % (master_oob_handler,
4003
                                                      self.op.command,
4004
                                                      self.master_node)
4005
        else:
4006
          additional_text = "it does not support out-of-band operations"
4007

    
4008
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4009
                                    " allowed for %s; %s") %
4010
                                   (self.master_node, self.op.command,
4011
                                    additional_text), errors.ECODE_INVAL)
4012
    else:
4013
      self.op.node_names = self.cfg.GetNodeList()
4014
      if self.op.command in self._SKIP_MASTER:
4015
        self.op.node_names.remove(self.master_node)
4016

    
4017
    if self.op.command in self._SKIP_MASTER:
4018
      assert self.master_node not in self.op.node_names
4019

    
4020
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4021
      if node is None:
4022
        raise errors.OpPrereqError("Node %s not found" % node_name,
4023
                                   errors.ECODE_NOENT)
4024
      else:
4025
        self.nodes.append(node)
4026

    
4027
      if (not self.op.ignore_status and
4028
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4029
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4030
                                    " not marked offline") % node_name,
4031
                                   errors.ECODE_STATE)
4032

    
4033
  def Exec(self, feedback_fn):
4034
    """Execute OOB and return result if we expect any.
4035

4036
    """
4037
    master_node = self.master_node
4038
    ret = []
4039

    
4040
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4041
                                              key=lambda node: node.name)):
4042
      node_entry = [(constants.RS_NORMAL, node.name)]
4043
      ret.append(node_entry)
4044

    
4045
      oob_program = _SupportsOob(self.cfg, node)
4046

    
4047
      if not oob_program:
4048
        node_entry.append((constants.RS_UNAVAIL, None))
4049
        continue
4050

    
4051
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4052
                   self.op.command, oob_program, node.name)
4053
      result = self.rpc.call_run_oob(master_node, oob_program,
4054
                                     self.op.command, node.name,
4055
                                     self.op.timeout)
4056

    
4057
      if result.fail_msg:
4058
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4059
                        node.name, result.fail_msg)
4060
        node_entry.append((constants.RS_NODATA, None))
4061
      else:
4062
        try:
4063
          self._CheckPayload(result)
4064
        except errors.OpExecError, err:
4065
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4066
                          node.name, err)
4067
          node_entry.append((constants.RS_NODATA, None))
4068
        else:
4069
          if self.op.command == constants.OOB_HEALTH:
4070
            # For health we should log important events
4071
            for item, status in result.payload:
4072
              if status in [constants.OOB_STATUS_WARNING,
4073
                            constants.OOB_STATUS_CRITICAL]:
4074
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4075
                                item, node.name, status)
4076

    
4077
          if self.op.command == constants.OOB_POWER_ON:
4078
            node.powered = True
4079
          elif self.op.command == constants.OOB_POWER_OFF:
4080
            node.powered = False
4081
          elif self.op.command == constants.OOB_POWER_STATUS:
4082
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4083
            if powered != node.powered:
4084
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4085
                               " match actual power state (%s)"), node.powered,
4086
                              node.name, powered)
4087

    
4088
          # For configuration changing commands we should update the node
4089
          if self.op.command in (constants.OOB_POWER_ON,
4090
                                 constants.OOB_POWER_OFF):
4091
            self.cfg.Update(node, feedback_fn)
4092

    
4093
          node_entry.append((constants.RS_NORMAL, result.payload))
4094

    
4095
          if (self.op.command == constants.OOB_POWER_ON and
4096
              idx < len(self.nodes) - 1):
4097
            time.sleep(self.op.power_delay)
4098

    
4099
    return ret
4100

    
4101
  def _CheckPayload(self, result):
4102
    """Checks if the payload is valid.
4103

4104
    @param result: RPC result
4105
    @raises errors.OpExecError: If payload is not valid
4106

4107
    """
4108
    errs = []
4109
    if self.op.command == constants.OOB_HEALTH:
4110
      if not isinstance(result.payload, list):
4111
        errs.append("command 'health' is expected to return a list but got %s" %
4112
                    type(result.payload))
4113
      else:
4114
        for item, status in result.payload:
4115
          if status not in constants.OOB_STATUSES:
4116
            errs.append("health item '%s' has invalid status '%s'" %
4117
                        (item, status))
4118

    
4119
    if self.op.command == constants.OOB_POWER_STATUS:
4120
      if not isinstance(result.payload, dict):
4121
        errs.append("power-status is expected to return a dict but got %s" %
4122
                    type(result.payload))
4123

    
4124
    if self.op.command in [
4125
        constants.OOB_POWER_ON,
4126
        constants.OOB_POWER_OFF,
4127
        constants.OOB_POWER_CYCLE,
4128
        ]:
4129
      if result.payload is not None:
4130
        errs.append("%s is expected to not return payload but got '%s'" %
4131
                    (self.op.command, result.payload))
4132

    
4133
    if errs:
4134
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4135
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


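# A minimal, illustrative sketch (not used by any LU in this module) of how
# _OsQuery._DiagnoseByOS reshapes per-node RPC results into a per-OS mapping.
# "_FakeRpcResult" is a hypothetical stand-in for the real rpc result objects;
# only the two attributes read by _DiagnoseByOS are modelled here.
def _ExampleDiagnoseByOS():
  class _FakeRpcResult:
    def __init__(self, payload, fail_msg=None):
      self.payload = payload
      self.fail_msg = fail_msg

  rlist = {
    "node1": _FakeRpcResult([("debian-etch", "/usr/lib/os/debian-etch",
                              True, "", ["etch"], [["dhcp", "use dhcp"]],
                              [10])]),
    "node2": _FakeRpcResult(None, fail_msg="node daemon not responding"),
    }
  # node2 failed at the RPC level, so it is left out of the per-OS node lists
  # instead of marking every OS as invalid; the result is:
  #   {"debian-etch": {"node1": [("/usr/lib/os/debian-etch", True, "",
  #                               ["etch"], [("dhcp", "use dhcp")], [10])]}}
  return _OsQuery._DiagnoseByOS(rlist)

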
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


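# Illustrative sketch only: the filter LUOsDiagnose._BuildFilter produces when
# just the "name" field is requested for a single OS.  Since "hidden",
# "blacklisted" and "valid" are not among the requested fields, the legacy
# visibility filter is combined with the name filter.
def _ExampleOsDiagnoseFilter():
  flt = LUOsDiagnose._BuildFilter(["name"], ["debian-etch"])
  # flt is equivalent to:
  #   [qlang.OP_AND,
  #    qlang.MakeSimpleFilter("name", ["debian-etch"]),
  #    [qlang.OP_AND,
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #     [qlang.OP_TRUE, "valid"]]]
  return flt

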
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


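# Illustrative sketch only, for a hypothetical three-node cluster: the node
# being removed is excluded from both hook node lists built by
# LUNodeRemove.BuildHooksNodes, so a dead or unreachable node cannot block
# its own removal.
def _ExampleNodeRemoveHookNodes():
  all_nodes = ["node1.example.com", "node2.example.com", "node3.example.com"]
  removed = "node3.example.com"
  hook_nodes = [name for name in all_nodes if name != removed]
  # pre and post hooks both run on the remaining nodes only
  return (hook_nodes, hook_nodes)

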
class _NodeQuery(_QueryBase):
4392
  FIELDS = query.NODE_FIELDS
4393

    
4394
  def ExpandNames(self, lu):
4395
    lu.needed_locks = {}
4396
    lu.share_locks = _ShareAll()
4397

    
4398
    if self.names:
4399
      self.wanted = _GetWantedNodes(lu, self.names)
4400
    else:
4401
      self.wanted = locking.ALL_SET
4402

    
4403
    self.do_locking = (self.use_locking and
4404
                       query.NQ_LIVE in self.requested_data)
4405

    
4406
    if self.do_locking:
4407
      # If any non-static field is requested we need to lock the nodes
4408
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4409

    
4410
  def DeclareLocks(self, lu, level):
4411
    pass
4412

    
4413
  def _GetQueryData(self, lu):
4414
    """Computes the list of nodes and their attributes.
4415

4416
    """
4417
    all_info = lu.cfg.GetAllNodesInfo()
4418

    
4419
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4420

    
4421
    # Gather data as requested
4422
    if query.NQ_LIVE in self.requested_data:
4423
      # filter out non-vm_capable nodes
4424
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4425

    
4426
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4427
                                        lu.cfg.GetHypervisorType())
4428
      live_data = dict((name, nresult.payload)
4429
                       for (name, nresult) in node_data.items()
4430
                       if not nresult.fail_msg and nresult.payload)
4431
    else:
4432
      live_data = None
4433

    
4434
    if query.NQ_INST in self.requested_data:
4435
      node_to_primary = dict([(name, set()) for name in nodenames])
4436
      node_to_secondary = dict([(name, set()) for name in nodenames])
4437

    
4438
      inst_data = lu.cfg.GetAllInstancesInfo()
4439

    
4440
      for inst in inst_data.values():
4441
        if inst.primary_node in node_to_primary:
4442
          node_to_primary[inst.primary_node].add(inst.name)
4443
        for secnode in inst.secondary_nodes:
4444
          if secnode in node_to_secondary:
4445
            node_to_secondary[secnode].add(inst.name)
4446
    else:
4447
      node_to_primary = None
4448
      node_to_secondary = None
4449

    
4450
    if query.NQ_OOB in self.requested_data:
4451
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4452
                         for name, node in all_info.iteritems())
4453
    else:
4454
      oob_support = None
4455

    
4456
    if query.NQ_GROUP in self.requested_data:
4457
      groups = lu.cfg.GetAllNodeGroupsInfo()
4458
    else:
4459
      groups = {}
4460

    
4461
    return query.NodeQueryData([all_info[name] for name in nodenames],
4462
                               live_data, lu.cfg.GetMasterNode(),
4463
                               node_to_primary, node_to_secondary, groups,
4464
                               oob_support, lu.cfg.GetClusterInfo())
4465

    
4466

    
4467
class LUNodeQuery(NoHooksLU):
4468
  """Logical unit for querying nodes.
4469

4470
  """
4471
  # pylint: disable=W0142
4472
  REQ_BGL = False
4473

    
4474
  def CheckArguments(self):
4475
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4476
                         self.op.output_fields, self.op.use_locking)
4477

    
4478
  def ExpandNames(self):
4479
    self.nq.ExpandNames(self)
4480

    
4481
  def Exec(self, feedback_fn):
4482
    return self.nq.OldStyleQuery(self)
4483

    
4484

    
4485
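# Illustrative sketch only: _NodeQuery locks nodes only when live data was
# requested *and* the caller asked for locking; queries for static fields are
# answered from the configuration without taking LEVEL_NODE locks.
def _ExampleNodeQueryLocking(use_locking, requested_data):
  # mirrors the do_locking computation in _NodeQuery.ExpandNames
  return use_locking and query.NQ_LIVE in requested_data

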
class LUNodeQueryvols(NoHooksLU):
4486
  """Logical unit for getting volumes on node(s).
4487

4488
  """
4489
  REQ_BGL = False
4490
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4491
  _FIELDS_STATIC = utils.FieldSet("node")
4492

    
4493
  def CheckArguments(self):
4494
    _CheckOutputFields(static=self._FIELDS_STATIC,
4495
                       dynamic=self._FIELDS_DYNAMIC,
4496
                       selected=self.op.output_fields)
4497

    
4498
  def ExpandNames(self):
4499
    self.needed_locks = {}
4500
    self.share_locks[locking.LEVEL_NODE] = 1
4501
    if not self.op.nodes:
4502
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4503
    else:
4504
      self.needed_locks[locking.LEVEL_NODE] = \
4505
        _GetWantedNodes(self, self.op.nodes)
4506

    
4507
  def Exec(self, feedback_fn):
4508
    """Computes the list of nodes and their attributes.
4509

4510
    """
4511
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4512
    volumes = self.rpc.call_node_volumes(nodenames)
4513

    
4514
    ilist = self.cfg.GetAllInstancesInfo()
4515
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4516

    
4517
    output = []
4518
    for node in nodenames:
4519
      nresult = volumes[node]
4520
      if nresult.offline:
4521
        continue
4522
      msg = nresult.fail_msg
4523
      if msg:
4524
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4525
        continue
4526

    
4527
      node_vols = sorted(nresult.payload,
4528
                         key=operator.itemgetter("dev"))
4529

    
4530
      for vol in node_vols:
4531
        node_output = []
4532
        for field in self.op.output_fields:
4533
          if field == "node":
4534
            val = node
4535
          elif field == "phys":
4536
            val = vol["dev"]
4537
          elif field == "vg":
4538
            val = vol["vg"]
4539
          elif field == "name":
4540
            val = vol["name"]
4541
          elif field == "size":
4542
            val = int(float(vol["size"]))
4543
          elif field == "instance":
4544
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4545
          else:
4546
            raise errors.ParameterError(field)
4547
          node_output.append(str(val))
4548

    
4549
        output.append(node_output)
4550

    
4551
    return output
4552

    
4553

    
4554
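# Illustrative sketch only, mirroring the per-field dispatch in
# LUNodeQueryvols.Exec for one hypothetical LVM volume; the real "vol" dict
# comes from the node_volumes RPC payload.
def _ExampleVolumeRow():
  node = "node1.example.com"
  vol = {"dev": "/dev/dm-1", "vg": "xenvg", "name": "lv1", "size": 1024.0}
  fields = ["node", "phys", "vg", "name", "size"]
  values = {
    "node": node,
    "phys": vol["dev"],
    "vg": vol["vg"],
    "name": vol["name"],
    "size": int(float(vol["size"])),
    }
  # -> ["node1.example.com", "/dev/dm-1", "xenvg", "lv1", "1024"]
  return [str(values[field]) for field in fields]

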
class LUNodeQueryStorage(NoHooksLU):
4555
  """Logical unit for getting information on storage units on node(s).
4556

4557
  """
4558
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4559
  REQ_BGL = False
4560

    
4561
  def CheckArguments(self):
4562
    _CheckOutputFields(static=self._FIELDS_STATIC,
4563
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4564
                       selected=self.op.output_fields)
4565

    
4566
  def ExpandNames(self):
4567
    self.needed_locks = {}
4568
    self.share_locks[locking.LEVEL_NODE] = 1
4569

    
4570
    if self.op.nodes:
4571
      self.needed_locks[locking.LEVEL_NODE] = \
4572
        _GetWantedNodes(self, self.op.nodes)
4573
    else:
4574
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4575

    
4576
  def Exec(self, feedback_fn):
4577
    """Computes the list of nodes and their attributes.
4578

4579
    """
4580
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4581

    
4582
    # Always get name to sort by
4583
    if constants.SF_NAME in self.op.output_fields:
4584
      fields = self.op.output_fields[:]
4585
    else:
4586
      fields = [constants.SF_NAME] + self.op.output_fields
4587

    
4588
    # Never ask for node or type as it's only known to the LU
4589
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4590
      while extra in fields:
4591
        fields.remove(extra)
4592

    
4593
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4594
    name_idx = field_idx[constants.SF_NAME]
4595

    
4596
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4597
    data = self.rpc.call_storage_list(self.nodes,
4598
                                      self.op.storage_type, st_args,
4599
                                      self.op.name, fields)
4600

    
4601
    result = []
4602

    
4603
    for node in utils.NiceSort(self.nodes):
4604
      nresult = data[node]
4605
      if nresult.offline:
4606
        continue
4607

    
4608
      msg = nresult.fail_msg
4609
      if msg:
4610
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4611
        continue
4612

    
4613
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4614

    
4615
      for name in utils.NiceSort(rows.keys()):
4616
        row = rows[name]
4617

    
4618
        out = []
4619

    
4620
        for field in self.op.output_fields:
4621
          if field == constants.SF_NODE:
4622
            val = node
4623
          elif field == constants.SF_TYPE:
4624
            val = self.op.storage_type
4625
          elif field in field_idx:
4626
            val = row[field_idx[field]]
4627
          else:
4628
            raise errors.ParameterError(field)
4629

    
4630
          out.append(val)
4631

    
4632
        result.append(out)
4633

    
4634
    return result
4635

    
4636

    
4637
class _InstanceQuery(_QueryBase):
4638
  FIELDS = query.INSTANCE_FIELDS
4639

    
4640
  def ExpandNames(self, lu):
4641
    lu.needed_locks = {}
4642
    lu.share_locks = _ShareAll()
4643

    
4644
    if self.names:
4645
      self.wanted = _GetWantedInstances(lu, self.names)
4646
    else:
4647
      self.wanted = locking.ALL_SET
4648

    
4649
    self.do_locking = (self.use_locking and
4650
                       query.IQ_LIVE in self.requested_data)
4651
    if self.do_locking:
4652
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4653
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4654
      lu.needed_locks[locking.LEVEL_NODE] = []
4655
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4656

    
4657
    self.do_grouplocks = (self.do_locking and
4658
                          query.IQ_NODES in self.requested_data)
4659

    
4660
  def DeclareLocks(self, lu, level):
4661
    if self.do_locking:
4662
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4663
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4664

    
4665
        # Lock all groups used by instances optimistically; this requires going
4666
        # via the node before it's locked, requiring verification later on
4667
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4668
          set(group_uuid
4669
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4670
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4671
      elif level == locking.LEVEL_NODE:
4672
        lu._LockInstancesNodes() # pylint: disable=W0212
4673

    
4674
  @staticmethod
4675
  def _CheckGroupLocks(lu):
4676
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4677
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4678

    
4679
    # Check if node groups for locked instances are still correct
4680
    for instance_name in owned_instances:
4681
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4682

    
4683
  def _GetQueryData(self, lu):
4684
    """Computes the list of instances and their attributes.
4685

4686
    """
4687
    if self.do_grouplocks:
4688
      self._CheckGroupLocks(lu)
4689

    
4690
    cluster = lu.cfg.GetClusterInfo()
4691
    all_info = lu.cfg.GetAllInstancesInfo()
4692

    
4693
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4694

    
4695
    instance_list = [all_info[name] for name in instance_names]
4696
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4697
                                        for inst in instance_list)))
4698
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4699
    bad_nodes = []
4700
    offline_nodes = []
4701
    wrongnode_inst = set()
4702

    
4703
    # Gather data as requested
4704
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4705
      live_data = {}
4706
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4707
      for name in nodes:
4708
        result = node_data[name]
4709
        if result.offline:
4710
          # offline nodes will be in both lists
4711
          assert result.fail_msg
4712
          offline_nodes.append(name)
4713
        if result.fail_msg:
4714
          bad_nodes.append(name)
4715
        elif result.payload:
4716
          for inst in result.payload:
4717
            if inst in all_info:
4718
              if all_info[inst].primary_node == name:
4719
                live_data.update(result.payload)
4720
              else:
4721
                wrongnode_inst.add(inst)
4722
            else:
4723
              # orphan instance; we don't list it here as we don't
4724
              # handle this case yet in the output of instance listing
4725
              logging.warning("Orphan instance '%s' found on node %s",
4726
                              inst, name)
4727
        # else no instance is alive
4728
    else:
4729
      live_data = {}
4730

    
4731
    if query.IQ_DISKUSAGE in self.requested_data:
4732
      disk_usage = dict((inst.name,
4733
                         _ComputeDiskSize(inst.disk_template,
4734
                                          [{constants.IDISK_SIZE: disk.size}
4735
                                           for disk in inst.disks]))
4736
                        for inst in instance_list)
4737
    else:
4738
      disk_usage = None
4739

    
4740
    if query.IQ_CONSOLE in self.requested_data:
4741
      consinfo = {}
4742
      for inst in instance_list:
4743
        if inst.name in live_data:
4744
          # Instance is running
4745
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4746
        else:
4747
          consinfo[inst.name] = None
4748
      assert set(consinfo.keys()) == set(instance_names)
4749
    else:
4750
      consinfo = None
4751

    
4752
    if query.IQ_NODES in self.requested_data:
4753
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4754
                                            instance_list)))
4755
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4756
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4757
                    for uuid in set(map(operator.attrgetter("group"),
4758
                                        nodes.values())))
4759
    else:
4760
      nodes = None
4761
      groups = None
4762

    
4763
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4764
                                   disk_usage, offline_nodes, bad_nodes,
4765
                                   live_data, wrongnode_inst, consinfo,
4766
                                   nodes, groups)
4767

    
4768

    
4769
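# Illustrative sketch only of the live-data classification performed in
# _InstanceQuery._GetQueryData: an instance reported by a node that is not
# its primary is recorded as running on the wrong node.  The two dictionaries
# below are hypothetical stand-ins for configuration and RPC data.
def _ExampleClassifyLiveInstances():
  primary_of = {"inst1": "node1", "inst2": "node2"}
  reported_by_node = {"node1": ["inst1", "inst2"], "node2": []}
  live = set()
  wrongnode = set()
  for (node, instances) in reported_by_node.items():
    for inst in instances:
      if primary_of.get(inst) == node:
        live.add(inst)
      else:
        wrongnode.add(inst)
  # live == set(["inst1"]), wrongnode == set(["inst2"])
  return (live, wrongnode)

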
class LUQuery(NoHooksLU):
4770
  """Query for resources/items of a certain kind.
4771

4772
  """
4773
  # pylint: disable=W0142
4774
  REQ_BGL = False
4775

    
4776
  def CheckArguments(self):
4777
    qcls = _GetQueryImplementation(self.op.what)
4778

    
4779
    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4780

    
4781
  def ExpandNames(self):
4782
    self.impl.ExpandNames(self)
4783

    
4784
  def DeclareLocks(self, level):
4785
    self.impl.DeclareLocks(self, level)
4786

    
4787
  def Exec(self, feedback_fn):
4788
    return self.impl.NewStyleQuery(self)
4789

    
4790

    
4791
class LUQueryFields(NoHooksLU):
4792
  """Query for resources/items of a certain kind.
4793

4794
  """
4795
  # pylint: disable=W0142
4796
  REQ_BGL = False
4797

    
4798
  def CheckArguments(self):
4799
    self.qcls = _GetQueryImplementation(self.op.what)
4800

    
4801
  def ExpandNames(self):
4802
    self.needed_locks = {}
4803

    
4804
  def Exec(self, feedback_fn):
4805
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4806

    
4807

    
4808
class LUNodeModifyStorage(NoHooksLU):
4809
  """Logical unit for modifying a storage volume on a node.
4810

4811
  """
4812
  REQ_BGL = False
4813

    
4814
  def CheckArguments(self):
4815
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4816

    
4817
    storage_type = self.op.storage_type
4818

    
4819
    try:
4820
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4821
    except KeyError:
4822
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4823
                                 " modified" % storage_type,
4824
                                 errors.ECODE_INVAL)
4825

    
4826
    diff = set(self.op.changes.keys()) - modifiable
4827
    if diff:
4828
      raise errors.OpPrereqError("The following fields can not be modified for"
4829
                                 " storage units of type '%s': %r" %
4830
                                 (storage_type, list(diff)),
4831
                                 errors.ECODE_INVAL)
4832

    
4833
  def ExpandNames(self):
4834
    self.needed_locks = {
4835
      locking.LEVEL_NODE: self.op.node_name,
4836
      }
4837

    
4838
  def Exec(self, feedback_fn):
4839
    """Computes the list of nodes and their attributes.
4840

4841
    """
4842
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4843
    result = self.rpc.call_storage_modify(self.op.node_name,
4844
                                          self.op.storage_type, st_args,
4845
                                          self.op.name, self.op.changes)
4846
    result.Raise("Failed to modify storage unit '%s' on %s" %
4847
                 (self.op.name, self.op.node_name))
4848

    
4849

    
4850
class LUNodeAdd(LogicalUnit):
4851
  """Logical unit for adding node to the cluster.
4852

4853
  """
4854
  HPATH = "node-add"
4855
  HTYPE = constants.HTYPE_NODE
4856
  _NFLAGS = ["master_capable", "vm_capable"]
4857

    
4858
  def CheckArguments(self):
4859
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4860
    # validate/normalize the node name
4861
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4862
                                         family=self.primary_ip_family)
4863
    self.op.node_name = self.hostname.name
4864

    
4865
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4866
      raise errors.OpPrereqError("Cannot readd the master node",
4867
                                 errors.ECODE_STATE)
4868

    
4869
    if self.op.readd and self.op.group:
4870
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4871
                                 " being readded", errors.ECODE_INVAL)
4872

    
4873
  def BuildHooksEnv(self):
4874
    """Build hooks env.
4875

4876
    This will run on all nodes before, and on all nodes + the new node after.
4877

4878
    """
4879
    return {
4880
      "OP_TARGET": self.op.node_name,
4881
      "NODE_NAME": self.op.node_name,
4882
      "NODE_PIP": self.op.primary_ip,
4883
      "NODE_SIP": self.op.secondary_ip,
4884
      "MASTER_CAPABLE": str(self.op.master_capable),
4885
      "VM_CAPABLE": str(self.op.vm_capable),
4886
      }
4887

    
4888
  def BuildHooksNodes(self):
4889
    """Build hooks nodes.
4890

4891
    """
4892
    # Exclude added node
4893
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4894
    post_nodes = pre_nodes + [self.op.node_name, ]
4895

    
4896
    return (pre_nodes, post_nodes)
4897

    
4898
  def CheckPrereq(self):
4899
    """Check prerequisites.
4900

4901
    This checks:
4902
     - the new node is not already in the config
4903
     - it is resolvable
4904
     - its parameters (single/dual homed) matches the cluster
4905

4906
    Any errors are signaled by raising errors.OpPrereqError.
4907

4908
    """
4909
    cfg = self.cfg
4910
    hostname = self.hostname
4911
    node = hostname.name
4912
    primary_ip = self.op.primary_ip = hostname.ip
4913
    if self.op.secondary_ip is None:
4914
      if self.primary_ip_family == netutils.IP6Address.family:
4915
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4916
                                   " IPv4 address must be given as secondary",
4917
                                   errors.ECODE_INVAL)
4918
      self.op.secondary_ip = primary_ip
4919

    
4920
    secondary_ip = self.op.secondary_ip
4921
    if not netutils.IP4Address.IsValid(secondary_ip):
4922
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4923
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4924

    
4925
    node_list = cfg.GetNodeList()
4926
    if not self.op.readd and node in node_list:
4927
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4928
                                 node, errors.ECODE_EXISTS)
4929
    elif self.op.readd and node not in node_list:
4930
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4931
                                 errors.ECODE_NOENT)
4932

    
4933
    self.changed_primary_ip = False
4934

    
4935
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4936
      if self.op.readd and node == existing_node_name:
4937
        if existing_node.secondary_ip != secondary_ip:
4938
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4939
                                     " address configuration as before",
4940
                                     errors.ECODE_INVAL)
4941
        if existing_node.primary_ip != primary_ip:
4942
          self.changed_primary_ip = True
4943

    
4944
        continue
4945

    
4946
      if (existing_node.primary_ip == primary_ip or
4947
          existing_node.secondary_ip == primary_ip or
4948
          existing_node.primary_ip == secondary_ip or
4949
          existing_node.secondary_ip == secondary_ip):
4950
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4951
                                   " existing node %s" % existing_node.name,
4952
                                   errors.ECODE_NOTUNIQUE)
4953

    
4954
    # After this 'if' block, None is no longer a valid value for the
4955
    # _capable op attributes
4956
    if self.op.readd:
4957
      old_node = self.cfg.GetNodeInfo(node)
4958
      assert old_node is not None, "Can't retrieve locked node %s" % node
4959
      for attr in self._NFLAGS:
4960
        if getattr(self.op, attr) is None:
4961
          setattr(self.op, attr, getattr(old_node, attr))
4962
    else:
4963
      for attr in self._NFLAGS:
4964
        if getattr(self.op, attr) is None:
4965
          setattr(self.op, attr, True)
4966

    
4967
    if self.op.readd and not self.op.vm_capable:
4968
      pri, sec = cfg.GetNodeInstances(node)
4969
      if pri or sec:
4970
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4971
                                   " flag set to false, but it already holds"
4972
                                   " instances" % node,
4973
                                   errors.ECODE_STATE)
4974

    
4975
    # check that the type of the node (single versus dual homed) is the
4976
    # same as for the master
4977
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4978
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4979
    newbie_singlehomed = secondary_ip == primary_ip
4980
    if master_singlehomed != newbie_singlehomed:
4981
      if master_singlehomed:
4982
        raise errors.OpPrereqError("The master has no secondary ip but the"
4983
                                   " new node has one",
4984
                                   errors.ECODE_INVAL)
4985
      else:
4986
        raise errors.OpPrereqError("The master has a secondary ip but the"
4987
                                   " new node doesn't have one",
4988
                                   errors.ECODE_INVAL)
4989

    
4990
    # checks reachability
4991
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4992
      raise errors.OpPrereqError("Node not reachable by ping",
4993
                                 errors.ECODE_ENVIRON)
4994

    
4995
    if not newbie_singlehomed:
4996
      # check reachability from my secondary ip to newbie's secondary ip
4997
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4998
                           source=myself.secondary_ip):
4999
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5000
                                   " based ping to node daemon port",
5001
                                   errors.ECODE_ENVIRON)
5002

    
5003
    if self.op.readd:
5004
      exceptions = [node]
5005
    else:
5006
      exceptions = []
5007

    
5008
    if self.op.master_capable:
5009
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5010
    else:
5011
      self.master_candidate = False
5012

    
5013
    if self.op.readd:
5014
      self.new_node = old_node
5015
    else:
5016
      node_group = cfg.LookupNodeGroup(self.op.group)
5017
      self.new_node = objects.Node(name=node,
5018
                                   primary_ip=primary_ip,
5019
                                   secondary_ip=secondary_ip,
5020
                                   master_candidate=self.master_candidate,
5021
                                   offline=False, drained=False,
5022
                                   group=node_group)
5023

    
5024
    if self.op.ndparams:
5025
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5026

    
5027
  def Exec(self, feedback_fn):
5028
    """Adds the new node to the cluster.
5029

5030
    """
5031
    new_node = self.new_node
5032
    node = new_node.name
5033

    
5034
    # We are adding a new node, so we assume it is powered
5035
    new_node.powered = True
5036

    
5037
    # for re-adds, reset the offline/drained/master-candidate flags;
5038
    # we need to reset here, otherwise offline would prevent RPC calls
5039
    # later in the procedure; this also means that if the re-add
5040
    # fails, we are left with a non-offlined, broken node
5041
    if self.op.readd:
5042
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5043
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5044
      # if we demote the node, we do cleanup later in the procedure
5045
      new_node.master_candidate = self.master_candidate
5046
      if self.changed_primary_ip:
5047
        new_node.primary_ip = self.op.primary_ip
5048

    
5049
    # copy the master/vm_capable flags
5050
    for attr in self._NFLAGS:
5051
      setattr(new_node, attr, getattr(self.op, attr))
5052

    
5053
    # notify the user about any possible mc promotion
5054
    if new_node.master_candidate:
5055
      self.LogInfo("Node will be a master candidate")
5056

    
5057
    if self.op.ndparams:
5058
      new_node.ndparams = self.op.ndparams
5059
    else:
5060
      new_node.ndparams = {}
5061

    
5062
    # check connectivity
5063
    result = self.rpc.call_version([node])[node]
5064
    result.Raise("Can't get version information from node %s" % node)
5065
    if constants.PROTOCOL_VERSION == result.payload:
5066
      logging.info("Communication to node %s fine, sw version %s match",
5067
                   node, result.payload)
5068
    else:
5069
      raise errors.OpExecError("Version mismatch master version %s,"
5070
                               " node version %s" %
5071
                               (constants.PROTOCOL_VERSION, result.payload))
5072

    
5073
    # Add node to our /etc/hosts, and add key to known_hosts
5074
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5075
      master_node = self.cfg.GetMasterNode()
5076
      result = self.rpc.call_etc_hosts_modify(master_node,
5077
                                              constants.ETC_HOSTS_ADD,
5078
                                              self.hostname.name,
5079
                                              self.hostname.ip)
5080
      result.Raise("Can't update hosts file with new host data")
5081

    
5082
    if new_node.secondary_ip != new_node.primary_ip:
5083
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5084
                               False)
5085

    
5086
    node_verify_list = [self.cfg.GetMasterNode()]
5087
    node_verify_param = {
5088
      constants.NV_NODELIST: ([node], {}),
5089
      # TODO: do a node-net-test as well?
5090
    }
5091

    
5092
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5093
                                       self.cfg.GetClusterName())
5094
    for verifier in node_verify_list:
5095
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5096
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5097
      if nl_payload:
5098
        for failed in nl_payload:
5099
          feedback_fn("ssh/hostname verification failed"
5100
                      " (checking from %s): %s" %
5101
                      (verifier, nl_payload[failed]))
5102
        raise errors.OpExecError("ssh/hostname verification failed")
5103

    
5104
    if self.op.readd:
5105
      _RedistributeAncillaryFiles(self)
5106
      self.context.ReaddNode(new_node)
5107
      # make sure we redistribute the config
5108
      self.cfg.Update(new_node, feedback_fn)
5109
      # and make sure the new node will not have old files around
5110
      if not new_node.master_candidate:
5111
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5112
        msg = result.fail_msg
5113
        if msg:
5114
          self.LogWarning("Node failed to demote itself from master"
5115
                          " candidate status: %s" % msg)
5116
    else:
5117
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5118
                                  additional_vm=self.op.vm_capable)
5119
      self.context.AddNode(new_node, self.proc.GetECId())
5120

    
5121

    
5122
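# Illustrative sketch only of the homing consistency check performed in
# LUNodeAdd.CheckPrereq: a new node must be single-homed exactly when the
# master is, otherwise the add is refused.  The addresses used in the comment
# below are hypothetical.
def _ExampleHomingMatches(master_primary, master_secondary,
                          new_primary, new_secondary):
  master_singlehomed = (master_secondary == master_primary)
  newbie_singlehomed = (new_secondary == new_primary)
  # e.g. ("192.0.2.1", "192.0.2.1", "192.0.2.2", "198.51.100.2") -> False,
  # which LUNodeAdd reports as an OpPrereqError
  return master_singlehomed == newbie_singlehomed

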
class LUNodeSetParams(LogicalUnit):
5123
  """Modifies the parameters of a node.
5124

5125
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5126
      to the node role (as _ROLE_*)
5127
  @cvar _R2F: a dictionary from node role to tuples of flags
5128
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5129

5130
  """
5131
  HPATH = "node-modify"
5132
  HTYPE = constants.HTYPE_NODE
5133
  REQ_BGL = False
5134
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5135
  _F2R = {
5136
    (True, False, False): _ROLE_CANDIDATE,
5137
    (False, True, False): _ROLE_DRAINED,
5138
    (False, False, True): _ROLE_OFFLINE,
5139
    (False, False, False): _ROLE_REGULAR,
5140
    }
5141
  _R2F = dict((v, k) for k, v in _F2R.items())
5142
  _FLAGS = ["master_candidate", "drained", "offline"]
5143

    
5144
  def CheckArguments(self):
5145
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5146
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5147
                self.op.master_capable, self.op.vm_capable,
5148
                self.op.secondary_ip, self.op.ndparams]
5149
    if all_mods.count(None) == len(all_mods):
5150
      raise errors.OpPrereqError("Please pass at least one modification",
5151
                                 errors.ECODE_INVAL)
5152
    if all_mods.count(True) > 1:
5153
      raise errors.OpPrereqError("Can't set the node into more than one"
5154
                                 " state at the same time",
5155
                                 errors.ECODE_INVAL)
5156

    
5157
    # Boolean value that tells us whether we might be demoting from MC
5158
    self.might_demote = (self.op.master_candidate == False or
5159
                         self.op.offline == True or
5160
                         self.op.drained == True or
5161
                         self.op.master_capable == False)
5162

    
5163
    if self.op.secondary_ip:
5164
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5165
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5166
                                   " address" % self.op.secondary_ip,
5167
                                   errors.ECODE_INVAL)
5168

    
5169
    self.lock_all = self.op.auto_promote and self.might_demote
5170
    self.lock_instances = self.op.secondary_ip is not None
5171

    
5172
  def ExpandNames(self):
5173
    if self.lock_all:
5174
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5175
    else:
5176
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5177

    
5178
    if self.lock_instances:
5179
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5180

    
5181
  def DeclareLocks(self, level):
5182
    # If we have locked all instances then, before waiting to lock nodes,
    # release all the ones living on nodes unrelated to the current operation.
5184
    if level == locking.LEVEL_NODE and self.lock_instances:
5185
      self.affected_instances = []
5186
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5187
        instances_keep = []
5188

    
5189
        # Build list of instances to release
5190
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5191
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5192
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5193
              self.op.node_name in instance.all_nodes):
5194
            instances_keep.append(instance_name)
5195
            self.affected_instances.append(instance)
5196

    
5197
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5198

    
5199
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5200
                set(instances_keep))
5201

    
5202
  def BuildHooksEnv(self):
5203
    """Build hooks env.
5204

5205
    This runs on the master node.
5206

5207
    """
5208
    return {
5209
      "OP_TARGET": self.op.node_name,
5210
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5211
      "OFFLINE": str(self.op.offline),
5212
      "DRAINED": str(self.op.drained),
5213
      "MASTER_CAPABLE": str(self.op.master_capable),
5214
      "VM_CAPABLE": str(self.op.vm_capable),
5215
      }
5216

    
5217
  def BuildHooksNodes(self):
5218
    """Build hooks nodes.
5219

5220
    """
5221
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5222
    return (nl, nl)
5223

    
5224
  def CheckPrereq(self):
5225
    """Check prerequisites.
5226

5227
    This only checks the instance list against the existing names.
5228

5229
    """
5230
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5231

    
5232
    if (self.op.master_candidate is not None or
5233
        self.op.drained is not None or
5234
        self.op.offline is not None):
5235
      # we can't change the master's node flags
5236
      if self.op.node_name == self.cfg.GetMasterNode():
5237
        raise errors.OpPrereqError("The master role can be changed"
5238
                                   " only via master-failover",
5239
                                   errors.ECODE_INVAL)
5240

    
5241
    if self.op.master_candidate and not node.master_capable:
5242
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5243
                                 " it a master candidate" % node.name,
5244
                                 errors.ECODE_STATE)
5245

    
5246
    if self.op.vm_capable == False:
5247
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5248
      if ipri or isec:
5249
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5250
                                   " the vm_capable flag" % node.name,
5251
                                   errors.ECODE_STATE)
5252

    
5253
    if node.master_candidate and self.might_demote and not self.lock_all:
5254
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5255
      # check if after removing the current node, we're missing master
5256
      # candidates
5257
      (mc_remaining, mc_should, _) = \
5258
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5259
      if mc_remaining < mc_should:
5260
        raise errors.OpPrereqError("Not enough master candidates, please"
5261
                                   " pass auto promote option to allow"
5262
                                   " promotion", errors.ECODE_STATE)
5263

    
5264
    self.old_flags = old_flags = (node.master_candidate,
5265
                                  node.drained, node.offline)
5266
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5267
    self.old_role = old_role = self._F2R[old_flags]
5268

    
5269
    # Check for ineffective changes
5270
    for attr in self._FLAGS:
5271
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5272
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5273
        setattr(self.op, attr, None)
5274

    
5275
    # Past this point, any flag change to False means a transition
5276
    # away from the respective state, as only real changes are kept
5277

    
5278
    # TODO: We might query the real power state if it supports OOB
5279
    if _SupportsOob(self.cfg, node):
5280
      if self.op.offline is False and not (node.powered or
5281
                                           self.op.powered == True):
5282
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5283
                                    " offline status can be reset") %
5284
                                   self.op.node_name)
5285
    elif self.op.powered is not None:
5286
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5287
                                  " as it does not support out-of-band"
5288
                                  " handling") % self.op.node_name)
5289

    
5290
    # If we're being deofflined/drained, we'll MC ourself if needed
5291
    if (self.op.drained == False or self.op.offline == False or
5292
        (self.op.master_capable and not node.master_capable)):
5293
      if _DecideSelfPromotion(self):
5294
        self.op.master_candidate = True
5295
        self.LogInfo("Auto-promoting node to master candidate")
5296

    
5297
    # If we're no longer master capable, we'll demote ourselves from MC
5298
    if self.op.master_capable == False and node.master_candidate:
5299
      self.LogInfo("Demoting from master candidate")
5300
      self.op.master_candidate = False
5301

    
5302
    # Compute new role
5303
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5304
    if self.op.master_candidate:
5305
      new_role = self._ROLE_CANDIDATE
5306
    elif self.op.drained:
5307
      new_role = self._ROLE_DRAINED
5308
    elif self.op.offline:
5309
      new_role = self._ROLE_OFFLINE
5310
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5311
      # False is still in new flags, which means we're un-setting (the
5312
      # only) True flag
5313
      new_role = self._ROLE_REGULAR
5314
    else: # no new flags, nothing, keep old role
5315
      new_role = old_role
5316

    
5317
    self.new_role = new_role
5318

    
5319
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5320
      # Trying to transition out of offline status
5321
      result = self.rpc.call_version([node.name])[node.name]
5322
      if result.fail_msg:
5323
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5324
                                   " to report its version: %s" %
5325
                                   (node.name, result.fail_msg),
5326
                                   errors.ECODE_STATE)
5327
      else:
5328
        self.LogWarning("Transitioning node from offline to online state"
5329
                        " without using re-add. Please make sure the node"
5330
                        " is healthy!")
5331

    
5332
    if self.op.secondary_ip:
5333
      # Ok even without locking, because this can't be changed by any LU
5334
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5335
      master_singlehomed = master.secondary_ip == master.primary_ip
5336
      if master_singlehomed and self.op.secondary_ip:
5337
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5338
                                   " homed cluster", errors.ECODE_INVAL)
5339

    
5340
      if node.offline:
5341
        if self.affected_instances:
5342
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5343
                                     " node has instances (%s) configured"
5344
                                     " to use it" % self.affected_instances)
5345
      else:
5346
        # On online nodes, check that no instances are running, and that
5347
        # the node has the new ip and we can reach it.
5348
        for instance in self.affected_instances:
5349
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5350

    
5351
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5352
        if master.name != node.name:
5353
          # check reachability from master secondary ip to new secondary ip
5354
          if not netutils.TcpPing(self.op.secondary_ip,
5355
                                  constants.DEFAULT_NODED_PORT,
5356
                                  source=master.secondary_ip):
5357
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5358
                                       " based ping to node daemon port",
5359
                                       errors.ECODE_ENVIRON)
5360

    
5361
    if self.op.ndparams:
5362
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5363
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5364
      self.new_ndparams = new_ndparams
5365

    
5366
  def Exec(self, feedback_fn):
5367
    """Modifies a node.
5368

5369
    """
5370
    node = self.node
5371
    old_role = self.old_role
5372
    new_role = self.new_role
5373

    
5374
    result = []
5375

    
5376
    if self.op.ndparams:
5377
      node.ndparams = self.new_ndparams
5378

    
5379
    if self.op.powered is not None:
5380
      node.powered = self.op.powered
5381

    
5382
    for attr in ["master_capable", "vm_capable"]:
5383
      val = getattr(self.op, attr)
5384
      if val is not None:
5385
        setattr(node, attr, val)
5386
        result.append((attr, str(val)))
5387

    
5388
    if new_role != old_role:
5389
      # Tell the node to demote itself, if no longer MC and not offline
5390
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5391
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5392
        if msg:
5393
          self.LogWarning("Node failed to demote itself: %s", msg)
5394

    
5395
      new_flags = self._R2F[new_role]
5396
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5397
        if of != nf:
5398
          result.append((desc, str(nf)))
5399
      (node.master_candidate, node.drained, node.offline) = new_flags
5400

    
5401
      # we locked all nodes, we adjust the CP before updating this node
5402
      if self.lock_all:
5403
        _AdjustCandidatePool(self, [node.name])
5404

    
5405
    if self.op.secondary_ip:
5406
      node.secondary_ip = self.op.secondary_ip
5407
      result.append(("secondary_ip", self.op.secondary_ip))
5408

    
5409
    # this will trigger configuration file update, if needed
5410
    self.cfg.Update(node, feedback_fn)
5411

    
5412
    # this will trigger job queue propagation or cleanup if the mc
5413
    # flag changed
5414
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5415
      self.context.ReaddNode(node)
5416

    
5417
    return result
5418

    
5419

    
5420
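# Illustrative sketch only: round-tripping through the flag/role tables
# defined on LUNodeSetParams.  The flag tuple order is (master_candidate,
# drained, offline).
def _ExampleNodeRoleMapping():
  role = LUNodeSetParams._F2R[(True, False, False)]
  assert role == LUNodeSetParams._ROLE_CANDIDATE
  assert LUNodeSetParams._R2F[role] == (True, False, False)
  return role

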
class LUNodePowercycle(NoHooksLU):
5421
  """Powercycles a node.
5422

5423
  """
5424
  REQ_BGL = False
5425

    
5426
  def CheckArguments(self):
5427
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5428
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5429
      raise errors.OpPrereqError("The node is the master and the force"
5430
                                 " parameter was not set",
5431
                                 errors.ECODE_INVAL)
5432

    
5433
  def ExpandNames(self):
5434
    """Locking for PowercycleNode.
5435

5436
    This is a last-resort option and shouldn't block on other
5437
    jobs. Therefore, we grab no locks.
5438

5439
    """
5440
    self.needed_locks = {}
5441

    
5442
  def Exec(self, feedback_fn):
5443
    """Reboots a node.
5444

5445
    """
5446
    result = self.rpc.call_node_powercycle(self.op.node_name,
5447
                                           self.cfg.GetHypervisorType())
5448
    result.Raise("Failed to schedule the reboot")
5449
    return result.payload
5450

    
5451

    
5452
class LUClusterQuery(NoHooksLU):
5453
  """Query cluster configuration.
5454

5455
  """
5456
  REQ_BGL = False
5457

    
5458
  def ExpandNames(self):
5459
    self.needed_locks = {}
5460

    
5461
  def Exec(self, feedback_fn):
5462
    """Return cluster config.
5463

5464
    """
5465
    cluster = self.cfg.GetClusterInfo()
5466
    os_hvp = {}
5467

    
5468
    # Filter just for enabled hypervisors
5469
    for os_name, hv_dict in cluster.os_hvp.items():
5470
      os_hvp[os_name] = {}
5471
      for hv_name, hv_params in hv_dict.items():
5472
        if hv_name in cluster.enabled_hypervisors:
5473
          os_hvp[os_name][hv_name] = hv_params
5474

    
5475
    # Convert ip_family to ip_version
5476
    primary_ip_version = constants.IP4_VERSION
5477
    if cluster.primary_ip_family == netutils.IP6Address.family:
5478
      primary_ip_version = constants.IP6_VERSION
5479

    
5480
    result = {
5481
      "software_version": constants.RELEASE_VERSION,
5482
      "protocol_version": constants.PROTOCOL_VERSION,
5483
      "config_version": constants.CONFIG_VERSION,
5484
      "os_api_version": max(constants.OS_API_VERSIONS),
5485
      "export_version": constants.EXPORT_VERSION,
5486
      "architecture": (platform.architecture()[0], platform.machine()),
5487
      "name": cluster.cluster_name,
5488
      "master": cluster.master_node,
5489
      "default_hypervisor": cluster.enabled_hypervisors[0],
5490
      "enabled_hypervisors": cluster.enabled_hypervisors,
5491
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5492
                        for hypervisor_name in cluster.enabled_hypervisors]),
5493
      "os_hvp": os_hvp,
5494
      "beparams": cluster.beparams,
5495
      "osparams": cluster.osparams,
5496
      "nicparams": cluster.nicparams,
5497
      "ndparams": cluster.ndparams,
5498
      "candidate_pool_size": cluster.candidate_pool_size,
5499
      "master_netdev": cluster.master_netdev,
5500
      "volume_group_name": cluster.volume_group_name,
5501
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5502
      "file_storage_dir": cluster.file_storage_dir,
5503
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5504
      "maintain_node_health": cluster.maintain_node_health,
5505
      "ctime": cluster.ctime,
5506
      "mtime": cluster.mtime,
5507
      "uuid": cluster.uuid,
5508
      "tags": list(cluster.GetTags()),
5509
      "uid_pool": cluster.uid_pool,
5510
      "default_iallocator": cluster.default_iallocator,
5511
      "reserved_lvs": cluster.reserved_lvs,
5512
      "primary_ip_version": primary_ip_version,
5513
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5514
      "hidden_os": cluster.hidden_os,
5515
      "blacklisted_os": cluster.blacklisted_os,
5516
      }
5517

    
5518
    return result
5519

    
5520

    
5521
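# Illustrative sketch only of the per-OS hypervisor parameter filtering done
# in LUClusterQuery.Exec: parameters for hypervisors that are not enabled are
# dropped from the reported os_hvp mapping.  The data below is hypothetical.
def _ExampleFilterOsHvp():
  enabled = ["kvm"]
  os_hvp = {"debian-etch": {"kvm": {"acpi": True}, "xen-pvm": {}}}
  filtered = dict((os_name,
                   dict((hv, params) for (hv, params) in hv_dict.items()
                        if hv in enabled))
                  for (os_name, hv_dict) in os_hvp.items())
  # filtered == {"debian-etch": {"kvm": {"acpi": True}}}
  return filtered

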
class LUClusterConfigQuery(NoHooksLU):
5522
  """Return configuration values.
5523

5524
  """
5525
  REQ_BGL = False
5526
  _FIELDS_DYNAMIC = utils.FieldSet()
5527
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5528
                                  "watcher_pause", "volume_group_name")
5529

    
5530
  def CheckArguments(self):
5531
    _CheckOutputFields(static=self._FIELDS_STATIC,
5532
                       dynamic=self._FIELDS_DYNAMIC,
5533
                       selected=self.op.output_fields)
5534

    
5535
  def ExpandNames(self):
5536
    self.needed_locks = {}
5537

    
5538
  def Exec(self, feedback_fn):
5539
    """Dump a representation of the cluster config to the standard output.
5540

5541
    """
5542
    values = []
5543
    for field in self.op.output_fields:
5544
      if field == "cluster_name":
5545
        entry = self.cfg.GetClusterName()
5546
      elif field == "master_node":
5547
        entry = self.cfg.GetMasterNode()
5548
      elif field == "drain_flag":
5549
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5550
      elif field == "watcher_pause":
5551
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5552
      elif field == "volume_group_name":
5553
        entry = self.cfg.GetVGName()
5554
      else:
5555
        raise errors.ParameterError(field)
5556
      values.append(entry)
5557
    return values
5558

    
5559

    
5560
class LUInstanceActivateDisks(NoHooksLU):
5561
  """Bring up an instance's disks.
5562

5563
  """
5564
  REQ_BGL = False
5565

    
5566
  def ExpandNames(self):
5567
    self._ExpandAndLockInstance()
5568
    self.needed_locks[locking.LEVEL_NODE] = []
5569
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5570

    
5571
  def DeclareLocks(self, level):
5572
    if level == locking.LEVEL_NODE:
5573
      self._LockInstancesNodes()
5574

    
5575
  def CheckPrereq(self):
5576
    """Check prerequisites.
5577

5578
    This checks that the instance is in the cluster.
5579

5580
    """
5581
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5582
    assert self.instance is not None, \
5583
      "Cannot retrieve locked instance %s" % self.op.instance_name
5584
    _CheckNodeOnline(self, self.instance.primary_node)
5585

    
5586
  def Exec(self, feedback_fn):
5587
    """Activate the disks.
5588

5589
    """
5590
    disks_ok, disks_info = \
5591
              _AssembleInstanceDisks(self, self.instance,
5592
                                     ignore_size=self.op.ignore_size)
5593
    if not disks_ok:
5594
      raise errors.OpExecError("Cannot activate block devices")
5595

    
5596
    return disks_info
5597

    
5598

    
5599
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5600
                           ignore_size=False):
5601
  """Prepare the block devices for an instance.
5602

5603
  This sets up the block devices on all nodes.
5604

5605
  @type lu: L{LogicalUnit}
5606
  @param lu: the logical unit on whose behalf we execute
5607
  @type instance: L{objects.Instance}
5608
  @param instance: the instance for whose disks we assemble
5609
  @type disks: list of L{objects.Disk} or None
5610
  @param disks: which disks to assemble (or all, if None)
5611
  @type ignore_secondaries: boolean
5612
  @param ignore_secondaries: if true, errors on secondary nodes
5613
      won't result in an error return from the function
5614
  @type ignore_size: boolean
5615
  @param ignore_size: if true, the current known size of the disk
5616
      will not be used during the disk activation, useful for cases
5617
      when the size is wrong
5618
  @return: False if the operation failed, otherwise a list of
5619
      (host, instance_visible_name, node_visible_name)
5620
      with the mapping from node devices to instance devices
5621

5622
  """
5623
  device_info = []
5624
  disks_ok = True
5625
  iname = instance.name
5626
  disks = _ExpandCheckDisks(instance, disks)
5627

    
5628
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it
5631

    
5632
  # The proper fix would be to wait (with some limits) until the
5633
  # connection has been made and drbd transitions from WFConnection
5634
  # into any other network-connected state (Connected, SyncTarget,
5635
  # SyncSource, etc.)
5636

    
5637
  # 1st pass, assemble on all nodes in secondary mode
5638
  for idx, inst_disk in enumerate(disks):
5639
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5640
      if ignore_size:
5641
        node_disk = node_disk.Copy()
5642
        node_disk.UnsetSize()
5643
      lu.cfg.SetDiskID(node_disk, node)
5644
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5645
      msg = result.fail_msg
5646
      if msg:
5647
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5648
                           " (is_primary=False, pass=1): %s",
5649
                           inst_disk.iv_name, node, msg)
5650
        if not ignore_secondaries:
5651
          disks_ok = False
5652

    
5653
  # FIXME: race condition on drbd migration to primary
5654

    
5655
  # 2nd pass, do only the primary node
5656
  for idx, inst_disk in enumerate(disks):
5657
    dev_path = None
5658

    
5659
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5660
      if node != instance.primary_node:
5661
        continue
5662
      if ignore_size:
5663
        node_disk = node_disk.Copy()
5664
        node_disk.UnsetSize()
5665
      lu.cfg.SetDiskID(node_disk, node)
5666
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5667
      msg = result.fail_msg
5668
      if msg:
5669
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5670
                           " (is_primary=True, pass=2): %s",
5671
                           inst_disk.iv_name, node, msg)
5672
        disks_ok = False
5673
      else:
5674
        dev_path = result.payload
5675

    
5676
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5677

    
5678
  # leave the disks configured for the primary node
5679
  # this is a workaround that would be fixed better by
5680
  # improving the logical/physical id handling
5681
  for disk in disks:
5682
    lu.cfg.SetDiskID(disk, instance.primary_node)
5683

    
5684
  return disks_ok, device_info
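
# Editorial usage sketch (assumption, not part of the original code): callers
# of _AssembleInstanceDisks, such as LUInstanceActivateDisks.Exec above,
# typically unpack the result like this:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     feedback_fn("disk %s on %s is visible as %s" % (iv_name, node, dev_path))
#
# Here "lu", "instance" and "feedback_fn" stand for the usual LogicalUnit,
# objects.Instance and feedback callable available inside an Exec() method.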
5685

    
5686

    
5687
def _StartInstanceDisks(lu, instance, force):
5688
  """Start the disks of an instance.
5689

5690
  """
5691
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5692
                                           ignore_secondaries=force)
5693
  if not disks_ok:
5694
    _ShutdownInstanceDisks(lu, instance)
5695
    if force is not None and not force:
5696
      lu.proc.LogWarning("", hint="If the message above refers to a"
5697
                         " secondary node,"
5698
                         " you can retry the operation using '--force'.")
5699
    raise errors.OpExecError("Disk consistency error")
5700

    
5701

    
5702
class LUInstanceDeactivateDisks(NoHooksLU):
5703
  """Shutdown an instance's disks.
5704

5705
  """
5706
  REQ_BGL = False
5707

    
5708
  def ExpandNames(self):
5709
    self._ExpandAndLockInstance()
5710
    self.needed_locks[locking.LEVEL_NODE] = []
5711
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5712

    
5713
  def DeclareLocks(self, level):
5714
    if level == locking.LEVEL_NODE:
5715
      self._LockInstancesNodes()
5716

    
5717
  def CheckPrereq(self):
5718
    """Check prerequisites.
5719

5720
    This checks that the instance is in the cluster.
5721

5722
    """
5723
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5724
    assert self.instance is not None, \
5725
      "Cannot retrieve locked instance %s" % self.op.instance_name
5726

    
5727
  def Exec(self, feedback_fn):
5728
    """Deactivate the disks
5729

5730
    """
5731
    instance = self.instance
5732
    if self.op.force:
5733
      _ShutdownInstanceDisks(self, instance)
5734
    else:
5735
      _SafeShutdownInstanceDisks(self, instance)
5736

    
5737

    
5738
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5739
  """Shutdown block devices of an instance.
5740

5741
  This function checks if an instance is running, before calling
5742
  _ShutdownInstanceDisks.
5743

5744
  """
5745
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5746
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5747

    
5748

    
5749
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
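
# Editorial sketch (assumption, not part of the original code):
# _ExpandCheckDisks is a pure selection helper, e.g.
#
#   _ExpandCheckDisks(instance, None)                 # -> instance.disks
#   _ExpandCheckDisks(instance, instance.disks[:1])   # -> first disk only
#   _ExpandCheckDisks(instance, [foreign_disk])       # -> ProgrammerError
#
# where "foreign_disk" is a hypothetical objects.Disk that does not belong to
# the instance.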
5765

    
5766

    
5767
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5768
  """Shutdown block devices of an instance.
5769

5770
  This does the shutdown on all nodes of the instance.
5771

5772
  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they cause the function to return False.
5774

5775
  """
5776
  all_result = True
5777
  disks = _ExpandCheckDisks(instance, disks)
5778

    
5779
  for disk in disks:
5780
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5781
      lu.cfg.SetDiskID(top_disk, node)
5782
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5783
      msg = result.fail_msg
5784
      if msg:
5785
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5786
                      disk.iv_name, node, msg)
5787
        if ((node == instance.primary_node and not ignore_primary) or
5788
            (node != instance.primary_node and not result.offline)):
5789
          all_result = False
5790
  return all_result
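
# Editorial usage sketch (assumption, not part of the original code): callers
# that must not silently lose devices check the aggregated boolean, e.g.
#
#   if not _ShutdownInstanceDisks(lu, instance):
#     raise errors.OpExecError("Could not shut down all block devices")
#
# whereas cleanup paths in this module usually ignore the return value on
# purpose.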
5791

    
5792

    
5793
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5794
  """Checks if a node has enough free memory.
5795

5796
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5800

5801
  @type lu: C{LogicalUnit}
5802
  @param lu: a logical unit from which we get configuration data
5803
  @type node: C{str}
5804
  @param node: the node to check
5805
  @type reason: C{str}
5806
  @param reason: string to use in the error message
5807
  @type requested: C{int}
5808
  @param requested: the amount of memory in MiB to check for
5809
  @type hypervisor_name: C{str}
5810
  @param hypervisor_name: the hypervisor to ask for memory stats
5811
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5812
      we cannot check the node
5813

5814
  """
5815
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5816
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5817
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5818
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5819
  if not isinstance(free_mem, int):
5820
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5821
                               " was '%s'" % (node, free_mem),
5822
                               errors.ECODE_ENVIRON)
5823
  if requested > free_mem:
5824
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5825
                               " needed %s MiB, available %s MiB" %
5826
                               (node, reason, requested, free_mem),
5827
                               errors.ECODE_NORES)
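
# Editorial usage sketch (assumption, not part of the original code): this is
# how LUInstanceStartup.CheckPrereq below invokes the check, with "bep" being
# the instance's filled backend parameters:
#
#   bep = self.cfg.GetClusterInfo().FillBE(instance)
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)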
5828

    
5829

    
5830
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5831
  """Checks if nodes have enough free disk space in the all VGs.
5832

5833
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5837

5838
  @type lu: C{LogicalUnit}
5839
  @param lu: a logical unit from which we get configuration data
5840
  @type nodenames: C{list}
5841
  @param nodenames: the list of node names to check
5842
  @type req_sizes: C{dict}
5843
  @param req_sizes: the hash of vg and corresponding amount of disk in
5844
      MiB to check for
5845
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5846
      or we cannot check the node
5847

5848
  """
5849
  for vg, req_size in req_sizes.items():
5850
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
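
# Editorial sketch (assumption, not part of the original code): req_sizes maps
# volume group names to the space required in each, so a call such as
#
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240, "ssdvg": 2048})
#
# requires 10 GiB free in "xenvg" and 2 GiB in "ssdvg" on both nodes; the node
# and VG names above are purely illustrative.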
5851

    
5852

    
5853
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5854
  """Checks if nodes have enough free disk space in the specified VG.
5855

5856
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5860

5861
  @type lu: C{LogicalUnit}
5862
  @param lu: a logical unit from which we get configuration data
5863
  @type nodenames: C{list}
5864
  @param nodenames: the list of node names to check
5865
  @type vg: C{str}
5866
  @param vg: the volume group to check
5867
  @type requested: C{int}
5868
  @param requested: the amount of disk in MiB to check for
5869
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5870
      or we cannot check the node
5871

5872
  """
5873
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5874
  for node in nodenames:
5875
    info = nodeinfo[node]
5876
    info.Raise("Cannot get current information from node %s" % node,
5877
               prereq=True, ecode=errors.ECODE_ENVIRON)
5878
    vg_free = info.payload.get("vg_free", None)
5879
    if not isinstance(vg_free, int):
5880
      raise errors.OpPrereqError("Can't compute free disk space on node"
5881
                                 " %s for vg %s, result was '%s'" %
5882
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5883
    if requested > vg_free:
5884
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5885
                                 " vg %s: required %d MiB, available %d MiB" %
5886
                                 (node, vg, requested, vg_free),
5887
                                 errors.ECODE_NORES)
5888

    
5889

    
5890
class LUInstanceStartup(LogicalUnit):
5891
  """Starts an instance.
5892

5893
  """
5894
  HPATH = "instance-start"
5895
  HTYPE = constants.HTYPE_INSTANCE
5896
  REQ_BGL = False
5897

    
5898
  def CheckArguments(self):
5899
    # extra beparams
5900
    if self.op.beparams:
5901
      # fill the beparams dict
5902
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5903

    
5904
  def ExpandNames(self):
5905
    self._ExpandAndLockInstance()
5906

    
5907
  def BuildHooksEnv(self):
5908
    """Build hooks env.
5909

5910
    This runs on master, primary and secondary nodes of the instance.
5911

5912
    """
5913
    env = {
5914
      "FORCE": self.op.force,
5915
      }
5916

    
5917
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5918

    
5919
    return env
5920

    
5921
  def BuildHooksNodes(self):
5922
    """Build hooks nodes.
5923

5924
    """
5925
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5926
    return (nl, nl)
5927

    
5928
  def CheckPrereq(self):
5929
    """Check prerequisites.
5930

5931
    This checks that the instance is in the cluster.
5932

5933
    """
5934
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5935
    assert self.instance is not None, \
5936
      "Cannot retrieve locked instance %s" % self.op.instance_name
5937

    
5938
    # extra hvparams
5939
    if self.op.hvparams:
5940
      # check hypervisor parameter syntax (locally)
5941
      cluster = self.cfg.GetClusterInfo()
5942
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5943
      filled_hvp = cluster.FillHV(instance)
5944
      filled_hvp.update(self.op.hvparams)
5945
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5946
      hv_type.CheckParameterSyntax(filled_hvp)
5947
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5948

    
5949
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5950

    
5951
    if self.primary_offline and self.op.ignore_offline_nodes:
5952
      self.proc.LogWarning("Ignoring offline primary node")
5953

    
5954
      if self.op.hvparams or self.op.beparams:
5955
        self.proc.LogWarning("Overridden parameters are ignored")
5956
    else:
5957
      _CheckNodeOnline(self, instance.primary_node)
5958

    
5959
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5960

    
5961
      # check bridges existence
5962
      _CheckInstanceBridgesExist(self, instance)
5963

    
5964
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5965
                                                instance.name,
5966
                                                instance.hypervisor)
5967
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5968
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5969
      if not remote_info.payload: # not running already
5970
        _CheckNodeFreeMemory(self, instance.primary_node,
5971
                             "starting instance %s" % instance.name,
5972
                             bep[constants.BE_MEMORY], instance.hypervisor)
5973

    
5974
  def Exec(self, feedback_fn):
5975
    """Start the instance.
5976

5977
    """
5978
    instance = self.instance
5979
    force = self.op.force
5980

    
5981
    if not self.op.no_remember:
5982
      self.cfg.MarkInstanceUp(instance.name)
5983

    
5984
    if self.primary_offline:
5985
      assert self.op.ignore_offline_nodes
5986
      self.proc.LogInfo("Primary node offline, marked instance as started")
5987
    else:
5988
      node_current = instance.primary_node
5989

    
5990
      _StartInstanceDisks(self, instance, force)
5991

    
5992
      result = self.rpc.call_instance_start(node_current, instance,
5993
                                            self.op.hvparams, self.op.beparams,
5994
                                            self.op.startup_paused)
5995
      msg = result.fail_msg
5996
      if msg:
5997
        _ShutdownInstanceDisks(self, instance)
5998
        raise errors.OpExecError("Could not start instance: %s" % msg)
5999

    
6000

    
6001
class LUInstanceReboot(LogicalUnit):
6002
  """Reboot an instance.
6003

6004
  """
6005
  HPATH = "instance-reboot"
6006
  HTYPE = constants.HTYPE_INSTANCE
6007
  REQ_BGL = False
6008

    
6009
  def ExpandNames(self):
6010
    self._ExpandAndLockInstance()
6011

    
6012
  def BuildHooksEnv(self):
6013
    """Build hooks env.
6014

6015
    This runs on master, primary and secondary nodes of the instance.
6016

6017
    """
6018
    env = {
6019
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6020
      "REBOOT_TYPE": self.op.reboot_type,
6021
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6022
      }
6023

    
6024
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6025

    
6026
    return env
6027

    
6028
  def BuildHooksNodes(self):
6029
    """Build hooks nodes.
6030

6031
    """
6032
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6033
    return (nl, nl)
6034

    
6035
  def CheckPrereq(self):
6036
    """Check prerequisites.
6037

6038
    This checks that the instance is in the cluster.
6039

6040
    """
6041
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6042
    assert self.instance is not None, \
6043
      "Cannot retrieve locked instance %s" % self.op.instance_name
6044

    
6045
    _CheckNodeOnline(self, instance.primary_node)
6046

    
6047
    # check bridges existence
6048
    _CheckInstanceBridgesExist(self, instance)
6049

    
6050
  def Exec(self, feedback_fn):
6051
    """Reboot the instance.
6052

6053
    """
6054
    instance = self.instance
6055
    ignore_secondaries = self.op.ignore_secondaries
6056
    reboot_type = self.op.reboot_type
6057

    
6058
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6059
                                              instance.name,
6060
                                              instance.hypervisor)
6061
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6062
    instance_running = bool(remote_info.payload)
6063

    
6064
    node_current = instance.primary_node
6065

    
6066
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6067
                                            constants.INSTANCE_REBOOT_HARD]:
6068
      for disk in instance.disks:
6069
        self.cfg.SetDiskID(disk, node_current)
6070
      result = self.rpc.call_instance_reboot(node_current, instance,
6071
                                             reboot_type,
6072
                                             self.op.shutdown_timeout)
6073
      result.Raise("Could not reboot instance")
6074
    else:
6075
      if instance_running:
6076
        result = self.rpc.call_instance_shutdown(node_current, instance,
6077
                                                 self.op.shutdown_timeout)
6078
        result.Raise("Could not shutdown instance for full reboot")
6079
        _ShutdownInstanceDisks(self, instance)
6080
      else:
6081
        self.LogInfo("Instance %s was already stopped, starting now",
6082
                     instance.name)
6083
      _StartInstanceDisks(self, instance, ignore_secondaries)
6084
      result = self.rpc.call_instance_start(node_current, instance,
6085
                                            None, None, False)
6086
      msg = result.fail_msg
6087
      if msg:
6088
        _ShutdownInstanceDisks(self, instance)
6089
        raise errors.OpExecError("Could not start instance for"
6090
                                 " full reboot: %s" % msg)
6091

    
6092
    self.cfg.MarkInstanceUp(instance.name)
6093

    
6094

    
6095
class LUInstanceShutdown(LogicalUnit):
6096
  """Shutdown an instance.
6097

6098
  """
6099
  HPATH = "instance-stop"
6100
  HTYPE = constants.HTYPE_INSTANCE
6101
  REQ_BGL = False
6102

    
6103
  def ExpandNames(self):
6104
    self._ExpandAndLockInstance()
6105

    
6106
  def BuildHooksEnv(self):
6107
    """Build hooks env.
6108

6109
    This runs on master, primary and secondary nodes of the instance.
6110

6111
    """
6112
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6113
    env["TIMEOUT"] = self.op.timeout
6114
    return env
6115

    
6116
  def BuildHooksNodes(self):
6117
    """Build hooks nodes.
6118

6119
    """
6120
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6121
    return (nl, nl)
6122

    
6123
  def CheckPrereq(self):
6124
    """Check prerequisites.
6125

6126
    This checks that the instance is in the cluster.
6127

6128
    """
6129
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6130
    assert self.instance is not None, \
6131
      "Cannot retrieve locked instance %s" % self.op.instance_name
6132

    
6133
    self.primary_offline = \
6134
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6135

    
6136
    if self.primary_offline and self.op.ignore_offline_nodes:
6137
      self.proc.LogWarning("Ignoring offline primary node")
6138
    else:
6139
      _CheckNodeOnline(self, self.instance.primary_node)
6140

    
6141
  def Exec(self, feedback_fn):
6142
    """Shutdown the instance.
6143

6144
    """
6145
    instance = self.instance
6146
    node_current = instance.primary_node
6147
    timeout = self.op.timeout
6148

    
6149
    if not self.op.no_remember:
6150
      self.cfg.MarkInstanceDown(instance.name)
6151

    
6152
    if self.primary_offline:
6153
      assert self.op.ignore_offline_nodes
6154
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6155
    else:
6156
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6157
      msg = result.fail_msg
6158
      if msg:
6159
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6160

    
6161
      _ShutdownInstanceDisks(self, instance)
6162

    
6163

    
6164
class LUInstanceReinstall(LogicalUnit):
6165
  """Reinstall an instance.
6166

6167
  """
6168
  HPATH = "instance-reinstall"
6169
  HTYPE = constants.HTYPE_INSTANCE
6170
  REQ_BGL = False
6171

    
6172
  def ExpandNames(self):
6173
    self._ExpandAndLockInstance()
6174

    
6175
  def BuildHooksEnv(self):
6176
    """Build hooks env.
6177

6178
    This runs on master, primary and secondary nodes of the instance.
6179

6180
    """
6181
    return _BuildInstanceHookEnvByObject(self, self.instance)
6182

    
6183
  def BuildHooksNodes(self):
6184
    """Build hooks nodes.
6185

6186
    """
6187
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6188
    return (nl, nl)
6189

    
6190
  def CheckPrereq(self):
6191
    """Check prerequisites.
6192

6193
    This checks that the instance is in the cluster and is not running.
6194

6195
    """
6196
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6197
    assert instance is not None, \
6198
      "Cannot retrieve locked instance %s" % self.op.instance_name
6199
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6200
                     " offline, cannot reinstall")
6201
    for node in instance.secondary_nodes:
6202
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6203
                       " cannot reinstall")
6204

    
6205
    if instance.disk_template == constants.DT_DISKLESS:
6206
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6207
                                 self.op.instance_name,
6208
                                 errors.ECODE_INVAL)
6209
    _CheckInstanceDown(self, instance, "cannot reinstall")
6210

    
6211
    if self.op.os_type is not None:
6212
      # OS verification
6213
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6214
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6215
      instance_os = self.op.os_type
6216
    else:
6217
      instance_os = instance.os
6218

    
6219
    nodelist = list(instance.all_nodes)
6220

    
6221
    if self.op.osparams:
6222
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6223
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6224
      self.os_inst = i_osdict # the new dict (without defaults)
6225
    else:
6226
      self.os_inst = None
6227

    
6228
    self.instance = instance
6229

    
6230
  def Exec(self, feedback_fn):
6231
    """Reinstall the instance.
6232

6233
    """
6234
    inst = self.instance
6235

    
6236
    if self.op.os_type is not None:
6237
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6238
      inst.os = self.op.os_type
6239
      # Write to configuration
6240
      self.cfg.Update(inst, feedback_fn)
6241

    
6242
    _StartInstanceDisks(self, inst, None)
6243
    try:
6244
      feedback_fn("Running the instance OS create scripts...")
6245
      # FIXME: pass debug option from opcode to backend
6246
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6247
                                             self.op.debug_level,
6248
                                             osparams=self.os_inst)
6249
      result.Raise("Could not install OS for instance %s on node %s" %
6250
                   (inst.name, inst.primary_node))
6251
    finally:
6252
      _ShutdownInstanceDisks(self, inst)
6253

    
6254

    
6255
class LUInstanceRecreateDisks(LogicalUnit):
6256
  """Recreate an instance's missing disks.
6257

6258
  """
6259
  HPATH = "instance-recreate-disks"
6260
  HTYPE = constants.HTYPE_INSTANCE
6261
  REQ_BGL = False
6262

    
6263
  def CheckArguments(self):
6264
    # normalise the disk list
6265
    self.op.disks = sorted(frozenset(self.op.disks))
6266

    
6267
  def ExpandNames(self):
6268
    self._ExpandAndLockInstance()
6269
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6270
    if self.op.nodes:
6271
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6272
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6273
    else:
6274
      self.needed_locks[locking.LEVEL_NODE] = []
6275

    
6276
  def DeclareLocks(self, level):
6277
    if level == locking.LEVEL_NODE:
6278
      # if we replace the nodes, we only need to lock the old primary,
6279
      # otherwise we need to lock all nodes for disk re-creation
6280
      primary_only = bool(self.op.nodes)
6281
      self._LockInstancesNodes(primary_only=primary_only)
6282

    
6283
  def BuildHooksEnv(self):
6284
    """Build hooks env.
6285

6286
    This runs on master, primary and secondary nodes of the instance.
6287

6288
    """
6289
    return _BuildInstanceHookEnvByObject(self, self.instance)
6290

    
6291
  def BuildHooksNodes(self):
6292
    """Build hooks nodes.
6293

6294
    """
6295
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6296
    return (nl, nl)
6297

    
6298
  def CheckPrereq(self):
6299
    """Check prerequisites.
6300

6301
    This checks that the instance is in the cluster and is not running.
6302

6303
    """
6304
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6305
    assert instance is not None, \
6306
      "Cannot retrieve locked instance %s" % self.op.instance_name
6307
    if self.op.nodes:
6308
      if len(self.op.nodes) != len(instance.all_nodes):
6309
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6310
                                   " %d replacement nodes were specified" %
6311
                                   (instance.name, len(instance.all_nodes),
6312
                                    len(self.op.nodes)),
6313
                                   errors.ECODE_INVAL)
6314
      assert instance.disk_template != constants.DT_DRBD8 or \
6315
          len(self.op.nodes) == 2
6316
      assert instance.disk_template != constants.DT_PLAIN or \
6317
          len(self.op.nodes) == 1
6318
      primary_node = self.op.nodes[0]
6319
    else:
6320
      primary_node = instance.primary_node
6321
    _CheckNodeOnline(self, primary_node)
6322

    
6323
    if instance.disk_template == constants.DT_DISKLESS:
6324
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6325
                                 self.op.instance_name, errors.ECODE_INVAL)
6326
    # if we replace nodes *and* the old primary is offline, we don't
6327
    # check
6328
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6329
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6330
    if not (self.op.nodes and old_pnode.offline):
6331
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6332

    
6333
    if not self.op.disks:
6334
      self.op.disks = range(len(instance.disks))
6335
    else:
6336
      for idx in self.op.disks:
6337
        if idx >= len(instance.disks):
6338
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6339
                                     errors.ECODE_INVAL)
6340
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6341
      raise errors.OpPrereqError("Can't recreate disks partially and"
6342
                                 " change the nodes at the same time",
6343
                                 errors.ECODE_INVAL)
6344
    self.instance = instance
6345

    
6346
  def Exec(self, feedback_fn):
6347
    """Recreate the disks.
6348

6349
    """
6350
    instance = self.instance
6351

    
6352
    to_skip = []
6353
    mods = [] # keeps track of needed logical_id changes
6354

    
6355
    for idx, disk in enumerate(instance.disks):
6356
      if idx not in self.op.disks: # disk idx has not been passed in
6357
        to_skip.append(idx)
6358
        continue
6359
      # update secondaries for disks, if needed
6360
      if self.op.nodes:
6361
        if disk.dev_type == constants.LD_DRBD8:
6362
          # need to update the nodes and minors
6363
          assert len(self.op.nodes) == 2
6364
          assert len(disk.logical_id) == 6 # otherwise disk internals
6365
                                           # have changed
6366
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6367
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6368
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6369
                    new_minors[0], new_minors[1], old_secret)
6370
          assert len(disk.logical_id) == len(new_id)
6371
          mods.append((idx, new_id))
6372

    
6373
    # now that we have passed all asserts above, we can apply the mods
6374
    # in a single run (to avoid partial changes)
6375
    for idx, new_id in mods:
6376
      instance.disks[idx].logical_id = new_id
6377

    
6378
    # change primary node, if needed
6379
    if self.op.nodes:
6380
      instance.primary_node = self.op.nodes[0]
6381
      self.LogWarning("Changing the instance's nodes, you will have to"
6382
                      " remove any disks left on the older nodes manually")
6383

    
6384
    if self.op.nodes:
6385
      self.cfg.Update(instance, feedback_fn)
6386

    
6387
    _CreateDisks(self, instance, to_skip=to_skip)
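
    # Editorial note (assumption, not part of the original code): the DRBD8
    # logical_id rewritten above is the 6-tuple
    #
    #   (node_a, node_b, port, minor_a, minor_b, secret)
    #
    # so recreating disks on new nodes only swaps the node names and the
    # freshly allocated minors while keeping the old port and shared secret.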
6388

    
6389

    
6390
class LUInstanceRename(LogicalUnit):
6391
  """Rename an instance.
6392

6393
  """
6394
  HPATH = "instance-rename"
6395
  HTYPE = constants.HTYPE_INSTANCE
6396

    
6397
  def CheckArguments(self):
6398
    """Check arguments.
6399

6400
    """
6401
    if self.op.ip_check and not self.op.name_check:
6402
      # TODO: make the ip check more flexible and not depend on the name check
6403
      raise errors.OpPrereqError("IP address check requires a name check",
6404
                                 errors.ECODE_INVAL)
6405

    
6406
  def BuildHooksEnv(self):
6407
    """Build hooks env.
6408

6409
    This runs on master, primary and secondary nodes of the instance.
6410

6411
    """
6412
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6413
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6414
    return env
6415

    
6416
  def BuildHooksNodes(self):
6417
    """Build hooks nodes.
6418

6419
    """
6420
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6421
    return (nl, nl)
6422

    
6423
  def CheckPrereq(self):
6424
    """Check prerequisites.
6425

6426
    This checks that the instance is in the cluster and is not running.
6427

6428
    """
6429
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6430
                                                self.op.instance_name)
6431
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6432
    assert instance is not None
6433
    _CheckNodeOnline(self, instance.primary_node)
6434
    _CheckInstanceDown(self, instance, "cannot rename")
6435
    self.instance = instance
6436

    
6437
    new_name = self.op.new_name
6438
    if self.op.name_check:
6439
      hostname = netutils.GetHostname(name=new_name)
6440
      if hostname.name != new_name:
6441
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6442
                     hostname.name)
6443
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6444
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6445
                                    " same as given hostname '%s'") %
6446
                                    (hostname.name, self.op.new_name),
6447
                                    errors.ECODE_INVAL)
6448
      new_name = self.op.new_name = hostname.name
6449
      if (self.op.ip_check and
6450
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6451
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6452
                                   (hostname.ip, new_name),
6453
                                   errors.ECODE_NOTUNIQUE)
6454

    
6455
    instance_list = self.cfg.GetInstanceList()
6456
    if new_name in instance_list and new_name != instance.name:
6457
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6458
                                 new_name, errors.ECODE_EXISTS)
6459

    
6460
  def Exec(self, feedback_fn):
6461
    """Rename the instance.
6462

6463
    """
6464
    inst = self.instance
6465
    old_name = inst.name
6466

    
6467
    rename_file_storage = False
6468
    if (inst.disk_template in constants.DTS_FILEBASED and
6469
        self.op.new_name != inst.name):
6470
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6471
      rename_file_storage = True
6472

    
6473
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6474
    # Change the instance lock. This is definitely safe while we hold the BGL.
6475
    # Otherwise the new lock would have to be added in acquired mode.
6476
    assert self.REQ_BGL
6477
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6478
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6479

    
6480
    # re-read the instance from the configuration after rename
6481
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6482

    
6483
    if rename_file_storage:
6484
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6485
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6486
                                                     old_file_storage_dir,
6487
                                                     new_file_storage_dir)
6488
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6489
                   " (but the instance has been renamed in Ganeti)" %
6490
                   (inst.primary_node, old_file_storage_dir,
6491
                    new_file_storage_dir))
6492

    
6493
    _StartInstanceDisks(self, inst, None)
6494
    try:
6495
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6496
                                                 old_name, self.op.debug_level)
6497
      msg = result.fail_msg
6498
      if msg:
6499
        msg = ("Could not run OS rename script for instance %s on node %s"
6500
               " (but the instance has been renamed in Ganeti): %s" %
6501
               (inst.name, inst.primary_node, msg))
6502
        self.proc.LogWarning(msg)
6503
    finally:
6504
      _ShutdownInstanceDisks(self, inst)
6505

    
6506
    return inst.name
6507

    
6508

    
6509
class LUInstanceRemove(LogicalUnit):
6510
  """Remove an instance.
6511

6512
  """
6513
  HPATH = "instance-remove"
6514
  HTYPE = constants.HTYPE_INSTANCE
6515
  REQ_BGL = False
6516

    
6517
  def ExpandNames(self):
6518
    self._ExpandAndLockInstance()
6519
    self.needed_locks[locking.LEVEL_NODE] = []
6520
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6521

    
6522
  def DeclareLocks(self, level):
6523
    if level == locking.LEVEL_NODE:
6524
      self._LockInstancesNodes()
6525

    
6526
  def BuildHooksEnv(self):
6527
    """Build hooks env.
6528

6529
    This runs on master, primary and secondary nodes of the instance.
6530

6531
    """
6532
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6533
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6534
    return env
6535

    
6536
  def BuildHooksNodes(self):
6537
    """Build hooks nodes.
6538

6539
    """
6540
    nl = [self.cfg.GetMasterNode()]
6541
    nl_post = list(self.instance.all_nodes) + nl
6542
    return (nl, nl_post)
6543

    
6544
  def CheckPrereq(self):
6545
    """Check prerequisites.
6546

6547
    This checks that the instance is in the cluster.
6548

6549
    """
6550
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6551
    assert self.instance is not None, \
6552
      "Cannot retrieve locked instance %s" % self.op.instance_name
6553

    
6554
  def Exec(self, feedback_fn):
6555
    """Remove the instance.
6556

6557
    """
6558
    instance = self.instance
6559
    logging.info("Shutting down instance %s on node %s",
6560
                 instance.name, instance.primary_node)
6561

    
6562
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6563
                                             self.op.shutdown_timeout)
6564
    msg = result.fail_msg
6565
    if msg:
6566
      if self.op.ignore_failures:
6567
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6568
      else:
6569
        raise errors.OpExecError("Could not shutdown instance %s on"
6570
                                 " node %s: %s" %
6571
                                 (instance.name, instance.primary_node, msg))
6572

    
6573
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6574

    
6575

    
6576
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6577
  """Utility function to remove an instance.
6578

6579
  """
6580
  logging.info("Removing block devices for instance %s", instance.name)
6581

    
6582
  if not _RemoveDisks(lu, instance):
6583
    if not ignore_failures:
6584
      raise errors.OpExecError("Can't remove instance's disks")
6585
    feedback_fn("Warning: can't remove instance's disks")
6586

    
6587
  logging.info("Removing instance %s out of cluster config", instance.name)
6588

    
6589
  lu.cfg.RemoveInstance(instance.name)
6590

    
6591
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6592
    "Instance lock removal conflict"
6593

    
6594
  # Remove lock for the instance
6595
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6596

    
6597

    
6598
class LUInstanceQuery(NoHooksLU):
6599
  """Logical unit for querying instances.
6600

6601
  """
6602
  # pylint: disable=W0142
6603
  REQ_BGL = False
6604

    
6605
  def CheckArguments(self):
6606
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6607
                             self.op.output_fields, self.op.use_locking)
6608

    
6609
  def ExpandNames(self):
6610
    self.iq.ExpandNames(self)
6611

    
6612
  def DeclareLocks(self, level):
6613
    self.iq.DeclareLocks(self, level)
6614

    
6615
  def Exec(self, feedback_fn):
6616
    return self.iq.OldStyleQuery(self)
6617

    
6618

    
6619
class LUInstanceFailover(LogicalUnit):
6620
  """Failover an instance.
6621

6622
  """
6623
  HPATH = "instance-failover"
6624
  HTYPE = constants.HTYPE_INSTANCE
6625
  REQ_BGL = False
6626

    
6627
  def CheckArguments(self):
6628
    """Check the arguments.
6629

6630
    """
6631
    self.iallocator = getattr(self.op, "iallocator", None)
6632
    self.target_node = getattr(self.op, "target_node", None)
6633

    
6634
  def ExpandNames(self):
6635
    self._ExpandAndLockInstance()
6636

    
6637
    if self.op.target_node is not None:
6638
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6639

    
6640
    self.needed_locks[locking.LEVEL_NODE] = []
6641
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6642

    
6643
    ignore_consistency = self.op.ignore_consistency
6644
    shutdown_timeout = self.op.shutdown_timeout
6645
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6646
                                       cleanup=False,
6647
                                       failover=True,
6648
                                       ignore_consistency=ignore_consistency,
6649
                                       shutdown_timeout=shutdown_timeout)
6650
    self.tasklets = [self._migrater]
6651

    
6652
  def DeclareLocks(self, level):
6653
    if level == locking.LEVEL_NODE:
6654
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6655
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6656
        if self.op.target_node is None:
6657
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6658
        else:
6659
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6660
                                                   self.op.target_node]
6661
        del self.recalculate_locks[locking.LEVEL_NODE]
6662
      else:
6663
        self._LockInstancesNodes()
6664

    
6665
  def BuildHooksEnv(self):
6666
    """Build hooks env.
6667

6668
    This runs on master, primary and secondary nodes of the instance.
6669

6670
    """
6671
    instance = self._migrater.instance
6672
    source_node = instance.primary_node
6673
    target_node = self.op.target_node
6674
    env = {
6675
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6676
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677
      "OLD_PRIMARY": source_node,
6678
      "NEW_PRIMARY": target_node,
6679
      }
6680

    
6681
    if instance.disk_template in constants.DTS_INT_MIRROR:
6682
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6683
      env["NEW_SECONDARY"] = source_node
6684
    else:
6685
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6686

    
6687
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6688

    
6689
    return env
6690

    
6691
  def BuildHooksNodes(self):
6692
    """Build hooks nodes.
6693

6694
    """
6695
    instance = self._migrater.instance
6696
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6697
    return (nl, nl + [instance.primary_node])
6698

    
6699

    
6700
class LUInstanceMigrate(LogicalUnit):
6701
  """Migrate an instance.
6702

6703
  This is migration without shutting down the instance, as opposed to
  failover, which is done with a shutdown.
6705

6706
  """
6707
  HPATH = "instance-migrate"
6708
  HTYPE = constants.HTYPE_INSTANCE
6709
  REQ_BGL = False
6710

    
6711
  def ExpandNames(self):
6712
    self._ExpandAndLockInstance()
6713

    
6714
    if self.op.target_node is not None:
6715
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6716

    
6717
    self.needed_locks[locking.LEVEL_NODE] = []
6718
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6719

    
6720
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6721
                                       cleanup=self.op.cleanup,
6722
                                       failover=False,
6723
                                       fallback=self.op.allow_failover)
6724
    self.tasklets = [self._migrater]
6725

    
6726
  def DeclareLocks(self, level):
6727
    if level == locking.LEVEL_NODE:
6728
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6729
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6730
        if self.op.target_node is None:
6731
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6732
        else:
6733
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6734
                                                   self.op.target_node]
6735
        del self.recalculate_locks[locking.LEVEL_NODE]
6736
      else:
6737
        self._LockInstancesNodes()
6738

    
6739
  def BuildHooksEnv(self):
6740
    """Build hooks env.
6741

6742
    This runs on master, primary and secondary nodes of the instance.
6743

6744
    """
6745
    instance = self._migrater.instance
6746
    source_node = instance.primary_node
6747
    target_node = self.op.target_node
6748
    env = _BuildInstanceHookEnvByObject(self, instance)
6749
    env.update({
6750
      "MIGRATE_LIVE": self._migrater.live,
6751
      "MIGRATE_CLEANUP": self.op.cleanup,
6752
      "OLD_PRIMARY": source_node,
6753
      "NEW_PRIMARY": target_node,
6754
      })
6755

    
6756
    if instance.disk_template in constants.DTS_INT_MIRROR:
6757
      env["OLD_SECONDARY"] = target_node
6758
      env["NEW_SECONDARY"] = source_node
6759
    else:
6760
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6761

    
6762
    return env
6763

    
6764
  def BuildHooksNodes(self):
6765
    """Build hooks nodes.
6766

6767
    """
6768
    instance = self._migrater.instance
6769
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6770
    return (nl, nl + [instance.primary_node])
6771

    
6772

    
6773
class LUInstanceMove(LogicalUnit):
6774
  """Move an instance by data-copying.
6775

6776
  """
6777
  HPATH = "instance-move"
6778
  HTYPE = constants.HTYPE_INSTANCE
6779
  REQ_BGL = False
6780

    
6781
  def ExpandNames(self):
6782
    self._ExpandAndLockInstance()
6783
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6784
    self.op.target_node = target_node
6785
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6786
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6787

    
6788
  def DeclareLocks(self, level):
6789
    if level == locking.LEVEL_NODE:
6790
      self._LockInstancesNodes(primary_only=True)
6791

    
6792
  def BuildHooksEnv(self):
6793
    """Build hooks env.
6794

6795
    This runs on master, primary and secondary nodes of the instance.
6796

6797
    """
6798
    env = {
6799
      "TARGET_NODE": self.op.target_node,
6800
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6801
      }
6802
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6803
    return env
6804

    
6805
  def BuildHooksNodes(self):
6806
    """Build hooks nodes.
6807

6808
    """
6809
    nl = [
6810
      self.cfg.GetMasterNode(),
6811
      self.instance.primary_node,
6812
      self.op.target_node,
6813
      ]
6814
    return (nl, nl)
6815

    
6816
  def CheckPrereq(self):
6817
    """Check prerequisites.
6818

6819
    This checks that the instance is in the cluster.
6820

6821
    """
6822
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6823
    assert self.instance is not None, \
6824
      "Cannot retrieve locked instance %s" % self.op.instance_name
6825

    
6826
    node = self.cfg.GetNodeInfo(self.op.target_node)
6827
    assert node is not None, \
6828
      "Cannot retrieve locked node %s" % self.op.target_node
6829

    
6830
    self.target_node = target_node = node.name
6831

    
6832
    if target_node == instance.primary_node:
6833
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6834
                                 (instance.name, target_node),
6835
                                 errors.ECODE_STATE)
6836

    
6837
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6838

    
6839
    for idx, dsk in enumerate(instance.disks):
6840
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6841
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6842
                                   " cannot copy" % idx, errors.ECODE_STATE)
6843

    
6844
    _CheckNodeOnline(self, target_node)
6845
    _CheckNodeNotDrained(self, target_node)
6846
    _CheckNodeVmCapable(self, target_node)
6847

    
6848
    if instance.admin_up:
6849
      # check memory requirements on the secondary node
6850
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6851
                           instance.name, bep[constants.BE_MEMORY],
6852
                           instance.hypervisor)
6853
    else:
6854
      self.LogInfo("Not checking memory on the secondary node as"
6855
                   " instance will not be started")
6856

    
6857
    # check bridge existence
6858
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6859

    
6860
  def Exec(self, feedback_fn):
6861
    """Move an instance.
6862

6863
    The move is done by shutting it down on its present node, copying
6864
    the data over (slow) and starting it on the new node.
6865

6866
    """
6867
    instance = self.instance
6868

    
6869
    source_node = instance.primary_node
6870
    target_node = self.target_node
6871

    
6872
    self.LogInfo("Shutting down instance %s on source node %s",
6873
                 instance.name, source_node)
6874

    
6875
    result = self.rpc.call_instance_shutdown(source_node, instance,
6876
                                             self.op.shutdown_timeout)
6877
    msg = result.fail_msg
6878
    if msg:
6879
      if self.op.ignore_consistency:
6880
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6881
                             " Proceeding anyway. Please make sure node"
6882
                             " %s is down. Error details: %s",
6883
                             instance.name, source_node, source_node, msg)
6884
      else:
6885
        raise errors.OpExecError("Could not shutdown instance %s on"
6886
                                 " node %s: %s" %
6887
                                 (instance.name, source_node, msg))
6888

    
6889
    # create the target disks
6890
    try:
6891
      _CreateDisks(self, instance, target_node=target_node)
6892
    except errors.OpExecError:
6893
      self.LogWarning("Device creation failed, reverting...")
6894
      try:
6895
        _RemoveDisks(self, instance, target_node=target_node)
6896
      finally:
6897
        self.cfg.ReleaseDRBDMinors(instance.name)
6898
        raise
6899

    
6900
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6901

    
6902
    errs = []
6903
    # activate, get path, copy the data over
6904
    for idx, disk in enumerate(instance.disks):
6905
      self.LogInfo("Copying data for disk %d", idx)
6906
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6907
                                               instance.name, True, idx)
6908
      if result.fail_msg:
6909
        self.LogWarning("Can't assemble newly created disk %d: %s",
6910
                        idx, result.fail_msg)
6911
        errs.append(result.fail_msg)
6912
        break
6913
      dev_path = result.payload
6914
      result = self.rpc.call_blockdev_export(source_node, disk,
6915
                                             target_node, dev_path,
6916
                                             cluster_name)
6917
      if result.fail_msg:
6918
        self.LogWarning("Can't copy data over for disk %d: %s",
6919
                        idx, result.fail_msg)
6920
        errs.append(result.fail_msg)
6921
        break
6922

    
6923
    if errs:
6924
      self.LogWarning("Some disks failed to copy, aborting")
6925
      try:
6926
        _RemoveDisks(self, instance, target_node=target_node)
6927
      finally:
6928
        self.cfg.ReleaseDRBDMinors(instance.name)
6929
        raise errors.OpExecError("Errors during disk copy: %s" %
6930
                                 (",".join(errs),))
6931

    
6932
    instance.primary_node = target_node
6933
    self.cfg.Update(instance, feedback_fn)
6934

    
6935
    self.LogInfo("Removing the disks on the original node")
6936
    _RemoveDisks(self, instance, target_node=source_node)
6937

    
6938
    # Only start the instance if it's marked as up
6939
    if instance.admin_up:
6940
      self.LogInfo("Starting instance %s on node %s",
6941
                   instance.name, target_node)
6942

    
6943
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6944
                                           ignore_secondaries=True)
6945
      if not disks_ok:
6946
        _ShutdownInstanceDisks(self, instance)
6947
        raise errors.OpExecError("Can't activate the instance's disks")
6948

    
6949
      result = self.rpc.call_instance_start(target_node, instance,
6950
                                            None, None, False)
6951
      msg = result.fail_msg
6952
      if msg:
6953
        _ShutdownInstanceDisks(self, instance)
6954
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6955
                                 (instance.name, target_node, msg))
6956

    
6957

    
6958
class LUNodeMigrate(LogicalUnit):
6959
  """Migrate all instances from a node.
6960

6961
  """
6962
  HPATH = "node-migrate"
6963
  HTYPE = constants.HTYPE_NODE
6964
  REQ_BGL = False
6965

    
6966
  def CheckArguments(self):
6967
    pass
6968

    
6969
  def ExpandNames(self):
6970
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6971

    
6972
    self.share_locks = _ShareAll()
6973
    self.needed_locks = {
6974
      locking.LEVEL_NODE: [self.op.node_name],
6975
      }
6976

    
6977
  def BuildHooksEnv(self):
6978
    """Build hooks env.
6979

6980
    This runs on the master, the primary and all the secondaries.
6981

6982
    """
6983
    return {
6984
      "NODE_NAME": self.op.node_name,
6985
      }
6986

    
6987
  def BuildHooksNodes(self):
6988
    """Build hooks nodes.
6989

6990
    """
6991
    nl = [self.cfg.GetMasterNode()]
6992
    return (nl, nl)
6993

    
6994
  def CheckPrereq(self):
6995
    pass
6996

    
6997
  def Exec(self, feedback_fn):
6998
    # Prepare jobs for migration instances
6999
    jobs = [
7000
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7001
                                 mode=self.op.mode,
7002
                                 live=self.op.live,
7003
                                 iallocator=self.op.iallocator,
7004
                                 target_node=self.op.target_node)]
7005
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7006
      ]
7007

    
7008
    # TODO: Run iallocator in this opcode and pass correct placement options to
7009
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7010
    # running the iallocator and the actual migration, a good consistency model
7011
    # will have to be found.
7012

    
7013
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7014
            frozenset([self.op.node_name]))
7015

    
7016
    return ResultWithJobs(jobs)
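
    # Editorial sketch (assumption, not part of the original code): for a node
    # whose primary instances are "inst1" and "inst2", the jobs list built
    # above looks like
    #
    #   [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
    #    [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
    #
    # i.e. one single-opcode job per instance, which the master submits via
    # the ResultWithJobs container; the instance names are illustrative.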
7017

    
7018

    
7019
class TLMigrateInstance(Tasklet):
7020
  """Tasklet class for instance migration.
7021

7022
  @type live: boolean
7023
  @ivar live: whether the migration will be done live or non-live;
7024
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
7041

7042
  """
7043
  def __init__(self, lu, instance_name, cleanup=False,
7044
               failover=False, fallback=False,
7045
               ignore_consistency=False,
7046
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7047
    """Initializes this class.
7048

7049
    """
7050
    Tasklet.__init__(self, lu)
7051

    
7052
    # Parameters
7053
    self.instance_name = instance_name
7054
    self.cleanup = cleanup
7055
    self.live = False # will be overridden later
7056
    self.failover = failover
7057
    self.fallback = fallback
7058
    self.ignore_consistency = ignore_consistency
7059
    self.shutdown_timeout = shutdown_timeout
7060

    
7061
  def CheckPrereq(self):
7062
    """Check prerequisites.
7063

7064
    This checks that the instance is in the cluster.
7065

7066
    """
7067
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7068
    instance = self.cfg.GetInstanceInfo(instance_name)
7069
    assert instance is not None
7070
    self.instance = instance
7071

    
7072
    if (not self.cleanup and not instance.admin_up and not self.failover and
7073
        self.fallback):
7074
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7075
                      " to failover")
7076
      self.failover = True
7077

    
7078
    if instance.disk_template not in constants.DTS_MIRRORED:
7079
      if self.failover:
7080
        text = "failovers"
7081
      else:
7082
        text = "migrations"
7083
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7084
                                 " %s" % (instance.disk_template, text),
7085
                                 errors.ECODE_STATE)
7086

    
7087
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7088
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7089

    
7090
      if self.lu.op.iallocator:
7091
        self._RunAllocator()
7092
      else:
7093
        # We set self.target_node as it is required by
        # BuildHooksEnv
7095
        self.target_node = self.lu.op.target_node
7096

    
7097
      # self.target_node is already populated, either directly or by the
7098
      # iallocator run
7099
      target_node = self.target_node
7100
      if self.target_node == instance.primary_node:
7101
        raise errors.OpPrereqError("Cannot migrate instance %s"
7102
                                   " to its primary (%s)" %
7103
                                   (instance.name, instance.primary_node))
7104

    
7105
      if len(self.lu.tasklets) == 1:
7106
        # It is safe to release locks only when we're the only tasklet
7107
        # in the LU
7108
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7109
                      keep=[instance.primary_node, self.target_node])
7110

    
7111
    else:
7112
      secondary_nodes = instance.secondary_nodes
7113
      if not secondary_nodes:
7114
        raise errors.ConfigurationError("No secondary node but using"
7115
                                        " %s disk template" %
7116
                                        instance.disk_template)
7117
      target_node = secondary_nodes[0]
7118
      if self.lu.op.iallocator or (self.lu.op.target_node and
7119
                                   self.lu.op.target_node != target_node):
7120
        if self.failover:
7121
          text = "failed over"
7122
        else:
7123
          text = "migrated"
7124
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7125
                                   " be %s to arbitrary nodes"
7126
                                   " (neither an iallocator nor a target"
7127
                                   " node can be passed)" %
7128
                                   (instance.disk_template, text),
7129
                                   errors.ECODE_INVAL)
7130

    
7131
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7132

    
7133
    # check memory requirements on the secondary node
7134
    if not self.failover or instance.admin_up:
7135
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7136
                           instance.name, i_be[constants.BE_MEMORY],
7137
                           instance.hypervisor)
7138
    else:
7139
      self.lu.LogInfo("Not checking memory on the secondary node as"
7140
                      " instance will not be started")
7141

    
7142
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7144

    
7145
    if not self.cleanup:
7146
      _CheckNodeNotDrained(self.lu, target_node)
7147
      if not self.failover:
7148
        result = self.rpc.call_instance_migratable(instance.primary_node,
7149
                                                   instance)
7150
        if result.fail_msg and self.fallback:
7151
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7152
                          " failover")
7153
          self.failover = True
7154
        else:
7155
          result.Raise("Can't migrate, please use failover",
7156
                       prereq=True, ecode=errors.ECODE_STATE)
7157

    
7158
    assert not (self.failover and self.cleanup)
7159

    
7160
    if not self.failover:
7161
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7162
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7163
                                   " parameters are accepted",
7164
                                   errors.ECODE_INVAL)
7165
      if self.lu.op.live is not None:
7166
        if self.lu.op.live:
7167
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7168
        else:
7169
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7170
        # reset the 'live' parameter to None so that repeated
7171
        # invocations of CheckPrereq do not raise an exception
7172
        self.lu.op.live = None
7173
      elif self.lu.op.mode is None:
7174
        # read the default value from the hypervisor
7175
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7176
                                                skip_globals=False)
7177
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7178

    
7179
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7180
    else:
7181
      # Failover is never live
7182
      self.live = False
7183

    
7184
  def _RunAllocator(self):
7185
    """Run the allocator based on input opcode.
7186

7187
    """
7188
    ial = IAllocator(self.cfg, self.rpc,
7189
                     mode=constants.IALLOCATOR_MODE_RELOC,
7190
                     name=self.instance_name,
7191
                     # TODO See why hail breaks with a single node below
7192
                     relocate_from=[self.instance.primary_node,
7193
                                    self.instance.primary_node],
7194
                     )
7195

    
7196
    ial.Run(self.lu.op.iallocator)
7197

    
7198
    if not ial.success:
7199
      raise errors.OpPrereqError("Can't compute nodes using"
7200
                                 " iallocator '%s': %s" %
7201
                                 (self.lu.op.iallocator, ial.info),
7202
                                 errors.ECODE_NORES)
7203
    if len(ial.result) != ial.required_nodes:
7204
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7205
                                 " of nodes (%s), required %s" %
7206
                                 (self.lu.op.iallocator, len(ial.result),
7207
                                  ial.required_nodes), errors.ECODE_FAULT)
7208
    self.target_node = ial.result[0]
7209
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7210
                 self.instance_name, self.lu.op.iallocator,
7211
                 utils.CommaJoin(ial.result))
7212

    
7213
  def _WaitUntilSync(self):
7214
    """Poll with custom rpc for disk sync.
7215

7216
    This uses our own step-based rpc call.
7217

7218
    """
7219
    self.feedback_fn("* wait until resync is done")
7220
    all_done = False
7221
    while not all_done:
7222
      all_done = True
7223
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7224
                                            self.nodes_ip,
7225
                                            self.instance.disks)
7226
      min_percent = 100
7227
      for node, nres in result.items():
7228
        nres.Raise("Cannot resync disks on node %s" % node)
7229
        node_done, node_percent = nres.payload
7230
        all_done = all_done and node_done
7231
        if node_percent is not None:
7232
          min_percent = min(min_percent, node_percent)
7233
      if not all_done:
7234
        if min_percent < 100:
7235
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7236
        time.sleep(2)
7237

    
7238
  def _EnsureSecondary(self, node):
7239
    """Demote a node to secondary.
7240

7241
    """
7242
    self.feedback_fn("* switching node %s to secondary mode" % node)
7243

    
7244
    for dev in self.instance.disks:
7245
      self.cfg.SetDiskID(dev, node)
7246

    
7247
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7248
                                          self.instance.disks)
7249
    result.Raise("Cannot change disk to secondary on node %s" % node)
7250

    
7251
  def _GoStandalone(self):
7252
    """Disconnect from the network.
7253

7254
    """
7255
    self.feedback_fn("* changing into standalone mode")
7256
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7257
                                               self.instance.disks)
7258
    for node, nres in result.items():
7259
      nres.Raise("Cannot disconnect disks node %s" % node)
7260

    
7261
  def _GoReconnect(self, multimaster):
7262
    """Reconnect to the network.
7263

7264
    """
7265
    if multimaster:
7266
      msg = "dual-master"
7267
    else:
7268
      msg = "single-master"
7269
    self.feedback_fn("* changing disks into %s mode" % msg)
7270
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7271
                                           self.instance.disks,
7272
                                           self.instance.name, multimaster)
7273
    for node, nres in result.items():
7274
      nres.Raise("Cannot change disks config on node %s" % node)
7275

    
7276
  def _ExecCleanup(self):
7277
    """Try to cleanup after a failed migration.
7278

7279
    The cleanup is done by:
7280
      - check that the instance is running only on one node
7281
        (and update the config if needed)
7282
      - change disks on its secondary node to secondary
7283
      - wait until disks are fully synchronized
7284
      - disconnect from the network
7285
      - change disks into single-master mode
7286
      - wait again until disks are fully synchronized
7287

7288
    """
7289
    instance = self.instance
7290
    target_node = self.target_node
7291
    source_node = self.source_node
7292

    
7293
    # check running on only one node
7294
    self.feedback_fn("* checking where the instance actually runs"
7295
                     " (if this hangs, the hypervisor might be in"
7296
                     " a bad state)")
7297
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7298
    for node, result in ins_l.items():
7299
      result.Raise("Can't contact node %s" % node)
7300

    
7301
    runningon_source = instance.name in ins_l[source_node].payload
7302
    runningon_target = instance.name in ins_l[target_node].payload
7303

    
7304
    if runningon_source and runningon_target:
7305
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7306
                               " or the hypervisor is confused; you will have"
7307
                               " to ensure manually that it runs only on one"
7308
                               " and restart this operation")
7309

    
7310
    if not (runningon_source or runningon_target):
7311
      raise errors.OpExecError("Instance does not seem to be running at all;"
7312
                               " in this case it's safer to repair by"
7313
                               " running 'gnt-instance stop' to ensure disk"
7314
                               " shutdown, and then restarting it")
7315

    
7316
    if runningon_target:
7317
      # the migration has actually succeeded, we need to update the config
7318
      self.feedback_fn("* instance running on secondary node (%s),"
7319
                       " updating config" % target_node)
7320
      instance.primary_node = target_node
7321
      self.cfg.Update(instance, self.feedback_fn)
7322
      demoted_node = source_node
7323
    else:
7324
      self.feedback_fn("* instance confirmed to be running on its"
7325
                       " primary node (%s)" % source_node)
7326
      demoted_node = target_node
7327

    
7328
    if instance.disk_template in constants.DTS_INT_MIRROR:
7329
      self._EnsureSecondary(demoted_node)
7330
      try:
7331
        self._WaitUntilSync()
7332
      except errors.OpExecError:
7333
        # we ignore here errors, since if the device is standalone, it
7334
        # won't be able to sync
7335
        pass
7336
      self._GoStandalone()
7337
      self._GoReconnect(False)
7338
      self._WaitUntilSync()
7339

    
7340
    self.feedback_fn("* done")
7341

    
7342
  def _RevertDiskStatus(self):
7343
    """Try to revert the disk status after a failed migration.
7344

7345
    """
7346
    target_node = self.target_node
7347
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7348
      return
7349

    
7350
    try:
7351
      self._EnsureSecondary(target_node)
7352
      self._GoStandalone()
7353
      self._GoReconnect(False)
7354
      self._WaitUntilSync()
7355
    except errors.OpExecError, err:
7356
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7357
                         " please try to recover the instance manually;"
7358
                         " error '%s'" % str(err))
7359

    
7360
  def _AbortMigration(self):
7361
    """Call the hypervisor code to abort a started migration.
7362

7363
    """
7364
    instance = self.instance
7365
    target_node = self.target_node
7366
    migration_info = self.migration_info
7367

    
7368
    abort_result = self.rpc.call_finalize_migration(target_node,
7369
                                                    instance,
7370
                                                    migration_info,
7371
                                                    False)
7372
    abort_msg = abort_result.fail_msg
7373
    if abort_msg:
7374
      logging.error("Aborting migration failed on target node %s: %s",
7375
                    target_node, abort_msg)
7376
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7378

    
7379
  def _ExecMigration(self):
7380
    """Migrate an instance.
7381

7382
    The migrate is done by:
7383
      - change the disks into dual-master mode
7384
      - wait until disks are fully synchronized again
7385
      - migrate the instance
7386
      - change disks on the new secondary node (the old primary) to secondary
7387
      - wait until disks are fully synchronized
7388
      - change disks into single-master mode
7389

7390
    """
7391
    instance = self.instance
7392
    target_node = self.target_node
7393
    source_node = self.source_node
7394

    
7395
    self.feedback_fn("* checking disk consistency between source and target")
7396
    for dev in instance.disks:
7397
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7398
        raise errors.OpExecError("Disk %s is degraded or not fully"
7399
                                 " synchronized on target node,"
7400
                                 " aborting migration" % dev.iv_name)
7401

    
7402
    # First get the migration information from the remote node
7403
    result = self.rpc.call_migration_info(source_node, instance)
7404
    msg = result.fail_msg
7405
    if msg:
7406
      log_err = ("Failed fetching source migration information from %s: %s" %
7407
                 (source_node, msg))
7408
      logging.error(log_err)
7409
      raise errors.OpExecError(log_err)
7410

    
7411
    self.migration_info = migration_info = result.payload
7412

    
7413
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7414
      # Then switch the disks to master/master mode
7415
      self._EnsureSecondary(target_node)
7416
      self._GoStandalone()
7417
      self._GoReconnect(True)
7418
      self._WaitUntilSync()
7419

    
7420
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7421
    result = self.rpc.call_accept_instance(target_node,
7422
                                           instance,
7423
                                           migration_info,
7424
                                           self.nodes_ip[target_node])
7425

    
7426
    msg = result.fail_msg
7427
    if msg:
7428
      logging.error("Instance pre-migration failed, trying to revert"
7429
                    " disk status: %s", msg)
7430
      self.feedback_fn("Pre-migration failed, aborting")
7431
      self._AbortMigration()
7432
      self._RevertDiskStatus()
7433
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7434
                               (instance.name, msg))
7435

    
7436
    self.feedback_fn("* migrating instance to %s" % target_node)
7437
    result = self.rpc.call_instance_migrate(source_node, instance,
7438
                                            self.nodes_ip[target_node],
7439
                                            self.live)
7440
    msg = result.fail_msg
7441
    if msg:
7442
      logging.error("Instance migration failed, trying to revert"
7443
                    " disk status: %s", msg)
7444
      self.feedback_fn("Migration failed, aborting")
7445
      self._AbortMigration()
7446
      self._RevertDiskStatus()
7447
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7448
                               (instance.name, msg))
7449

    
7450
    instance.primary_node = target_node
7451
    # distribute new instance config to the other nodes
7452
    self.cfg.Update(instance, self.feedback_fn)
7453

    
7454
    result = self.rpc.call_finalize_migration(target_node,
7455
                                              instance,
7456
                                              migration_info,
7457
                                              True)
7458
    msg = result.fail_msg
7459
    if msg:
7460
      logging.error("Instance migration succeeded, but finalization failed:"
7461
                    " %s", msg)
7462
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7463
                               msg)
7464

    
7465
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7466
      self._EnsureSecondary(source_node)
7467
      self._WaitUntilSync()
7468
      self._GoStandalone()
7469
      self._GoReconnect(False)
7470
      self._WaitUntilSync()
7471

    
7472
    self.feedback_fn("* done")
7473

    
7474
  def _ExecFailover(self):
7475
    """Failover an instance.
7476

7477
    The failover is done by shutting it down on its present node and
7478
    starting it on the secondary.
7479

7480
    """
7481
    instance = self.instance
7482
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7483

    
7484
    source_node = instance.primary_node
7485
    target_node = self.target_node
7486

    
7487
    if instance.admin_up:
7488
      self.feedback_fn("* checking disk consistency between source and target")
7489
      for dev in instance.disks:
7490
        # for drbd, these are drbd over lvm
7491
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7492
          if primary_node.offline:
7493
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7494
                             " target node %s" %
7495
                             (primary_node.name, dev.iv_name, target_node))
7496
          elif not self.ignore_consistency:
7497
            raise errors.OpExecError("Disk %s is degraded on target node,"
7498
                                     " aborting failover" % dev.iv_name)
7499
    else:
7500
      self.feedback_fn("* not checking disk consistency as instance is not"
7501
                       " running")
7502

    
7503
    self.feedback_fn("* shutting down instance on source node")
7504
    logging.info("Shutting down instance %s on node %s",
7505
                 instance.name, source_node)
7506

    
7507
    result = self.rpc.call_instance_shutdown(source_node, instance,
7508
                                             self.shutdown_timeout)
7509
    msg = result.fail_msg
7510
    if msg:
7511
      if self.ignore_consistency or primary_node.offline:
7512
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7513
                           " proceeding anyway; please make sure node"
7514
                           " %s is down; error details: %s",
7515
                           instance.name, source_node, source_node, msg)
7516
      else:
7517
        raise errors.OpExecError("Could not shutdown instance %s on"
7518
                                 " node %s: %s" %
7519
                                 (instance.name, source_node, msg))
7520

    
7521
    self.feedback_fn("* deactivating the instance's disks on source node")
7522
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7523
      raise errors.OpExecError("Can't shut down the instance's disks")
7524

    
7525
    instance.primary_node = target_node
7526
    # distribute new instance config to the other nodes
7527
    self.cfg.Update(instance, self.feedback_fn)
7528

    
7529
    # Only start the instance if it's marked as up
7530
    if instance.admin_up:
7531
      self.feedback_fn("* activating the instance's disks on target node %s" %
7532
                       target_node)
7533
      logging.info("Starting instance %s on node %s",
7534
                   instance.name, target_node)
7535

    
7536
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7537
                                           ignore_secondaries=True)
7538
      if not disks_ok:
7539
        _ShutdownInstanceDisks(self.lu, instance)
7540
        raise errors.OpExecError("Can't activate the instance's disks")
7541

    
7542
      self.feedback_fn("* starting the instance on the target node %s" %
7543
                       target_node)
7544
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7545
                                            False)
7546
      msg = result.fail_msg
7547
      if msg:
7548
        _ShutdownInstanceDisks(self.lu, instance)
7549
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7550
                                 (instance.name, target_node, msg))
7551

    
7552
  def Exec(self, feedback_fn):
7553
    """Perform the migration.
7554

7555
    """
7556
    self.feedback_fn = feedback_fn
7557
    self.source_node = self.instance.primary_node
7558

    
7559
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7560
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7561
      self.target_node = self.instance.secondary_nodes[0]
7562
      # Otherwise self.target_node has been populated either
7563
      # directly, or through an iallocator.
7564

    
7565
    self.all_nodes = [self.source_node, self.target_node]
7566
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7567
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7568

    
7569
    if self.failover:
7570
      feedback_fn("Failover instance %s" % self.instance.name)
7571
      self._ExecFailover()
7572
    else:
7573
      feedback_fn("Migrating instance %s" % self.instance.name)
7574

    
7575
      if self.cleanup:
7576
        return self._ExecCleanup()
7577
      else:
7578
        return self._ExecMigration()
7579

    
7580

    
7581
def _CreateBlockDev(lu, node, instance, device, force_create,
7582
                    info, force_open):
7583
  """Create a tree of block devices on a given node.
7584

7585
  If this device type has to be created on secondaries, create it and
7586
  all its children.
7587

7588
  If not, just recurse to children keeping the same 'force' value.
7589

7590
  @param lu: the lu on whose behalf we execute
7591
  @param node: the node on which to create the device
7592
  @type instance: L{objects.Instance}
7593
  @param instance: the instance which owns the device
7594
  @type device: L{objects.Disk}
7595
  @param device: the device to create
7596
  @type force_create: boolean
7597
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7607

7608
  """
7609
  if device.CreateOnSecondary():
7610
    force_create = True
7611

    
7612
  if device.children:
7613
    for child in device.children:
7614
      _CreateBlockDev(lu, node, instance, child, force_create,
7615
                      info, force_open)
7616

    
7617
  if not force_create:
7618
    return
7619

    
7620
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7621
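

# Note (added for clarity, not in the original source): _CreateBlockDev
# recurses into the children before calling _CreateSingleBlockDev, so for a
# DRBD8 disk the underlying data and metadata LVs already exist when the DRBD
# device itself is created.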

    
7622

    
7623
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7624
  """Create a single block device on a given node.
7625

7626
  This will not recurse over children of the device, so they must be
7627
  created in advance.
7628

7629
  @param lu: the lu on whose behalf we execute
7630
  @param node: the node on which to create the device
7631
  @type instance: L{objects.Instance}
7632
  @param instance: the instance which owns the device
7633
  @type device: L{objects.Disk}
7634
  @param device: the device to create
7635
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7642

7643
  """
7644
  lu.cfg.SetDiskID(device, node)
7645
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7646
                                       instance.name, force_open, info)
7647
  result.Raise("Can't create block device %s on"
7648
               " node %s for instance %s" % (device, node, instance.name))
7649
  if device.physical_id is None:
7650
    device.physical_id = result.payload
7651

    
7652

    
7653
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate a logical volume name (a unique ID followed by the
  given extension) for each requested extension.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
7664

    
7665

    
7666
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7667
                         iv_name, p_minor, s_minor):
7668
  """Generate a drbd8 device complete with its children.
7669

7670
  """
7671
  assert len(vgnames) == len(names) == 2
7672
  port = lu.cfg.AllocatePort()
7673
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7674
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7675
                          logical_id=(vgnames[0], names[0]))
7676
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7677
                          logical_id=(vgnames[1], names[1]))
7678
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7679
                          logical_id=(primary, secondary, port,
7680
                                      p_minor, s_minor,
7681
                                      shared_secret),
7682
                          children=[dev_data, dev_meta],
7683
                          iv_name=iv_name)
7684
  return drbd_dev
7685
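

# Note (added for clarity, not in the original source): the DRBD8 device
# returned by _GenerateDRBD8Branch always has exactly two children, the data
# LV sized like the instance disk and a fixed 128 MiB metadata LV; this is
# the same 128 MiB overhead that _ComputeDiskSizePerVG and _ComputeDiskSize
# below add for each DRBD disk.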

    
7686

    
7687
def _GenerateDiskTemplate(lu, template_name,
7688
                          instance_name, primary_node,
7689
                          secondary_nodes, disk_info,
7690
                          file_storage_dir, file_driver,
7691
                          base_index, feedback_fn):
7692
  """Generate the entire disk layout for a given template type.
7693

7694
  """
7695
  #TODO: compute space requirements
7696

    
7697
  vgname = lu.cfg.GetVGName()
7698
  disk_count = len(disk_info)
7699
  disks = []
7700
  if template_name == constants.DT_DISKLESS:
7701
    pass
7702
  elif template_name == constants.DT_PLAIN:
7703
    if len(secondary_nodes) != 0:
7704
      raise errors.ProgrammerError("Wrong template configuration")
7705

    
7706
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7707
                                      for i in range(disk_count)])
7708
    for idx, disk in enumerate(disk_info):
7709
      disk_index = idx + base_index
7710
      vg = disk.get(constants.IDISK_VG, vgname)
7711
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7712
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7713
                              size=disk[constants.IDISK_SIZE],
7714
                              logical_id=(vg, names[idx]),
7715
                              iv_name="disk/%d" % disk_index,
7716
                              mode=disk[constants.IDISK_MODE])
7717
      disks.append(disk_dev)
7718
  elif template_name == constants.DT_DRBD8:
7719
    if len(secondary_nodes) != 1:
7720
      raise errors.ProgrammerError("Wrong template configuration")
7721
    remote_node = secondary_nodes[0]
7722
    minors = lu.cfg.AllocateDRBDMinor(
7723
      [primary_node, remote_node] * len(disk_info), instance_name)
7724

    
7725
    names = []
7726
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7727
                                               for i in range(disk_count)]):
7728
      names.append(lv_prefix + "_data")
7729
      names.append(lv_prefix + "_meta")
7730
    for idx, disk in enumerate(disk_info):
7731
      disk_index = idx + base_index
7732
      data_vg = disk.get(constants.IDISK_VG, vgname)
7733
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7734
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7735
                                      disk[constants.IDISK_SIZE],
7736
                                      [data_vg, meta_vg],
7737
                                      names[idx * 2:idx * 2 + 2],
7738
                                      "disk/%d" % disk_index,
7739
                                      minors[idx * 2], minors[idx * 2 + 1])
7740
      disk_dev.mode = disk[constants.IDISK_MODE]
7741
      disks.append(disk_dev)
7742
  elif template_name == constants.DT_FILE:
7743
    if len(secondary_nodes) != 0:
7744
      raise errors.ProgrammerError("Wrong template configuration")
7745

    
7746
    opcodes.RequireFileStorage()
7747

    
7748
    for idx, disk in enumerate(disk_info):
7749
      disk_index = idx + base_index
7750
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7751
                              size=disk[constants.IDISK_SIZE],
7752
                              iv_name="disk/%d" % disk_index,
7753
                              logical_id=(file_driver,
7754
                                          "%s/disk%d" % (file_storage_dir,
7755
                                                         disk_index)),
7756
                              mode=disk[constants.IDISK_MODE])
7757
      disks.append(disk_dev)
7758
  elif template_name == constants.DT_SHARED_FILE:
7759
    if len(secondary_nodes) != 0:
7760
      raise errors.ProgrammerError("Wrong template configuration")
7761

    
7762
    opcodes.RequireSharedFileStorage()
7763

    
7764
    for idx, disk in enumerate(disk_info):
7765
      disk_index = idx + base_index
7766
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7767
                              size=disk[constants.IDISK_SIZE],
7768
                              iv_name="disk/%d" % disk_index,
7769
                              logical_id=(file_driver,
7770
                                          "%s/disk%d" % (file_storage_dir,
7771
                                                         disk_index)),
7772
                              mode=disk[constants.IDISK_MODE])
7773
      disks.append(disk_dev)
7774
  elif template_name == constants.DT_BLOCK:
7775
    if len(secondary_nodes) != 0:
7776
      raise errors.ProgrammerError("Wrong template configuration")
7777

    
7778
    for idx, disk in enumerate(disk_info):
7779
      disk_index = idx + base_index
7780
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7781
                              size=disk[constants.IDISK_SIZE],
7782
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7783
                                          disk[constants.IDISK_ADOPT]),
7784
                              iv_name="disk/%d" % disk_index,
7785
                              mode=disk[constants.IDISK_MODE])
7786
      disks.append(disk_dev)
7787

    
7788
  else:
7789
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7790
  return disks
7791

    
7792

    
7793
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7798

    
7799

    
7800
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
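

# Illustrative usage sketch (added for clarity, not part of the original
# module): _CalcEta is a plain linear extrapolation.  If 300 MiB were written
# in 60 seconds out of 1024 MiB, the remaining time is
# (1024 - 300) * (60 / 300.0) = 144.8 seconds.  The helper name below is
# hypothetical and exists only to illustrate the call.
def _ExampleCalcEtaUsage():
  """Hypothetical usage example for L{_CalcEta}, for illustration only."""
  eta = _CalcEta(60.0, 300, 1024)
  assert abs(eta - 144.8) < 1e-6
  return eta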

    
7812

    
7813
def _WipeDisks(lu, instance):
7814
  """Wipes instance disks.
7815

7816
  @type lu: L{LogicalUnit}
7817
  @param lu: the logical unit on whose behalf we execute
7818
  @type instance: L{objects.Instance}
7819
  @param instance: the instance whose disks we should create
7820
  @return: the success of the wipe
7821

7822
  """
7823
  node = instance.primary_node
7824

    
7825
  for device in instance.disks:
7826
    lu.cfg.SetDiskID(device, node)
7827

    
7828
  logging.info("Pause sync of instance %s disks", instance.name)
7829
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7830

    
7831
  for idx, success in enumerate(result.payload):
7832
    if not success:
7833
      logging.warn("pause-sync of instance %s for disks %d failed",
7834
                   instance.name, idx)
7835

    
7836
  try:
7837
    for idx, device in enumerate(instance.disks):
7838
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7839
      # MAX_WIPE_CHUNK at max
7840
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7841
                            constants.MIN_WIPE_CHUNK_PERCENT)
7842
      # we _must_ make this an int, otherwise rounding errors will
7843
      # occur
7844
      wipe_chunk_size = int(wipe_chunk_size)
7845
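      # Worked example (added for clarity, not in the original source): if,
      # say, MIN_WIPE_CHUNK_PERCENT were 10 and MAX_WIPE_CHUNK were 1024 MiB,
      # a 10240 MiB disk would be wiped in min(1024, 1024) = 1024 MiB chunks
      # and a 1000 MiB disk in 100 MiB chunks; the actual values come from
      # constants.py.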

    
7846
      lu.LogInfo("* Wiping disk %d", idx)
7847
      logging.info("Wiping disk %d for instance %s, node %s using"
7848
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7849

    
7850
      offset = 0
7851
      size = device.size
7852
      last_output = 0
7853
      start_time = time.time()
7854

    
7855
      while offset < size:
7856
        wipe_size = min(wipe_chunk_size, size - offset)
7857
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7858
                      idx, offset, wipe_size)
7859
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7860
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7861
                     (idx, offset, wipe_size))
7862
        now = time.time()
7863
        offset += wipe_size
7864
        if now - last_output >= 60:
7865
          eta = _CalcEta(now - start_time, offset, size)
7866
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7867
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7868
          last_output = now
7869
  finally:
7870
    logging.info("Resume sync of instance %s disks", instance.name)
7871

    
7872
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7873

    
7874
    for idx, success in enumerate(result.payload):
7875
      if not success:
7876
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7877
                      " look at the status and troubleshoot the issue", idx)
7878
        logging.warn("resume-sync of instance %s for disks %d failed",
7879
                     instance.name, idx)
7880

    
7881

    
7882
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7883
  """Create all disks for an instance.
7884

7885
  This abstracts away some work from AddInstance.
7886

7887
  @type lu: L{LogicalUnit}
7888
  @param lu: the logical unit on whose behalf we execute
7889
  @type instance: L{objects.Instance}
7890
  @param instance: the instance whose disks we should create
7891
  @type to_skip: list
7892
  @param to_skip: list of indices to skip
7893
  @type target_node: string
7894
  @param target_node: if passed, overrides the target node for creation
7895
  @rtype: boolean
7896
  @return: the success of the creation
7897

7898
  """
7899
  info = _GetInstanceInfoText(instance)
7900
  if target_node is None:
7901
    pnode = instance.primary_node
7902
    all_nodes = instance.all_nodes
7903
  else:
7904
    pnode = target_node
7905
    all_nodes = [pnode]
7906

    
7907
  if instance.disk_template in constants.DTS_FILEBASED:
7908
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7909
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7910

    
7911
    result.Raise("Failed to create directory '%s' on"
7912
                 " node %s" % (file_storage_dir, pnode))
7913

    
7914
  # Note: this needs to be kept in sync with adding of disks in
7915
  # LUInstanceSetParams
7916
  for idx, device in enumerate(instance.disks):
7917
    if to_skip and idx in to_skip:
7918
      continue
7919
    logging.info("Creating volume %s for instance %s",
7920
                 device.iv_name, instance.name)
7921
    #HARDCODE
7922
    for node in all_nodes:
7923
      f_create = node == pnode
7924
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7925

    
7926

    
7927
def _RemoveDisks(lu, instance, target_node=None):
7928
  """Remove all disks for an instance.
7929

7930
  This abstracts away some work from `AddInstance()` and
7931
  `RemoveInstance()`. Note that in case some of the devices couldn't
7932
  be removed, the removal will continue with the other ones (compare
7933
  with `_CreateDisks()`).
7934

7935
  @type lu: L{LogicalUnit}
7936
  @param lu: the logical unit on whose behalf we execute
7937
  @type instance: L{objects.Instance}
7938
  @param instance: the instance whose disks we should remove
7939
  @type target_node: string
7940
  @param target_node: used to override the node on which to remove the disks
7941
  @rtype: boolean
7942
  @return: the success of the removal
7943

7944
  """
7945
  logging.info("Removing block devices for instance %s", instance.name)
7946

    
7947
  all_result = True
7948
  for device in instance.disks:
7949
    if target_node:
7950
      edata = [(target_node, device)]
7951
    else:
7952
      edata = device.ComputeNodeTree(instance.primary_node)
7953
    for node, disk in edata:
7954
      lu.cfg.SetDiskID(disk, node)
7955
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7956
      if msg:
7957
        lu.LogWarning("Could not remove block device %s on node %s,"
7958
                      " continuing anyway: %s", device.iv_name, node, msg)
7959
        all_result = False
7960

    
7961
  if instance.disk_template == constants.DT_FILE:
7962
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7963
    if target_node:
7964
      tgt = target_node
7965
    else:
7966
      tgt = instance.primary_node
7967
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7968
    if result.fail_msg:
7969
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7970
                    file_storage_dir, instance.primary_node, result.fail_msg)
7971
      all_result = False
7972

    
7973
  return all_result
7974

    
7975

    
7976
def _ComputeDiskSizePerVG(disk_template, disks):
7977
  """Compute disk size requirements in the volume group
7978

7979
  """
7980
  def _compute(disks, payload):
7981
    """Universal algorithm.
7982

7983
    """
7984
    vgs = {}
7985
    for disk in disks:
7986
      # accumulate the per-VG total, keyed by the VG name
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload
7988

    
7989
    return vgs
7990

    
7991
  # Required free disk space as a function of disk and swap space
7992
  req_size_dict = {
7993
    constants.DT_DISKLESS: {},
7994
    constants.DT_PLAIN: _compute(disks, 0),
7995
    # 128 MB are added for drbd metadata for each disk
7996
    constants.DT_DRBD8: _compute(disks, 128),
7997
    constants.DT_FILE: {},
7998
    constants.DT_SHARED_FILE: {},
7999
  }
8000

    
8001
  if disk_template not in req_size_dict:
8002
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8003
                                 " is unknown" % disk_template)
8004

    
8005
  return req_size_dict[disk_template]
8006
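

# Illustrative usage sketch (added for clarity, not part of the original
# module): for two plain disks of 1024 MiB in "xenvg" and 2048 MiB in
# "othervg", _ComputeDiskSizePerVG returns {"xenvg": 1024, "othervg": 2048};
# with DT_DRBD8 each value grows by the 128 MiB metadata overhead per disk.
# The helper name below is hypothetical.
def _ExampleComputeDiskSizePerVGUsage():
  """Hypothetical usage example for L{_ComputeDiskSizePerVG}."""
  disks = [
    {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
    {constants.IDISK_VG: "othervg", constants.IDISK_SIZE: 2048},
    ]
  per_vg = _ComputeDiskSizePerVG(constants.DT_PLAIN, disks)
  assert per_vg == {"xenvg": 1024, "othervg": 2048}
  return per_vg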

    
8007

    
8008
def _ComputeDiskSize(disk_template, disks):
8009
  """Compute disk size requirements in the volume group
8010

8011
  """
8012
  # Required free disk space as a function of disk and swap space
8013
  req_size_dict = {
8014
    constants.DT_DISKLESS: None,
8015
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8016
    # 128 MB are added for drbd metadata for each disk
8017
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8018
    constants.DT_FILE: None,
8019
    constants.DT_SHARED_FILE: 0,
8020
    constants.DT_BLOCK: 0,
8021
  }
8022

    
8023
  if disk_template not in req_size_dict:
8024
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8025
                                 " is unknown" % disk_template)
8026

    
8027
  return req_size_dict[disk_template]
8028
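

# Illustrative usage sketch (added for clarity, not part of the original
# module): for two disks of 10240 MiB and 20480 MiB, DT_PLAIN simply sums the
# sizes (30720 MiB) while DT_DRBD8 adds the fixed 128 MiB of metadata per
# disk (30976 MiB).  The helper name below is hypothetical.
def _ExampleComputeDiskSizeUsage():
  """Hypothetical usage example for L{_ComputeDiskSize}, illustration only."""
  disks = [{constants.IDISK_SIZE: 10240}, {constants.IDISK_SIZE: 20480}]
  assert _ComputeDiskSize(constants.DT_PLAIN, disks) == 30720
  assert _ComputeDiskSize(constants.DT_DRBD8, disks) == 30976
  return disks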

    
8029

    
8030
def _FilterVmNodes(lu, nodenames):
8031
  """Filters out non-vm_capable nodes from a list.
8032

8033
  @type lu: L{LogicalUnit}
8034
  @param lu: the logical unit for which we check
8035
  @type nodenames: list
8036
  @param nodenames: the list of nodes on which we should check
8037
  @rtype: list
8038
  @return: the list of vm-capable nodes
8039

8040
  """
8041
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8042
  return [name for name in nodenames if name not in vm_nodes]
8043
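

# Note (added for clarity, not in the original source): despite its name, the
# "vm_nodes" frozenset in _FilterVmNodes above holds the *non* vm_capable
# nodes, and the list comprehension keeps every node name that is not in that
# set, i.e. the vm-capable ones.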

    
8044

    
8045
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8046
  """Hypervisor parameter validation.
8047

8048
  This function abstract the hypervisor parameter validation to be
8049
  used in both instance create and instance modify.
8050

8051
  @type lu: L{LogicalUnit}
8052
  @param lu: the logical unit for which we check
8053
  @type nodenames: list
8054
  @param nodenames: the list of nodes on which we should check
8055
  @type hvname: string
8056
  @param hvname: the name of the hypervisor we should use
8057
  @type hvparams: dict
8058
  @param hvparams: the parameters which we need to check
8059
  @raise errors.OpPrereqError: if the parameters are not valid
8060

8061
  """
8062
  nodenames = _FilterVmNodes(lu, nodenames)
8063
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8064
                                                  hvname,
8065
                                                  hvparams)
8066
  for node in nodenames:
8067
    info = hvinfo[node]
8068
    if info.offline:
8069
      continue
8070
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8071

    
8072

    
8073
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8074
  """OS parameters validation.
8075

8076
  @type lu: L{LogicalUnit}
8077
  @param lu: the logical unit for which we check
8078
  @type required: boolean
8079
  @param required: whether the validation should fail if the OS is not
8080
      found
8081
  @type nodenames: list
8082
  @param nodenames: the list of nodes on which we should check
8083
  @type osname: string
8084
  @param osname: the name of the OS we should use
8085
  @type osparams: dict
8086
  @param osparams: the parameters which we need to check
8087
  @raise errors.OpPrereqError: if the parameters are not valid
8088

8089
  """
8090
  nodenames = _FilterVmNodes(lu, nodenames)
8091
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8092
                                   [constants.OS_VALIDATE_PARAMETERS],
8093
                                   osparams)
8094
  for node, nres in result.items():
8095
    # we don't check for offline cases since this should be run only
8096
    # against the master node and/or an instance's nodes
8097
    nres.Raise("OS Parameters validation failed on node %s" % node)
8098
    if not nres.payload:
8099
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8100
                 osname, node)
8101

    
8102

    
8103
class LUInstanceCreate(LogicalUnit):
8104
  """Create an instance.
8105

8106
  """
8107
  HPATH = "instance-add"
8108
  HTYPE = constants.HTYPE_INSTANCE
8109
  REQ_BGL = False
8110

    
8111
  def CheckArguments(self):
8112
    """Check arguments.
8113

8114
    """
8115
    # do not require name_check to ease forward/backward compatibility
8116
    # for tools
8117
    if self.op.no_install and self.op.start:
8118
      self.LogInfo("No-installation mode selected, disabling startup")
8119
      self.op.start = False
8120
    # validate/normalize the instance name
8121
    self.op.instance_name = \
8122
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8123

    
8124
    if self.op.ip_check and not self.op.name_check:
8125
      # TODO: make the ip check more flexible and not depend on the name check
8126
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8127
                                 " check", errors.ECODE_INVAL)
8128

    
8129
    # check nics' parameter names
8130
    for nic in self.op.nics:
8131
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8132

    
8133
    # check disks. parameter names and consistent adopt/no-adopt strategy
8134
    has_adopt = has_no_adopt = False
8135
    for disk in self.op.disks:
8136
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8137
      if constants.IDISK_ADOPT in disk:
8138
        has_adopt = True
8139
      else:
8140
        has_no_adopt = True
8141
    if has_adopt and has_no_adopt:
8142
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8143
                                 errors.ECODE_INVAL)
8144
    if has_adopt:
8145
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8146
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8147
                                   " '%s' disk template" %
8148
                                   self.op.disk_template,
8149
                                   errors.ECODE_INVAL)
8150
      if self.op.iallocator is not None:
8151
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8152
                                   " iallocator script", errors.ECODE_INVAL)
8153
      if self.op.mode == constants.INSTANCE_IMPORT:
8154
        raise errors.OpPrereqError("Disk adoption not allowed for"
8155
                                   " instance import", errors.ECODE_INVAL)
8156
    else:
8157
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8158
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8159
                                   " but no 'adopt' parameter given" %
8160
                                   self.op.disk_template,
8161
                                   errors.ECODE_INVAL)
8162

    
8163
    self.adopt_disks = has_adopt
8164

    
8165
    # instance name verification
8166
    if self.op.name_check:
8167
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8168
      self.op.instance_name = self.hostname1.name
8169
      # used in CheckPrereq for ip ping check
8170
      self.check_ip = self.hostname1.ip
8171
    else:
8172
      self.check_ip = None
8173

    
8174
    # file storage checks
8175
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
8177
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8178
                                 self.op.file_driver, errors.ECODE_INVAL)
8179

    
8180
    if self.op.disk_template == constants.DT_FILE:
8181
      opcodes.RequireFileStorage()
8182
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8183
      opcodes.RequireSharedFileStorage()
8184

    
8185
    ### Node/iallocator related checks
8186
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8187

    
8188
    if self.op.pnode is not None:
8189
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8190
        if self.op.snode is None:
8191
          raise errors.OpPrereqError("The networked disk templates need"
8192
                                     " a mirror node", errors.ECODE_INVAL)
8193
      elif self.op.snode:
8194
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8195
                        " template")
8196
        self.op.snode = None
8197

    
8198
    self._cds = _GetClusterDomainSecret()
8199

    
8200
    if self.op.mode == constants.INSTANCE_IMPORT:
8201
      # On import force_variant must be True, because if we forced it at
8202
      # initial install, our only chance when importing it back is that it
8203
      # works again!
8204
      self.op.force_variant = True
8205

    
8206
      if self.op.no_install:
8207
        self.LogInfo("No-installation mode has no effect during import")
8208

    
8209
    elif self.op.mode == constants.INSTANCE_CREATE:
8210
      if self.op.os_type is None:
8211
        raise errors.OpPrereqError("No guest OS specified",
8212
                                   errors.ECODE_INVAL)
8213
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8214
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8215
                                   " installation" % self.op.os_type,
8216
                                   errors.ECODE_STATE)
8217
      if self.op.disk_template is None:
8218
        raise errors.OpPrereqError("No disk template specified",
8219
                                   errors.ECODE_INVAL)
8220

    
8221
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8222
      # Check handshake to ensure both clusters have the same domain secret
8223
      src_handshake = self.op.source_handshake
8224
      if not src_handshake:
8225
        raise errors.OpPrereqError("Missing source handshake",
8226
                                   errors.ECODE_INVAL)
8227

    
8228
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8229
                                                           src_handshake)
8230
      if errmsg:
8231
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8232
                                   errors.ECODE_INVAL)
8233

    
8234
      # Load and check source CA
8235
      self.source_x509_ca_pem = self.op.source_x509_ca
8236
      if not self.source_x509_ca_pem:
8237
        raise errors.OpPrereqError("Missing source X509 CA",
8238
                                   errors.ECODE_INVAL)
8239

    
8240
      try:
8241
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8242
                                                    self._cds)
8243
      except OpenSSL.crypto.Error, err:
8244
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8245
                                   (err, ), errors.ECODE_INVAL)
8246

    
8247
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8248
      if errcode is not None:
8249
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8250
                                   errors.ECODE_INVAL)
8251

    
8252
      self.source_x509_ca = cert
8253

    
8254
      src_instance_name = self.op.source_instance_name
8255
      if not src_instance_name:
8256
        raise errors.OpPrereqError("Missing source instance name",
8257
                                   errors.ECODE_INVAL)
8258

    
8259
      self.source_instance_name = \
8260
          netutils.GetHostname(name=src_instance_name).name
8261

    
8262
    else:
8263
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8264
                                 self.op.mode, errors.ECODE_INVAL)
8265

    
8266
  def ExpandNames(self):
8267
    """ExpandNames for CreateInstance.
8268

8269
    Figure out the right locks for instance creation.
8270

8271
    """
8272
    self.needed_locks = {}
8273

    
8274
    instance_name = self.op.instance_name
8275
    # this is just a preventive check, but someone might still add this
8276
    # instance in the meantime, and creation will fail at lock-add time
8277
    if instance_name in self.cfg.GetInstanceList():
8278
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8279
                                 instance_name, errors.ECODE_EXISTS)
8280

    
8281
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8282

    
8283
    if self.op.iallocator:
8284
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8285
    else:
8286
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8287
      nodelist = [self.op.pnode]
8288
      if self.op.snode is not None:
8289
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8290
        nodelist.append(self.op.snode)
8291
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8292

    
8293
    # in case of import lock the source node too
8294
    if self.op.mode == constants.INSTANCE_IMPORT:
8295
      src_node = self.op.src_node
8296
      src_path = self.op.src_path
8297

    
8298
      if src_path is None:
8299
        self.op.src_path = src_path = self.op.instance_name
8300

    
8301
      if src_node is None:
8302
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8303
        self.op.src_node = None
8304
        if os.path.isabs(src_path):
8305
          raise errors.OpPrereqError("Importing an instance from a path"
8306
                                     " requires a source node option",
8307
                                     errors.ECODE_INVAL)
8308
      else:
8309
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8310
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8311
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8312
        if not os.path.isabs(src_path):
8313
          self.op.src_path = src_path = \
8314
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8315

    
8316
  def _RunAllocator(self):
8317
    """Run the allocator based on input opcode.
8318

8319
    """
8320
    nics = [n.ToDict() for n in self.nics]
8321
    ial = IAllocator(self.cfg, self.rpc,
8322
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8323
                     name=self.op.instance_name,
8324
                     disk_template=self.op.disk_template,
8325
                     tags=self.op.tags,
8326
                     os=self.op.os_type,
8327
                     vcpus=self.be_full[constants.BE_VCPUS],
8328
                     memory=self.be_full[constants.BE_MEMORY],
8329
                     disks=self.disks,
8330
                     nics=nics,
8331
                     hypervisor=self.op.hypervisor,
8332
                     )
8333

    
8334
    ial.Run(self.op.iallocator)
8335

    
8336
    if not ial.success:
8337
      raise errors.OpPrereqError("Can't compute nodes using"
8338
                                 " iallocator '%s': %s" %
8339
                                 (self.op.iallocator, ial.info),
8340
                                 errors.ECODE_NORES)
8341
    if len(ial.result) != ial.required_nodes:
8342
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8343
                                 " of nodes (%s), required %s" %
8344
                                 (self.op.iallocator, len(ial.result),
8345
                                  ial.required_nodes), errors.ECODE_FAULT)
8346
    self.op.pnode = ial.result[0]
8347
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8348
                 self.op.instance_name, self.op.iallocator,
8349
                 utils.CommaJoin(ial.result))
8350
    if ial.required_nodes == 2:
8351
      self.op.snode = ial.result[1]
8352

    
8353
  def BuildHooksEnv(self):
8354
    """Build hooks env.
8355

8356
    This runs on master, primary and secondary nodes of the instance.
8357

8358
    """
8359
    env = {
8360
      "ADD_MODE": self.op.mode,
8361
      }
8362
    if self.op.mode == constants.INSTANCE_IMPORT:
8363
      env["SRC_NODE"] = self.op.src_node
8364
      env["SRC_PATH"] = self.op.src_path
8365
      env["SRC_IMAGES"] = self.src_images
8366

    
8367
    env.update(_BuildInstanceHookEnv(
8368
      name=self.op.instance_name,
8369
      primary_node=self.op.pnode,
8370
      secondary_nodes=self.secondaries,
8371
      status=self.op.start,
8372
      os_type=self.op.os_type,
8373
      memory=self.be_full[constants.BE_MEMORY],
8374
      vcpus=self.be_full[constants.BE_VCPUS],
8375
      nics=_NICListToTuple(self, self.nics),
8376
      disk_template=self.op.disk_template,
8377
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8378
             for d in self.disks],
8379
      bep=self.be_full,
8380
      hvp=self.hv_full,
8381
      hypervisor_name=self.op.hypervisor,
8382
      tags=self.op.tags,
8383
    ))
8384

    
8385
    return env
8386

    
8387
  def BuildHooksNodes(self):
8388
    """Build hooks nodes.
8389

8390
    """
8391
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8392
    return nl, nl
8393

    
8394
  def _ReadExportInfo(self):
8395
    """Reads the export information from disk.
8396

8397
    It will override the opcode source node and path with the actual
8398
    information, if these two were not specified before.
8399

8400
    @return: the export information
8401

8402
    """
8403
    assert self.op.mode == constants.INSTANCE_IMPORT
8404

    
8405
    src_node = self.op.src_node
8406
    src_path = self.op.src_path
8407

    
8408
    if src_node is None:
8409
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8410
      exp_list = self.rpc.call_export_list(locked_nodes)
8411
      found = False
8412
      for node in exp_list:
8413
        if exp_list[node].fail_msg:
8414
          continue
8415
        if src_path in exp_list[node].payload:
8416
          found = True
8417
          self.op.src_node = src_node = node
8418
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8419
                                                       src_path)
8420
          break
8421
      if not found:
8422
        raise errors.OpPrereqError("No export found for relative path %s" %
8423
                                    src_path, errors.ECODE_INVAL)
8424

    
8425
    _CheckNodeOnline(self, src_node)
8426
    result = self.rpc.call_export_info(src_node, src_path)
8427
    result.Raise("No export or invalid export found in dir %s" % src_path)
8428

    
8429
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8430
    if not export_info.has_section(constants.INISECT_EXP):
8431
      raise errors.ProgrammerError("Corrupted export config",
8432
                                   errors.ECODE_ENVIRON)
8433

    
8434
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8435
    if (int(ei_version) != constants.EXPORT_VERSION):
8436
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8437
                                 (ei_version, constants.EXPORT_VERSION),
8438
                                 errors.ECODE_ENVIRON)
8439
    return export_info
8440

    
8441
  def _ReadExportParams(self, einfo):
8442
    """Use export parameters as defaults.
8443

8444
    In case the opcode doesn't specify (as in override) some instance
8445
    parameters, then try to use them from the export information, if
8446
    that declares them.
8447

8448
    """
8449
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8450

    
8451
    if self.op.disk_template is None:
8452
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8453
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8454
                                          "disk_template")
8455
      else:
8456
        raise errors.OpPrereqError("No disk template specified and the export"
8457
                                   " is missing the disk_template information",
8458
                                   errors.ECODE_INVAL)
8459

    
8460
    if not self.op.disks:
8461
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8462
        disks = []
8463
        # TODO: import the disk iv_name too
8464
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8465
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8466
          disks.append({constants.IDISK_SIZE: disk_sz})
8467
        self.op.disks = disks
8468
      else:
8469
        raise errors.OpPrereqError("No disk info specified and the export"
8470
                                   " is missing the disk information",
8471
                                   errors.ECODE_INVAL)
8472

    
8473
    if (not self.op.nics and
8474
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8475
      nics = []
8476
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8477
        ndict = {}
8478
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8479
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8480
          ndict[name] = v
8481
        nics.append(ndict)
8482
      self.op.nics = nics
8483

    
8484
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8485
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8486

    
8487
    if (self.op.hypervisor is None and
8488
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8489
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8490

    
8491
    if einfo.has_section(constants.INISECT_HYP):
8492
      # use the export parameters but do not override the ones
8493
      # specified by the user
8494
      for name, value in einfo.items(constants.INISECT_HYP):
8495
        if name not in self.op.hvparams:
8496
          self.op.hvparams[name] = value
8497

    
8498
    if einfo.has_section(constants.INISECT_BEP):
8499
      # use the parameters, without overriding
8500
      for name, value in einfo.items(constants.INISECT_BEP):
8501
        if name not in self.op.beparams:
8502
          self.op.beparams[name] = value
8503
    else:
8504
      # try to read the parameters old style, from the main section
8505
      for name in constants.BES_PARAMETERS:
8506
        if (name not in self.op.beparams and
8507
            einfo.has_option(constants.INISECT_INS, name)):
8508
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8509

    
8510
    if einfo.has_section(constants.INISECT_OSP):
8511
      # use the parameters, without overriding
8512
      for name, value in einfo.items(constants.INISECT_OSP):
8513
        if name not in self.op.osparams:
8514
          self.op.osparams[name] = value
8515

    
8516
  def _RevertToDefaults(self, cluster):
8517
    """Revert the instance parameters to the default values.
8518

8519
    """
8520
    # hvparams
8521
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8522
    for name in self.op.hvparams.keys():
8523
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8524
        del self.op.hvparams[name]
8525
    # beparams
8526
    be_defs = cluster.SimpleFillBE({})
8527
    for name in self.op.beparams.keys():
8528
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8529
        del self.op.beparams[name]
8530
    # nic params
8531
    nic_defs = cluster.SimpleFillNIC({})
8532
    for nic in self.op.nics:
8533
      for name in constants.NICS_PARAMETERS:
8534
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8535
          del nic[name]
8536
    # osparams
8537
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8538
    for name in self.op.osparams.keys():
8539
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8540
        del self.op.osparams[name]
8541

    
8542
  def _CalculateFileStorageDir(self):
8543
    """Calculate final instance file storage dir.
8544

8545
    """
8546
    # file storage dir calculation/check
8547
    self.instance_file_storage_dir = None
8548
    if self.op.disk_template in constants.DTS_FILEBASED:
8549
      # build the full file storage dir path
8550
      joinargs = []
8551

    
8552
      if self.op.disk_template == constants.DT_SHARED_FILE:
8553
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8554
      else:
8555
        get_fsd_fn = self.cfg.GetFileStorageDir
8556

    
8557
      cfg_storagedir = get_fsd_fn()
8558
      if not cfg_storagedir:
8559
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8560
      joinargs.append(cfg_storagedir)
8561

    
8562
      if self.op.file_storage_dir is not None:
8563
        joinargs.append(self.op.file_storage_dir)
8564

    
8565
      joinargs.append(self.op.instance_name)
8566

    
8567
      # pylint: disable=W0142
8568
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8569

    
8570
  def CheckPrereq(self):
8571
    """Check prerequisites.
8572

8573
    """
8574
    self._CalculateFileStorageDir()
8575

    
8576
    if self.op.mode == constants.INSTANCE_IMPORT:
8577
      export_info = self._ReadExportInfo()
8578
      self._ReadExportParams(export_info)
8579

    
8580
    if (not self.cfg.GetVGName() and
8581
        self.op.disk_template not in constants.DTS_NOT_LVM):
8582
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8583
                                 " instances", errors.ECODE_STATE)
8584

    
8585
    if self.op.hypervisor is None:
8586
      self.op.hypervisor = self.cfg.GetHypervisorType()
8587

    
8588
    cluster = self.cfg.GetClusterInfo()
8589
    enabled_hvs = cluster.enabled_hypervisors
8590
    if self.op.hypervisor not in enabled_hvs:
8591
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8592
                                 " cluster (%s)" % (self.op.hypervisor,
8593
                                  ",".join(enabled_hvs)),
8594
                                 errors.ECODE_STATE)
8595

    
8596
    # Check tag validity
8597
    for tag in self.op.tags:
8598
      objects.TaggableObject.ValidateTag(tag)
8599

    
8600
    # check hypervisor parameter syntax (locally)
8601
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8602
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8603
                                      self.op.hvparams)
8604
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8605
    hv_type.CheckParameterSyntax(filled_hvp)
8606
    self.hv_full = filled_hvp
8607
    # check that we don't specify global parameters on an instance
8608
    _CheckGlobalHvParams(self.op.hvparams)
8609

    
8610
    # fill and remember the beparams dict
8611
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8612
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8613

    
8614
    # build os parameters
8615
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8616

    
8617
    # now that hvp/bep are in final format, let's reset to defaults,
8618
    # if told to do so
8619
    if self.op.identify_defaults:
8620
      self._RevertToDefaults(cluster)
8621

    
8622
    # NIC buildup
8623
    self.nics = []
8624
    for idx, nic in enumerate(self.op.nics):
8625
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8626
      nic_mode = nic_mode_req
8627
      if nic_mode is None:
8628
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8629

    
8630
      # in routed mode, for the first nic, the default ip is 'auto'
8631
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8632
        default_ip_mode = constants.VALUE_AUTO
8633
      else:
8634
        default_ip_mode = constants.VALUE_NONE
8635

    
8636
      # ip validity checks
8637
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8638
      if ip is None or ip.lower() == constants.VALUE_NONE:
8639
        nic_ip = None
8640
      elif ip.lower() == constants.VALUE_AUTO:
8641
        if not self.op.name_check:
8642
          raise errors.OpPrereqError("IP address set to auto but name checks"
8643
                                     " have been skipped",
8644
                                     errors.ECODE_INVAL)
8645
        nic_ip = self.hostname1.ip
8646
      else:
8647
        if not netutils.IPAddress.IsValid(ip):
8648
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8649
                                     errors.ECODE_INVAL)
8650
        nic_ip = ip
8651

    
8652
      # TODO: check the ip address for uniqueness
8653
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8654
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8655
                                   errors.ECODE_INVAL)
8656

    
8657
      # MAC address verification
8658
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8659
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8660
        mac = utils.NormalizeAndValidateMac(mac)
8661

    
8662
        try:
8663
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8664
        except errors.ReservationError:
8665
          raise errors.OpPrereqError("MAC address %s already in use"
8666
                                     " in cluster" % mac,
8667
                                     errors.ECODE_NOTUNIQUE)
8668

    
8669
      #  Build nic parameters
8670
      link = nic.get(constants.INIC_LINK, None)
8671
      nicparams = {}
8672
      if nic_mode_req:
8673
        nicparams[constants.NIC_MODE] = nic_mode_req
8674
      if link:
8675
        nicparams[constants.NIC_LINK] = link
8676

    
8677
      check_params = cluster.SimpleFillNIC(nicparams)
8678
      objects.NIC.CheckParameterSyntax(check_params)
8679
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8680

    
8681
    # disk checks/pre-build
8682
    default_vg = self.cfg.GetVGName()
8683
    self.disks = []
8684
    for disk in self.op.disks:
8685
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8686
      if mode not in constants.DISK_ACCESS_SET:
8687
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8688
                                   mode, errors.ECODE_INVAL)
8689
      size = disk.get(constants.IDISK_SIZE, None)
8690
      if size is None:
8691
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8692
      try:
8693
        size = int(size)
8694
      except (TypeError, ValueError):
8695
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8696
                                   errors.ECODE_INVAL)
8697

    
8698
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8699
      new_disk = {
8700
        constants.IDISK_SIZE: size,
8701
        constants.IDISK_MODE: mode,
8702
        constants.IDISK_VG: data_vg,
8703
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8704
        }
8705
      if constants.IDISK_ADOPT in disk:
8706
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8707
      self.disks.append(new_disk)
8708

    
8709
    if self.op.mode == constants.INSTANCE_IMPORT:
8710

    
8711
      # Check that the new instance doesn't have less disks than the export
8712
      instance_disks = len(self.disks)
8713
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8714
      if instance_disks < export_disks:
8715
        raise errors.OpPrereqError("Not enough disks to import."
8716
                                   " (instance: %d, export: %d)" %
8717
                                   (instance_disks, export_disks),
8718
                                   errors.ECODE_INVAL)
8719

    
8720
      disk_images = []
8721
      for idx in range(export_disks):
8722
        option = "disk%d_dump" % idx
8723
        if export_info.has_option(constants.INISECT_INS, option):
8724
          # FIXME: are the old os-es, disk sizes, etc. useful?
8725
          export_name = export_info.get(constants.INISECT_INS, option)
8726
          image = utils.PathJoin(self.op.src_path, export_name)
8727
          disk_images.append(image)
8728
        else:
8729
          disk_images.append(False)
8730

    
8731
      self.src_images = disk_images
8732

    
8733
      old_name = export_info.get(constants.INISECT_INS, "name")
8734
      try:
8735
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8736
      except (TypeError, ValueError), err:
8737
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8738
                                   " an integer: %s" % str(err),
8739
                                   errors.ECODE_STATE)
8740
      if self.op.instance_name == old_name:
8741
        for idx, nic in enumerate(self.nics):
8742
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8743
            nic_mac_ini = "nic%d_mac" % idx
8744
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8745

    
8746
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8747

    
8748
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8749
    if self.op.ip_check:
8750
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8751
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8752
                                   (self.check_ip, self.op.instance_name),
8753
                                   errors.ECODE_NOTUNIQUE)
8754

    
8755
    #### mac address generation
8756
    # By generating here the mac address both the allocator and the hooks get
8757
    # the real final mac address rather than the 'auto' or 'generate' value.
8758
    # There is a race condition between the generation and the instance object
8759
    # creation, which means that we know the mac is valid now, but we're not
8760
    # sure it will be when we actually add the instance. If things go bad
8761
    # adding the instance will abort because of a duplicate mac, and the
8762
    # creation job will fail.
8763
    for nic in self.nics:
8764
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8765
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8766

    
8767
    #### allocator run
8768

    
8769
    if self.op.iallocator is not None:
8770
      self._RunAllocator()
8771

    
8772
    #### node related checks
8773

    
8774
    # check primary node
8775
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8776
    assert self.pnode is not None, \
8777
      "Cannot retrieve locked node %s" % self.op.pnode
8778
    if pnode.offline:
8779
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8780
                                 pnode.name, errors.ECODE_STATE)
8781
    if pnode.drained:
8782
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8783
                                 pnode.name, errors.ECODE_STATE)
8784
    if not pnode.vm_capable:
8785
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8786
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8787

    
8788
    self.secondaries = []
8789

    
8790
    # mirror node verification
8791
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8792
      if self.op.snode == pnode.name:
8793
        raise errors.OpPrereqError("The secondary node cannot be the"
8794
                                   " primary node", errors.ECODE_INVAL)
8795
      _CheckNodeOnline(self, self.op.snode)
8796
      _CheckNodeNotDrained(self, self.op.snode)
8797
      _CheckNodeVmCapable(self, self.op.snode)
8798
      self.secondaries.append(self.op.snode)
8799

    
8800
    nodenames = [pnode.name] + self.secondaries
8801

    
8802
    if not self.adopt_disks:
8803
      # Check lv size requirements, if not adopting
8804
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8805
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8806

    
8807
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8808
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8809
                                disk[constants.IDISK_ADOPT])
8810
                     for disk in self.disks])
8811
      if len(all_lvs) != len(self.disks):
8812
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8813
                                   errors.ECODE_INVAL)
8814
      for lv_name in all_lvs:
8815
        try:
8816
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8817
          # to ReserveLV uses the same syntax
8818
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8819
        except errors.ReservationError:
8820
          raise errors.OpPrereqError("LV named %s used by another instance" %
8821
                                     lv_name, errors.ECODE_NOTUNIQUE)
8822

    
8823
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8824
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8825

    
8826
      node_lvs = self.rpc.call_lv_list([pnode.name],
8827
                                       vg_names.payload.keys())[pnode.name]
8828
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8829
      node_lvs = node_lvs.payload
8830

    
8831
      delta = all_lvs.difference(node_lvs.keys())
8832
      if delta:
8833
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8834
                                   utils.CommaJoin(delta),
8835
                                   errors.ECODE_INVAL)
8836
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8837
      if online_lvs:
8838
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8839
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8840
                                   errors.ECODE_STATE)
8841
      # update the size of disk based on what is found
8842
      for dsk in self.disks:
8843
        dsk[constants.IDISK_SIZE] = \
8844
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8845
                                        dsk[constants.IDISK_ADOPT])][0]))
8846

    
8847
    elif self.op.disk_template == constants.DT_BLOCK:
8848
      # Normalize and de-duplicate device paths
8849
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8850
                       for disk in self.disks])
8851
      if len(all_disks) != len(self.disks):
8852
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8853
                                   errors.ECODE_INVAL)
8854
      baddisks = [d for d in all_disks
8855
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8856
      if baddisks:
8857
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8858
                                   " cannot be adopted" %
8859
                                   (", ".join(baddisks),
8860
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8861
                                   errors.ECODE_INVAL)
8862

    
8863
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8864
                                            list(all_disks))[pnode.name]
8865
      node_disks.Raise("Cannot get block device information from node %s" %
8866
                       pnode.name)
8867
      node_disks = node_disks.payload
8868
      delta = all_disks.difference(node_disks.keys())
8869
      if delta:
8870
        raise errors.OpPrereqError("Missing block device(s): %s" %
8871
                                   utils.CommaJoin(delta),
8872
                                   errors.ECODE_INVAL)
8873
      for dsk in self.disks:
8874
        dsk[constants.IDISK_SIZE] = \
8875
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8876

    
8877
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8878

    
8879
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8880
    # check OS parameters (remotely)
8881
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8882

    
8883
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8884

    
8885
    # memory check on primary node
8886
    if self.op.start:
8887
      _CheckNodeFreeMemory(self, self.pnode.name,
8888
                           "creating instance %s" % self.op.instance_name,
8889
                           self.be_full[constants.BE_MEMORY],
8890
                           self.op.hypervisor)
8891

    
8892
    self.dry_run_result = list(nodenames)
8893

    
8894
  def Exec(self, feedback_fn):
8895
    """Create and add the instance to the cluster.
8896

8897
    """
8898
    instance = self.op.instance_name
8899
    pnode_name = self.pnode.name
8900

    
8901
    ht_kind = self.op.hypervisor
8902
    if ht_kind in constants.HTS_REQ_PORT:
8903
      network_port = self.cfg.AllocatePort()
8904
    else:
8905
      network_port = None
8906

    
8907
    disks = _GenerateDiskTemplate(self,
8908
                                  self.op.disk_template,
8909
                                  instance, pnode_name,
8910
                                  self.secondaries,
8911
                                  self.disks,
8912
                                  self.instance_file_storage_dir,
8913
                                  self.op.file_driver,
8914
                                  0,
8915
                                  feedback_fn)
8916

    
8917
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8918
                            primary_node=pnode_name,
8919
                            nics=self.nics, disks=disks,
8920
                            disk_template=self.op.disk_template,
8921
                            admin_up=False,
8922
                            network_port=network_port,
8923
                            beparams=self.op.beparams,
8924
                            hvparams=self.op.hvparams,
8925
                            hypervisor=self.op.hypervisor,
8926
                            osparams=self.op.osparams,
8927
                            )
8928

    
8929
    if self.op.tags:
8930
      for tag in self.op.tags:
8931
        iobj.AddTag(tag)
8932

    
8933
    if self.adopt_disks:
8934
      if self.op.disk_template == constants.DT_PLAIN:
8935
        # rename LVs to the newly-generated names; we need to construct
8936
        # 'fake' LV disks with the old data, plus the new unique_id
8937
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8938
        rename_to = []
8939
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8940
          rename_to.append(t_dsk.logical_id)
8941
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8942
          self.cfg.SetDiskID(t_dsk, pnode_name)
8943
        result = self.rpc.call_blockdev_rename(pnode_name,
8944
                                               zip(tmp_disks, rename_to))
8945
        result.Raise("Failed to rename adoped LVs")
8946
    else:
8947
      feedback_fn("* creating instance disks...")
8948
      try:
8949
        _CreateDisks(self, iobj)
8950
      except errors.OpExecError:
8951
        self.LogWarning("Device creation failed, reverting...")
8952
        try:
8953
          _RemoveDisks(self, iobj)
8954
        finally:
8955
          self.cfg.ReleaseDRBDMinors(instance)
8956
          raise
8957

    
8958
    feedback_fn("adding instance %s to cluster config" % instance)
8959

    
8960
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8961

    
8962
    # Declare that we don't want to remove the instance lock anymore, as we've
8963
    # added the instance to the config
8964
    del self.remove_locks[locking.LEVEL_INSTANCE]
8965

    
8966
    if self.op.mode == constants.INSTANCE_IMPORT:
8967
      # Release unused nodes
8968
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8969
    else:
8970
      # Release all nodes
8971
      _ReleaseLocks(self, locking.LEVEL_NODE)
8972

    
8973
    disk_abort = False
8974
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8975
      feedback_fn("* wiping instance disks...")
8976
      try:
8977
        _WipeDisks(self, iobj)
8978
      except errors.OpExecError, err:
8979
        logging.exception("Wiping disks failed")
8980
        self.LogWarning("Wiping instance disks failed (%s)", err)
8981
        disk_abort = True
8982

    
8983
    if disk_abort:
8984
      # Something is already wrong with the disks, don't do anything else
8985
      pass
8986
    elif self.op.wait_for_sync:
8987
      disk_abort = not _WaitForSync(self, iobj)
8988
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8989
      # make sure the disks are not degraded (still sync-ing is ok)
8990
      feedback_fn("* checking mirrors status")
8991
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8992
    else:
8993
      disk_abort = False
8994

    
8995
    if disk_abort:
8996
      _RemoveDisks(self, iobj)
8997
      self.cfg.RemoveInstance(iobj.name)
8998
      # Make sure the instance lock gets removed
8999
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9000
      raise errors.OpExecError("There are some degraded disks for"
9001
                               " this instance")
9002

    
9003
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9004
      if self.op.mode == constants.INSTANCE_CREATE:
9005
        if not self.op.no_install:
9006
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9007
                        not self.op.wait_for_sync)
9008
          if pause_sync:
9009
            feedback_fn("* pausing disk sync to install instance OS")
9010
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9011
                                                              iobj.disks, True)
9012
            for idx, success in enumerate(result.payload):
9013
              if not success:
9014
                logging.warn("pause-sync of instance %s for disk %d failed",
9015
                             instance, idx)
9016

    
9017
          feedback_fn("* running the instance OS create scripts...")
9018
          # FIXME: pass debug option from opcode to backend
9019
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9020
                                                 self.op.debug_level)
9021
          if pause_sync:
9022
            feedback_fn("* resuming disk sync")
9023
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9024
                                                              iobj.disks, False)
9025
            for idx, success in enumerate(result.payload):
9026
              if not success:
9027
                logging.warn("resume-sync of instance %s for disk %d failed",
9028
                             instance, idx)
9029

    
9030
          result.Raise("Could not add os for instance %s"
9031
                       " on node %s" % (instance, pnode_name))
9032

    
9033
      elif self.op.mode == constants.INSTANCE_IMPORT:
9034
        feedback_fn("* running the instance OS import scripts...")
9035

    
9036
        transfers = []
9037

    
9038
        for idx, image in enumerate(self.src_images):
9039
          if not image:
9040
            continue
9041

    
9042
          # FIXME: pass debug option from opcode to backend
9043
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9044
                                             constants.IEIO_FILE, (image, ),
9045
                                             constants.IEIO_SCRIPT,
9046
                                             (iobj.disks[idx], idx),
9047
                                             None)
9048
          transfers.append(dt)
9049

    
9050
        import_result = \
9051
          masterd.instance.TransferInstanceData(self, feedback_fn,
9052
                                                self.op.src_node, pnode_name,
9053
                                                self.pnode.secondary_ip,
9054
                                                iobj, transfers)
9055
        if not compat.all(import_result):
9056
          self.LogWarning("Some disks for instance %s on node %s were not"
9057
                          " imported successfully" % (instance, pnode_name))
9058

    
9059
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9060
        feedback_fn("* preparing remote import...")
9061
        # The source cluster will stop the instance before attempting to make a
9062
        # connection. In some cases stopping an instance can take a long time,
9063
        # hence the shutdown timeout is added to the connection timeout.
9064
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9065
                           self.op.source_shutdown_timeout)
9066
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9067

    
9068
        assert iobj.primary_node == self.pnode.name
9069
        disk_results = \
9070
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9071
                                        self.source_x509_ca,
9072
                                        self._cds, timeouts)
9073
        if not compat.all(disk_results):
9074
          # TODO: Should the instance still be started, even if some disks
9075
          # failed to import (valid for local imports, too)?
9076
          self.LogWarning("Some disks for instance %s on node %s were not"
9077
                          " imported successfully" % (instance, pnode_name))
9078

    
9079
        # Run rename script on newly imported instance
9080
        assert iobj.name == instance
9081
        feedback_fn("Running rename script for %s" % instance)
9082
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9083
                                                   self.source_instance_name,
9084
                                                   self.op.debug_level)
9085
        if result.fail_msg:
9086
          self.LogWarning("Failed to run rename script for %s on node"
9087
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9088

    
9089
      else:
9090
        # also checked in the prereq part
9091
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9092
                                     % self.op.mode)
9093

    
9094
    if self.op.start:
9095
      iobj.admin_up = True
9096
      self.cfg.Update(iobj, feedback_fn)
9097
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9098
      feedback_fn("* starting instance...")
9099
      result = self.rpc.call_instance_start(pnode_name, iobj,
9100
                                            None, None, False)
9101
      result.Raise("Could not start instance")
9102

    
9103
    return list(iobj.all_nodes)
9104

    
9105

    
9106
class LUInstanceConsole(NoHooksLU):
9107
  """Connect to an instance's console.
9108

9109
  This is somewhat special in that it returns the command line that
9110
  you need to run on the master node in order to connect to the
9111
  console.
9112

9113
  """
9114
  REQ_BGL = False
9115

    
9116
  def ExpandNames(self):
9117
    self._ExpandAndLockInstance()
9118

    
9119
  def CheckPrereq(self):
9120
    """Check prerequisites.
9121

9122
    This checks that the instance is in the cluster.
9123

9124
    """
9125
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9126
    assert self.instance is not None, \
9127
      "Cannot retrieve locked instance %s" % self.op.instance_name
9128
    _CheckNodeOnline(self, self.instance.primary_node)
9129

    
9130
  def Exec(self, feedback_fn):
9131
    """Connect to the console of an instance
9132

9133
    """
9134
    instance = self.instance
9135
    node = instance.primary_node
9136

    
9137
    node_insts = self.rpc.call_instance_list([node],
9138
                                             [instance.hypervisor])[node]
9139
    node_insts.Raise("Can't get node information from %s" % node)
9140

    
9141
    if instance.name not in node_insts.payload:
9142
      if instance.admin_up:
9143
        state = constants.INSTST_ERRORDOWN
9144
      else:
9145
        state = constants.INSTST_ADMINDOWN
9146
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9147
                               (instance.name, state))
9148

    
9149
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9150

    
9151
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9152

    
9153

    
9154
def _GetInstanceConsole(cluster, instance):
9155
  """Returns console information for an instance.
9156

9157
  @type cluster: L{objects.Cluster}
9158
  @type instance: L{objects.Instance}
9159
  @rtype: dict
9160

9161
  """
9162
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9163
  # beparams and hvparams are passed separately, to avoid editing the
9164
  # instance and then saving the defaults in the instance itself.
9165
  hvparams = cluster.FillHV(instance)
9166
  beparams = cluster.FillBE(instance)
9167
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9168

    
9169
  assert console.instance == instance.name
9170
  assert console.Validate()
9171

    
9172
  return console.ToDict()
9173

    
9174

    
9175
class LUInstanceReplaceDisks(LogicalUnit):
9176
  """Replace the disks of an instance.
9177

9178
  """
9179
  HPATH = "mirrors-replace"
9180
  HTYPE = constants.HTYPE_INSTANCE
9181
  REQ_BGL = False
9182

    
9183
  def CheckArguments(self):
9184
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9185
                                  self.op.iallocator)
9186

    
9187
  def ExpandNames(self):
9188
    self._ExpandAndLockInstance()
9189

    
9190
    assert locking.LEVEL_NODE not in self.needed_locks
9191
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9192

    
9193
    assert self.op.iallocator is None or self.op.remote_node is None, \
9194
      "Conflicting options"
9195

    
9196
    if self.op.remote_node is not None:
9197
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9198

    
9199
      # Warning: do not remove the locking of the new secondary here
9200
      # unless DRBD8.AddChildren is changed to work in parallel;
9201
      # currently it doesn't since parallel invocations of
9202
      # FindUnusedMinor will conflict
9203
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9204
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9205
    else:
9206
      self.needed_locks[locking.LEVEL_NODE] = []
9207
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9208

    
9209
      if self.op.iallocator is not None:
9210
        # iallocator will select a new node in the same group
9211
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9212

    
9213
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9214
                                   self.op.iallocator, self.op.remote_node,
9215
                                   self.op.disks, False, self.op.early_release)
9216

    
9217
    self.tasklets = [self.replacer]
9218

    
9219
  def DeclareLocks(self, level):
9220
    if level == locking.LEVEL_NODEGROUP:
9221
      assert self.op.remote_node is None
9222
      assert self.op.iallocator is not None
9223
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9224

    
9225
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9226
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9227
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9228

    
9229
    elif level == locking.LEVEL_NODE:
9230
      if self.op.iallocator is not None:
9231
        assert self.op.remote_node is None
9232
        assert not self.needed_locks[locking.LEVEL_NODE]
9233

    
9234
        # Lock member nodes of all locked groups
9235
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9236
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9237
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9238
      else:
9239
        self._LockInstancesNodes()
9240

    
9241
  def BuildHooksEnv(self):
9242
    """Build hooks env.
9243

9244
    This runs on the master, the primary and all the secondaries.
9245

9246
    """
9247
    instance = self.replacer.instance
9248
    env = {
9249
      "MODE": self.op.mode,
9250
      "NEW_SECONDARY": self.op.remote_node,
9251
      "OLD_SECONDARY": instance.secondary_nodes[0],
9252
      }
9253
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9254
    return env
9255

    
9256
  def BuildHooksNodes(self):
9257
    """Build hooks nodes.
9258

9259
    """
9260
    instance = self.replacer.instance
9261
    nl = [
9262
      self.cfg.GetMasterNode(),
9263
      instance.primary_node,
9264
      ]
9265
    if self.op.remote_node is not None:
9266
      nl.append(self.op.remote_node)
9267
    return nl, nl
9268

    
9269
  def CheckPrereq(self):
9270
    """Check prerequisites.
9271

9272
    """
9273
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9274
            self.op.iallocator is None)
9275

    
9276
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9277
    if owned_groups:
9278
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9279

    
9280
    return LogicalUnit.CheckPrereq(self)
9281

    
9282

    
9283
class TLReplaceDisks(Tasklet):
9284
  """Replaces disks for an instance.
9285

9286
  Note: Locking is not within the scope of this class.
9287

9288
  """
9289
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9290
               disks, delay_iallocator, early_release):
9291
    """Initializes this class.
9292

9293
    """
9294
    Tasklet.__init__(self, lu)
9295

    
9296
    # Parameters
9297
    self.instance_name = instance_name
9298
    self.mode = mode
9299
    self.iallocator_name = iallocator_name
9300
    self.remote_node = remote_node
9301
    self.disks = disks
9302
    self.delay_iallocator = delay_iallocator
9303
    self.early_release = early_release
9304

    
9305
    # Runtime data
9306
    self.instance = None
9307
    self.new_node = None
9308
    self.target_node = None
9309
    self.other_node = None
9310
    self.remote_node_info = None
9311
    self.node_secondary_ip = None
9312

    
9313
  @staticmethod
9314
  def CheckArguments(mode, remote_node, iallocator):
9315
    """Helper function for users of this class.
9316

9317
    """
9318
    # check for valid parameter combination
9319
    if mode == constants.REPLACE_DISK_CHG:
9320
      if remote_node is None and iallocator is None:
9321
        raise errors.OpPrereqError("When changing the secondary either an"
9322
                                   " iallocator script must be used or the"
9323
                                   " new node given", errors.ECODE_INVAL)
9324

    
9325
      if remote_node is not None and iallocator is not None:
9326
        raise errors.OpPrereqError("Give either the iallocator or the new"
9327
                                   " secondary, not both", errors.ECODE_INVAL)
9328

    
9329
    elif remote_node is not None or iallocator is not None:
9330
      # Not replacing the secondary
9331
      raise errors.OpPrereqError("The iallocator and new node options can"
9332
                                 " only be used when changing the"
9333
                                 " secondary node", errors.ECODE_INVAL)
9334

    
9335
  @staticmethod
9336
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9337
    """Compute a new secondary node using an IAllocator.
9338

9339
    """
9340
    ial = IAllocator(lu.cfg, lu.rpc,
9341
                     mode=constants.IALLOCATOR_MODE_RELOC,
9342
                     name=instance_name,
9343
                     relocate_from=list(relocate_from))
9344

    
9345
    ial.Run(iallocator_name)
9346

    
9347
    if not ial.success:
9348
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9349
                                 " %s" % (iallocator_name, ial.info),
9350
                                 errors.ECODE_NORES)
9351

    
9352
    if len(ial.result) != ial.required_nodes:
9353
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9354
                                 " of nodes (%s), required %s" %
9355
                                 (iallocator_name,
9356
                                  len(ial.result), ial.required_nodes),
9357
                                 errors.ECODE_FAULT)
9358

    
9359
    remote_node_name = ial.result[0]
9360

    
9361
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9362
               instance_name, remote_node_name)
9363

    
9364
    return remote_node_name
9365

    
9366
  def _FindFaultyDisks(self, node_name):
9367
    """Wrapper for L{_FindFaultyInstanceDisks}.
9368

9369
    """
9370
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9371
                                    node_name, True)
9372

    
9373
  def _CheckDisksActivated(self, instance):
9374
    """Checks if the instance disks are activated.
9375

9376
    @param instance: The instance to check disks
9377
    @return: True if they are activated, False otherwise
9378

9379
    """
9380
    nodes = instance.all_nodes
9381

    
9382
    for idx, dev in enumerate(instance.disks):
9383
      for node in nodes:
9384
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9385
        self.cfg.SetDiskID(dev, node)
9386

    
9387
        result = self.rpc.call_blockdev_find(node, dev)
9388

    
9389
        if result.offline:
9390
          continue
9391
        elif result.fail_msg or not result.payload:
9392
          return False
9393

    
9394
    return True
9395

    
9396
  def CheckPrereq(self):
9397
    """Check prerequisites.
9398

9399
    This checks that the instance is in the cluster.
9400

9401
    """
9402
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9403
    assert instance is not None, \
9404
      "Cannot retrieve locked instance %s" % self.instance_name
9405

    
9406
    if instance.disk_template != constants.DT_DRBD8:
9407
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9408
                                 " instances", errors.ECODE_INVAL)
9409

    
9410
    if len(instance.secondary_nodes) != 1:
9411
      raise errors.OpPrereqError("The instance has a strange layout,"
9412
                                 " expected one secondary but found %d" %
9413
                                 len(instance.secondary_nodes),
9414
                                 errors.ECODE_FAULT)
9415

    
9416
    if not self.delay_iallocator:
9417
      self._CheckPrereq2()
9418

    
9419
  def _CheckPrereq2(self):
9420
    """Check prerequisites, second part.
9421

9422
    This function should always be part of CheckPrereq. It was separated and is
9423
    now called from Exec because during node evacuation iallocator was only
9424
    called with an unmodified cluster model, not taking planned changes into
9425
    account.
9426

9427
    """
9428
    instance = self.instance
9429
    secondary_node = instance.secondary_nodes[0]
9430

    
9431
    if self.iallocator_name is None:
9432
      remote_node = self.remote_node
9433
    else:
9434
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9435
                                       instance.name, instance.secondary_nodes)
9436

    
9437
    if remote_node is None:
9438
      self.remote_node_info = None
9439
    else:
9440
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9441
             "Remote node '%s' is not locked" % remote_node
9442

    
9443
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9444
      assert self.remote_node_info is not None, \
9445
        "Cannot retrieve locked node %s" % remote_node
9446

    
9447
    if remote_node == self.instance.primary_node:
9448
      raise errors.OpPrereqError("The specified node is the primary node of"
9449
                                 " the instance", errors.ECODE_INVAL)
9450

    
9451
    if remote_node == secondary_node:
9452
      raise errors.OpPrereqError("The specified node is already the"
9453
                                 " secondary node of the instance",
9454
                                 errors.ECODE_INVAL)
9455

    
9456
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9457
                                    constants.REPLACE_DISK_CHG):
9458
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9459
                                 errors.ECODE_INVAL)
9460

    
9461
    if self.mode == constants.REPLACE_DISK_AUTO:
9462
      if not self._CheckDisksActivated(instance):
9463
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9464
                                   " first" % self.instance_name,
9465
                                   errors.ECODE_STATE)
9466
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9467
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9468

    
9469
      if faulty_primary and faulty_secondary:
9470
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9471
                                   " one node and can not be repaired"
9472
                                   " automatically" % self.instance_name,
9473
                                   errors.ECODE_STATE)
9474

    
9475
      if faulty_primary:
9476
        self.disks = faulty_primary
9477
        self.target_node = instance.primary_node
9478
        self.other_node = secondary_node
9479
        check_nodes = [self.target_node, self.other_node]
9480
      elif faulty_secondary:
9481
        self.disks = faulty_secondary
9482
        self.target_node = secondary_node
9483
        self.other_node = instance.primary_node
9484
        check_nodes = [self.target_node, self.other_node]
9485
      else:
9486
        self.disks = []
9487
        check_nodes = []
9488

    
9489
    else:
9490
      # Non-automatic modes
9491
      if self.mode == constants.REPLACE_DISK_PRI:
9492
        self.target_node = instance.primary_node
9493
        self.other_node = secondary_node
9494
        check_nodes = [self.target_node, self.other_node]
9495

    
9496
      elif self.mode == constants.REPLACE_DISK_SEC:
9497
        self.target_node = secondary_node
9498
        self.other_node = instance.primary_node
9499
        check_nodes = [self.target_node, self.other_node]
9500

    
9501
      elif self.mode == constants.REPLACE_DISK_CHG:
9502
        self.new_node = remote_node
9503
        self.other_node = instance.primary_node
9504
        self.target_node = secondary_node
9505
        check_nodes = [self.new_node, self.other_node]
9506

    
9507
        _CheckNodeNotDrained(self.lu, remote_node)
9508
        _CheckNodeVmCapable(self.lu, remote_node)
9509

    
9510
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9511
        assert old_node_info is not None
9512
        if old_node_info.offline and not self.early_release:
9513
          # doesn't make sense to delay the release
9514
          self.early_release = True
9515
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9516
                          " early-release mode", secondary_node)
9517

    
9518
      else:
9519
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9520
                                     self.mode)
9521

    
9522
      # If not specified all disks should be replaced
9523
      if not self.disks:
9524
        self.disks = range(len(self.instance.disks))
9525

    
9526
    for node in check_nodes:
9527
      _CheckNodeOnline(self.lu, node)
9528

    
9529
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9530
                                                          self.other_node,
9531
                                                          self.target_node]
9532
                              if node_name is not None)
9533

    
9534
    # Release unneeded node locks
9535
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9536

    
9537
    # Release any owned node group
9538
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9539
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9540

    
9541
    # Check whether disks are valid
9542
    for disk_idx in self.disks:
9543
      instance.FindDisk(disk_idx)
9544

    
9545
    # Get secondary node IP addresses
9546
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9547
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9548

    
9549
  def Exec(self, feedback_fn):
9550
    """Execute disk replacement.
9551

9552
    This dispatches the disk replacement to the appropriate handler.
9553

9554
    """
9555
    if self.delay_iallocator:
9556
      self._CheckPrereq2()
9557

    
9558
    if __debug__:
9559
      # Verify owned locks before starting operation
9560
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9561
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9562
          ("Incorrect node locks, owning %s, expected %s" %
9563
           (owned_nodes, self.node_secondary_ip.keys()))
9564

    
9565
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9566
      assert list(owned_instances) == [self.instance_name], \
9567
          "Instance '%s' not locked" % self.instance_name
9568

    
9569
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9570
          "Should not own any node group lock at this point"
9571

    
9572
    if not self.disks:
9573
      feedback_fn("No disks need replacement")
9574
      return
9575

    
9576
    feedback_fn("Replacing disk(s) %s for %s" %
9577
                (utils.CommaJoin(self.disks), self.instance.name))
9578

    
9579
    activate_disks = (not self.instance.admin_up)
9580

    
9581
    # Activate the instance disks if we're replacing them on a down instance
9582
    if activate_disks:
9583
      _StartInstanceDisks(self.lu, self.instance, True)
9584

    
9585
    try:
9586
      # Should we replace the secondary node?
9587
      if self.new_node is not None:
9588
        fn = self._ExecDrbd8Secondary
9589
      else:
9590
        fn = self._ExecDrbd8DiskOnly
9591

    
9592
      result = fn(feedback_fn)
9593
    finally:
9594
      # Deactivate the instance disks if we're replacing them on a
9595
      # down instance
9596
      if activate_disks:
9597
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9598

    
9599
    if __debug__:
9600
      # Verify owned locks
9601
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9602
      nodes = frozenset(self.node_secondary_ip)
9603
      assert ((self.early_release and not owned_nodes) or
9604
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9605
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9606
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9607

    
9608
    return result
9609

    
9610
  def _CheckVolumeGroup(self, nodes):
9611
    self.lu.LogInfo("Checking volume groups")
9612

    
9613
    vgname = self.cfg.GetVGName()
9614

    
9615
    # Make sure volume group exists on all involved nodes
9616
    results = self.rpc.call_vg_list(nodes)
9617
    if not results:
9618
      raise errors.OpExecError("Can't list volume groups on the nodes")
9619

    
9620
    for node in nodes:
9621
      res = results[node]
9622
      res.Raise("Error checking node %s" % node)
9623
      if vgname not in res.payload:
9624
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9625
                                 (vgname, node))
9626

    
9627
  def _CheckDisksExistence(self, nodes):
9628
    # Check disk existence
9629
    for idx, dev in enumerate(self.instance.disks):
9630
      if idx not in self.disks:
9631
        continue
9632

    
9633
      for node in nodes:
9634
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9635
        self.cfg.SetDiskID(dev, node)
9636

    
9637
        result = self.rpc.call_blockdev_find(node, dev)
9638

    
9639
        msg = result.fail_msg
9640
        if msg or not result.payload:
9641
          if not msg:
9642
            msg = "disk not found"
9643
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9644
                                   (idx, node, msg))
9645

    
9646
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
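    # node_name is the peer being checked, on_primary says whether that peer
    # is the instance's primary node, and ldisk (roughly) selects the stricter
    # local-disk check instead of the overall, possibly still syncing, DRBD
    # status; see the two callers below.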
9647
    for idx, dev in enumerate(self.instance.disks):
9648
      if idx not in self.disks:
9649
        continue
9650

    
9651
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9652
                      (idx, node_name))
9653

    
9654
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9655
                                   ldisk=ldisk):
9656
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9657
                                 " replace disks for instance %s" %
9658
                                 (node_name, self.instance.name))
9659

    
9660
  def _CreateNewStorage(self, node_name):
9661
    """Create new storage on the primary or secondary node.
9662

9663
    This is only used for same-node replaces, not for changing the
9664
    secondary node, hence we don't want to modify the existing disk.
9665

9666
    """
9667
    iv_names = {}
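    # Shape of the mapping built below (example values illustrative only):
    #   iv_names["disk/0"] == (drbd_dev,
    #                          [old_data_lv, old_meta_lv],
    #                          [new_data_lv, new_meta_lv])
    # i.e. one entry per replaced disk, keyed by the disk's iv_name.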
9668

    
9669
    for idx, dev in enumerate(self.instance.disks):
9670
      if idx not in self.disks:
9671
        continue
9672

    
9673
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9674

    
9675
      self.cfg.SetDiskID(dev, node_name)
9676

    
9677
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9678
      names = _GenerateUniqueNames(self.lu, lv_names)
9679

    
9680
      vg_data = dev.children[0].logical_id[0]
9681
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9682
                             logical_id=(vg_data, names[0]))
9683
      vg_meta = dev.children[1].logical_id[0]
9684
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9685
                             logical_id=(vg_meta, names[1]))
9686

    
9687
      new_lvs = [lv_data, lv_meta]
9688
      old_lvs = [child.Copy() for child in dev.children]
9689
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9690

    
9691
      # we pass force_create=True to force the LVM creation
9692
      for new_lv in new_lvs:
9693
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9694
                        _GetInstanceInfoText(self.instance), False)
9695

    
9696
    return iv_names
9697

    
9698
  def _CheckDevices(self, node_name, iv_names):
9699
    for name, (dev, _, _) in iv_names.iteritems():
9700
      self.cfg.SetDiskID(dev, node_name)
9701

    
9702
      result = self.rpc.call_blockdev_find(node_name, dev)
9703

    
9704
      msg = result.fail_msg
9705
      if msg or not result.payload:
9706
        if not msg:
9707
          msg = "disk not found"
9708
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9709
                                 (name, msg))
9710

    
9711
      if result.payload.is_degraded:
9712
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9713

    
9714
  def _RemoveOldStorage(self, node_name, iv_names):
9715
    for name, (_, old_lvs, _) in iv_names.iteritems():
9716
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9717

    
9718
      for lv in old_lvs:
9719
        self.cfg.SetDiskID(lv, node_name)
9720

    
9721
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9722
        if msg:
9723
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9724
                             hint="remove unused LVs manually")
9725

    
9726
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9727
    """Replace a disk on the primary or secondary for DRBD 8.
9728

9729
    The algorithm for replace is quite complicated:
9730

9731
      1. for each disk to be replaced:
9732

9733
        1. create new LVs on the target node with unique names
9734
        1. detach old LVs from the drbd device
9735
        1. rename old LVs to name_replaced.<time_t>
9736
        1. rename new LVs to old LVs
9737
        1. attach the new LVs (with the old names now) to the drbd device
9738

9739
      1. wait for sync across all devices
9740

9741
      1. for each modified disk:
9742

9743
        1. remove old LVs (which have the name name_replaced.<time_t>)
9744

9745
    Failures are not very well handled.
9746

9747
    """
9748
    steps_total = 6
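    # Rough sketch of the rename dance in step 4 (LV names are illustrative):
    #   old LV  <uuid>.disk0_data     -> <uuid>.disk0_data_replaced-<time_t>
    #   new LV  <newuuid>.disk0_data  -> <uuid>.disk0_data
    # so the DRBD device ends up re-attached to LVs carrying the old names.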
9749

    
9750
    # Step: check device activation
9751
    self.lu.LogStep(1, steps_total, "Check device existence")
9752
    self._CheckDisksExistence([self.other_node, self.target_node])
9753
    self._CheckVolumeGroup([self.target_node, self.other_node])
9754

    
9755
    # Step: check other node consistency
9756
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9757
    self._CheckDisksConsistency(self.other_node,
9758
                                self.other_node == self.instance.primary_node,
9759
                                False)
9760

    
9761
    # Step: create new storage
9762
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9763
    iv_names = self._CreateNewStorage(self.target_node)
9764

    
9765
    # Step: for each lv, detach+rename*2+attach
9766
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9767
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9768
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9769

    
9770
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9771
                                                     old_lvs)
9772
      result.Raise("Can't detach drbd from local storage on node"
9773
                   " %s for device %s" % (self.target_node, dev.iv_name))
9774
      #dev.children = []
9775
      #cfg.Update(instance)
9776

    
9777
      # ok, we created the new LVs, so now we know we have the needed
9778
      # storage; as such, we proceed on the target node to rename
9779
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9780
      # using the assumption that logical_id == physical_id (which in
9781
      # turn is the unique_id on that node)
9782

    
9783
      # FIXME(iustin): use a better name for the replaced LVs
9784
      temp_suffix = int(time.time())
9785
      ren_fn = lambda d, suff: (d.physical_id[0],
9786
                                d.physical_id[1] + "_replaced-%s" % suff)
9787

    
9788
      # Build the rename list based on what LVs exist on the node
9789
      rename_old_to_new = []
9790
      for to_ren in old_lvs:
9791
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9792
        if not result.fail_msg and result.payload:
9793
          # device exists
9794
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9795

    
9796
      self.lu.LogInfo("Renaming the old LVs on the target node")
9797
      result = self.rpc.call_blockdev_rename(self.target_node,
9798
                                             rename_old_to_new)
9799
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9800

    
9801
      # Now we rename the new LVs to the old LVs
9802
      self.lu.LogInfo("Renaming the new LVs on the target node")
9803
      rename_new_to_old = [(new, old.physical_id)
9804
                           for old, new in zip(old_lvs, new_lvs)]
9805
      result = self.rpc.call_blockdev_rename(self.target_node,
9806
                                             rename_new_to_old)
9807
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9808

    
9809
      # Intermediate steps of in memory modifications
9810
      for old, new in zip(old_lvs, new_lvs):
9811
        new.logical_id = old.logical_id
9812
        self.cfg.SetDiskID(new, self.target_node)
9813

    
9814
      # We need to modify old_lvs so that removal later removes the
9815
      # right LVs, not the newly added ones; note that old_lvs is a
9816
      # copy here
9817
      for disk in old_lvs:
9818
        disk.logical_id = ren_fn(disk, temp_suffix)
9819
        self.cfg.SetDiskID(disk, self.target_node)
9820

    
9821
      # Now that the new lvs have the old name, we can add them to the device
9822
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9823
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9824
                                                  new_lvs)
9825
      msg = result.fail_msg
9826
      if msg:
9827
        for new_lv in new_lvs:
9828
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9829
                                               new_lv).fail_msg
9830
          if msg2:
9831
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9832
                               hint=("cleanup manually the unused logical"
9833
                                     "volumes"))
9834
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9835

    
9836
    cstep = 5
9837
    if self.early_release:
9838
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9839
      cstep += 1
9840
      self._RemoveOldStorage(self.target_node, iv_names)
9841
      # WARNING: we release both node locks here, do not do other RPCs
9842
      # than WaitForSync to the primary node
9843
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9844
                    names=[self.target_node, self.other_node])
9845

    
9846
    # Wait for sync
9847
    # This can fail as the old devices are degraded and _WaitForSync
9848
    # does a combined result over all disks, so we don't check its return value
9849
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9850
    cstep += 1
9851
    _WaitForSync(self.lu, self.instance)
9852

    
9853
    # Check all devices manually
9854
    self._CheckDevices(self.instance.primary_node, iv_names)
9855

    
9856
    # Step: remove old storage
9857
    if not self.early_release:
9858
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9859
      cstep += 1
9860
      self._RemoveOldStorage(self.target_node, iv_names)
9861

    
9862
  def _ExecDrbd8Secondary(self, feedback_fn):
9863
    """Replace the secondary node for DRBD 8.
9864

9865
    The algorithm for replace is quite complicated:
9866
      - for all disks of the instance:
9867
        - create new LVs on the new node with same names
9868
        - shutdown the drbd device on the old secondary
9869
        - disconnect the drbd network on the primary
9870
        - create the drbd device on the new secondary
9871
        - network attach the drbd on the primary, using an artifice:
9872
          the drbd code for Attach() will connect to the network if it
9873
          finds a device which is connected to the good local disks but
9874
          not network enabled
9875
      - wait for sync across all devices
9876
      - remove all disks from the old secondary
9877

9878
    Failures are not very well handled.
9879

9880
    """
9881
    steps_total = 6
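    # Rough roadmap of the DRBD reconfiguration performed below:
    #   1. create LVs and port-less DRBD devices on the new secondary
    #   2. blockdev_shutdown of the DRBD devices on the old secondary
    #   3. drbd_disconnect_net on the primary (connected -> standalone)
    #   4. update the instance configuration to point at the new secondary
    #   5. drbd_attach_net on primary + new secondary (standalone -> connected)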
9882

    
9883
    pnode = self.instance.primary_node
9884

    
9885
    # Step: check device activation
9886
    self.lu.LogStep(1, steps_total, "Check device existence")
9887
    self._CheckDisksExistence([self.instance.primary_node])
9888
    self._CheckVolumeGroup([self.instance.primary_node])
9889

    
9890
    # Step: check other node consistency
9891
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9892
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9893

    
9894
    # Step: create new storage
9895
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9896
    for idx, dev in enumerate(self.instance.disks):
9897
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9898
                      (self.new_node, idx))
9899
      # we pass force_create=True to force LVM creation
9900
      for new_lv in dev.children:
9901
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9902
                        _GetInstanceInfoText(self.instance), False)
9903

    
9904
    # Step 4: drbd minors and drbd setup changes
9905
    # after this, we must manually remove the drbd minors on both the
9906
    # error and the success paths
9907
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9908
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9909
                                         for dev in self.instance.disks],
9910
                                        self.instance.name)
9911
    logging.debug("Allocated minors %r", minors)
9912

    
9913
    iv_names = {}
9914
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9915
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9916
                      (self.new_node, idx))
9917
      # create new devices on new_node; note that we create two IDs:
9918
      # one without port, so the drbd will be activated without
9919
      # networking information on the new node at this stage, and one
9920
      # with network, for the later activation in step 4
9921
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
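      # The two 6-tuples built below differ only in the port field; values
      # here are made up for illustration:
      #   new_alone_id = ("pnode", "newnode", None,  0, 12, "secret")
      #   new_net_id   = ("pnode", "newnode", 11000, 0, 12, "secret")
      # The port-less ID lets the new DRBD come up without networking first;
      # the networked one is what later ends up in the configuration.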
9922
      if self.instance.primary_node == o_node1:
9923
        p_minor = o_minor1
9924
      else:
9925
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9926
        p_minor = o_minor2
9927

    
9928
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9929
                      p_minor, new_minor, o_secret)
9930
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9931
                    p_minor, new_minor, o_secret)
9932

    
9933
      iv_names[idx] = (dev, dev.children, new_net_id)
9934
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9935
                    new_net_id)
9936
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9937
                              logical_id=new_alone_id,
9938
                              children=dev.children,
9939
                              size=dev.size)
9940
      try:
9941
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9942
                              _GetInstanceInfoText(self.instance), False)
9943
      except errors.GenericError:
9944
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9945
        raise
9946

    
9947
    # We have new devices, shutdown the drbd on the old secondary
9948
    for idx, dev in enumerate(self.instance.disks):
9949
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9950
      self.cfg.SetDiskID(dev, self.target_node)
9951
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9952
      if msg:
9953
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9954
                           "node: %s" % (idx, msg),
9955
                           hint=("Please cleanup this device manually as"
9956
                                 " soon as possible"))
9957

    
9958
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9959
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9960
                                               self.instance.disks)[pnode]
9961

    
9962
    msg = result.fail_msg
9963
    if msg:
9964
      # detaches didn't succeed (unlikely)
9965
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9966
      raise errors.OpExecError("Can't detach the disks from the network on"
9967
                               " old node: %s" % (msg,))
9968

    
9969
    # if we managed to detach at least one, we update all the disks of
9970
    # the instance to point to the new secondary
9971
    self.lu.LogInfo("Updating instance configuration")
9972
    for dev, _, new_logical_id in iv_names.itervalues():
9973
      dev.logical_id = new_logical_id
9974
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9975

    
9976
    self.cfg.Update(self.instance, feedback_fn)
9977

    
9978
    # and now perform the drbd attach
9979
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9980
                    " (standalone => connected)")
9981
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9982
                                            self.new_node],
9983
                                           self.node_secondary_ip,
9984
                                           self.instance.disks,
9985
                                           self.instance.name,
9986
                                           False)
9987
    for to_node, to_result in result.items():
9988
      msg = to_result.fail_msg
9989
      if msg:
9990
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9991
                           to_node, msg,
9992
                           hint=("please do a gnt-instance info to see the"
9993
                                 " status of disks"))
9994
    cstep = 5
9995
    if self.early_release:
9996
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9997
      cstep += 1
9998
      self._RemoveOldStorage(self.target_node, iv_names)
9999
      # WARNING: we release all node locks here, do not do other RPCs
10000
      # than WaitForSync to the primary node
10001
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10002
                    names=[self.instance.primary_node,
10003
                           self.target_node,
10004
                           self.new_node])
10005

    
10006
    # Wait for sync
10007
    # This can fail as the old devices are degraded and _WaitForSync
10008
    # does a combined result over all disks, so we don't check its return value
10009
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10010
    cstep += 1
10011
    _WaitForSync(self.lu, self.instance)
10012

    
10013
    # Check all devices manually
10014
    self._CheckDevices(self.instance.primary_node, iv_names)
10015

    
10016
    # Step: remove old storage
10017
    if not self.early_release:
10018
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10019
      self._RemoveOldStorage(self.target_node, iv_names)
10020

    
10021

    
10022
class LURepairNodeStorage(NoHooksLU):
10023
  """Repairs the volume group on a node.
10024

10025
  """
10026
  REQ_BGL = False
10027

    
10028
  def CheckArguments(self):
10029
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10030

    
10031
    storage_type = self.op.storage_type
10032

    
10033
    if (constants.SO_FIX_CONSISTENCY not in
10034
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10035
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10036
                                 " repaired" % storage_type,
10037
                                 errors.ECODE_INVAL)
10038

    
10039
  def ExpandNames(self):
10040
    self.needed_locks = {
10041
      locking.LEVEL_NODE: [self.op.node_name],
10042
      }
10043

    
10044
  def _CheckFaultyDisks(self, instance, node_name):
10045
    """Ensure faulty disks abort the opcode or at least warn."""
10046
    try:
10047
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10048
                                  node_name, True):
10049
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10050
                                   " node '%s'" % (instance.name, node_name),
10051
                                   errors.ECODE_STATE)
10052
    except errors.OpPrereqError, err:
10053
      if self.op.ignore_consistency:
10054
        self.proc.LogWarning(str(err.args[0]))
10055
      else:
10056
        raise
10057

    
10058
  def CheckPrereq(self):
10059
    """Check prerequisites.
10060

10061
    """
10062
    # Check whether any instance on this node has faulty disks
10063
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10064
      if not inst.admin_up:
10065
        continue
10066
      check_nodes = set(inst.all_nodes)
10067
      check_nodes.discard(self.op.node_name)
10068
      for inst_node_name in check_nodes:
10069
        self._CheckFaultyDisks(inst, inst_node_name)
10070

    
10071
  def Exec(self, feedback_fn):
10072
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10073
                (self.op.name, self.op.node_name))
10074

    
10075
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10076
    result = self.rpc.call_storage_execute(self.op.node_name,
10077
                                           self.op.storage_type, st_args,
10078
                                           self.op.name,
10079
                                           constants.SO_FIX_CONSISTENCY)
10080
    result.Raise("Failed to repair storage unit '%s' on %s" %
10081
                 (self.op.name, self.op.node_name))
10082

    
10083

    
10084
class LUNodeEvacuate(NoHooksLU):
10085
  """Evacuates instances off a list of nodes.
10086

10087
  """
10088
  REQ_BGL = False
10089

    
10090
  def CheckArguments(self):
10091
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10092

    
10093
  def ExpandNames(self):
10094
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10095

    
10096
    if self.op.remote_node is not None:
10097
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10098
      assert self.op.remote_node
10099

    
10100
      if self.op.remote_node == self.op.node_name:
10101
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10102
                                   " secondary node", errors.ECODE_INVAL)
10103

    
10104
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10105
        raise errors.OpPrereqError("Without the use of an iallocator only"
10106
                                   " secondary instances can be evacuated",
10107
                                   errors.ECODE_INVAL)
10108

    
10109
    # Declare locks
10110
    self.share_locks = _ShareAll()
10111
    self.needed_locks = {
10112
      locking.LEVEL_INSTANCE: [],
10113
      locking.LEVEL_NODEGROUP: [],
10114
      locking.LEVEL_NODE: [],
10115
      }
10116

    
10117
    # Determine nodes (via group) optimistically, needs verification once locks
10118
    # have been acquired
10119
    self.lock_nodes = self._DetermineNodes()
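    # (CheckPrereq recomputes the node, group and instance sets once the locks
    # are actually held and asks the user to retry the operation if anything
    # changed in between.)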
10120

    
10121
  def _DetermineNodes(self):
10122
    """Gets the list of nodes to operate on.
10123

10124
    """
10125
    if self.op.remote_node is None:
10126
      # Iallocator will choose any node(s) in the same group
10127
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10128
    else:
10129
      group_nodes = frozenset([self.op.remote_node])
10130

    
10131
    # Determine nodes to be locked
10132
    return set([self.op.node_name]) | group_nodes
10133

    
10134
  def _DetermineInstances(self):
10135
    """Builds list of instances to operate on.
10136

10137
    """
10138
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10139

    
10140
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10141
      # Primary instances only
10142
      inst_fn = _GetNodePrimaryInstances
10143
      assert self.op.remote_node is None, \
10144
        "Evacuating primary instances requires iallocator"
10145
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10146
      # Secondary instances only
10147
      inst_fn = _GetNodeSecondaryInstances
10148
    else:
10149
      # All instances
10150
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10151
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10152
      # per instance
10153
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10154
                                 " interface it is not possible to evacuate"
10155
                                 " all instances at once; specify explicitly"
10156
                                 " whether to evacuate primary or secondary"
10157
                                 " instances",
10158
                                 errors.ECODE_INVAL)
10159
      inst_fn = _GetNodeInstances
10160

    
10161
    return inst_fn(self.cfg, self.op.node_name)
10162

    
10163
  def DeclareLocks(self, level):
10164
    if level == locking.LEVEL_INSTANCE:
10165
      # Lock instances optimistically, needs verification once node and group
10166
      # locks have been acquired
10167
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10168
        set(i.name for i in self._DetermineInstances())
10169

    
10170
    elif level == locking.LEVEL_NODEGROUP:
10171
      # Lock node groups for all potential target nodes optimistically, needs
10172
      # verification once nodes have been acquired
10173
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10174
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10175

    
10176
    elif level == locking.LEVEL_NODE:
10177
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10178

    
10179
  def CheckPrereq(self):
10180
    # Verify locks
10181
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10182
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10183
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10184

    
10185
    need_nodes = self._DetermineNodes()
10186

    
10187
    if not owned_nodes.issuperset(need_nodes):
10188
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10189
                                 " locks were acquired, current nodes are"
10190
                                 " are '%s', used to be '%s'; retry the"
10191
                                 " operation" %
10192
                                 (self.op.node_name,
10193
                                  utils.CommaJoin(need_nodes),
10194
                                  utils.CommaJoin(owned_nodes)),
10195
                                 errors.ECODE_STATE)
10196

    
10197
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10198
    if owned_groups != wanted_groups:
10199
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10200
                               " current groups are '%s', used to be '%s';"
10201
                               " retry the operation" %
10202
                               (utils.CommaJoin(wanted_groups),
10203
                                utils.CommaJoin(owned_groups)))
10204

    
10205
    # Determine affected instances
10206
    self.instances = self._DetermineInstances()
10207
    self.instance_names = [i.name for i in self.instances]
10208

    
10209
    if set(self.instance_names) != owned_instances:
10210
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10211
                               " were acquired, current instances are '%s',"
10212
                               " used to be '%s'; retry the operation" %
10213
                               (self.op.node_name,
10214
                                utils.CommaJoin(self.instance_names),
10215
                                utils.CommaJoin(owned_instances)))
10216

    
10217
    if self.instance_names:
10218
      self.LogInfo("Evacuating instances from node '%s': %s",
10219
                   self.op.node_name,
10220
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10221
    else:
10222
      self.LogInfo("No instances to evacuate from node '%s'",
10223
                   self.op.node_name)
10224

    
10225
    if self.op.remote_node is not None:
10226
      for i in self.instances:
10227
        if i.primary_node == self.op.remote_node:
10228
          raise errors.OpPrereqError("Node %s is the primary node of"
10229
                                     " instance %s, cannot use it as"
10230
                                     " secondary" %
10231
                                     (self.op.remote_node, i.name),
10232
                                     errors.ECODE_INVAL)
10233

    
10234
  def Exec(self, feedback_fn):
10235
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10236

    
10237
    if not self.instance_names:
10238
      # No instances to evacuate
10239
      jobs = []
10240

    
10241
    elif self.op.iallocator is not None:
10242
      # TODO: Implement relocation to other group
10243
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10244
                       evac_mode=self.op.mode,
10245
                       instances=list(self.instance_names))
10246

    
10247
      ial.Run(self.op.iallocator)
10248

    
10249
      if not ial.success:
10250
        raise errors.OpPrereqError("Can't compute node evacuation using"
10251
                                   " iallocator '%s': %s" %
10252
                                   (self.op.iallocator, ial.info),
10253
                                   errors.ECODE_NORES)
10254

    
10255
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10256

    
10257
    elif self.op.remote_node is not None:
10258
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
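      # One single-opcode job per instance, so each replace-disks runs as its
      # own job and a failure for one instance does not abort the others.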
10259
      jobs = [
10260
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10261
                                        remote_node=self.op.remote_node,
10262
                                        disks=[],
10263
                                        mode=constants.REPLACE_DISK_CHG,
10264
                                        early_release=self.op.early_release)]
10265
        for instance_name in self.instance_names
10266
        ]
10267

    
10268
    else:
10269
      raise errors.ProgrammerError("No iallocator or remote node")
10270

    
10271
    return ResultWithJobs(jobs)
10272

    
10273

    
10274
def _SetOpEarlyRelease(early_release, op):
10275
  """Sets C{early_release} flag on opcodes if available.
10276

10277
  """
10278
  try:
10279
    op.early_release = early_release
10280
  except AttributeError:
10281
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10282

    
10283
  return op
10284

    
10285

    
10286
def _NodeEvacDest(use_nodes, group, nodes):
10287
  """Returns group or nodes depending on caller's choice.
10288

10289
  """
10290
  if use_nodes:
10291
    return utils.CommaJoin(nodes)
10292
  else:
10293
    return group
10294

    
10295

    
10296
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10297
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10298

10299
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10300
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10301

10302
  @type lu: L{LogicalUnit}
10303
  @param lu: Logical unit instance
10304
  @type alloc_result: tuple/list
10305
  @param alloc_result: Result from iallocator
10306
  @type early_release: bool
10307
  @param early_release: Whether to release locks early if possible
10308
  @type use_nodes: bool
10309
  @param use_nodes: Whether to display node names instead of groups
10310

10311
  """
10312
  (moved, failed, jobs) = alloc_result
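  # Illustrative shape of alloc_result (values made up):
  #   moved  = [("inst1", "group1", ["node2", "node3"]), ...]
  #   failed = [("inst9", "not enough memory"), ...]
  #   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}, ...], ...]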
10313

    
10314
  if failed:
10315
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10316
                                 for (name, reason) in failed)
10317
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10318
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10319

    
10320
  if moved:
10321
    lu.LogInfo("Instances to be moved: %s",
10322
               utils.CommaJoin("%s (to %s)" %
10323
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10324
                               for (name, group, nodes) in moved))
10325

    
10326
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10327
              map(opcodes.OpCode.LoadOpCode, ops))
10328
          for ops in jobs]
10329

    
10330

    
10331
class LUInstanceGrowDisk(LogicalUnit):
10332
  """Grow a disk of an instance.
10333

10334
  """
10335
  HPATH = "disk-grow"
10336
  HTYPE = constants.HTYPE_INSTANCE
10337
  REQ_BGL = False
10338

    
10339
  def ExpandNames(self):
10340
    self._ExpandAndLockInstance()
10341
    self.needed_locks[locking.LEVEL_NODE] = []
10342
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10343

    
10344
  def DeclareLocks(self, level):
10345
    if level == locking.LEVEL_NODE:
10346
      self._LockInstancesNodes()
10347

    
10348
  def BuildHooksEnv(self):
10349
    """Build hooks env.
10350

10351
    This runs on the master, the primary and all the secondaries.
10352

10353
    """
10354
    env = {
10355
      "DISK": self.op.disk,
10356
      "AMOUNT": self.op.amount,
10357
      }
10358
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10359
    return env
10360

    
10361
  def BuildHooksNodes(self):
10362
    """Build hooks nodes.
10363

10364
    """
10365
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10366
    return (nl, nl)
10367

    
10368
  def CheckPrereq(self):
10369
    """Check prerequisites.
10370

10371
    This checks that the instance is in the cluster.
10372

10373
    """
10374
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10375
    assert instance is not None, \
10376
      "Cannot retrieve locked instance %s" % self.op.instance_name
10377
    nodenames = list(instance.all_nodes)
10378
    for node in nodenames:
10379
      _CheckNodeOnline(self, node)
10380

    
10381
    self.instance = instance
10382

    
10383
    if instance.disk_template not in constants.DTS_GROWABLE:
10384
      raise errors.OpPrereqError("Instance's disk layout does not support"
10385
                                 " growing", errors.ECODE_INVAL)
10386

    
10387
    self.disk = instance.FindDisk(self.op.disk)
10388

    
10389
    if instance.disk_template not in (constants.DT_FILE,
10390
                                      constants.DT_SHARED_FILE):
10391
      # TODO: check the free disk space for file-based disks, once that
10392
      # feature is supported
10393
      _CheckNodesFreeDiskPerVG(self, nodenames,
10394
                               self.disk.ComputeGrowth(self.op.amount))
10395

    
10396
  def Exec(self, feedback_fn):
10397
    """Execute disk grow.
10398

10399
    """
10400
    instance = self.instance
10401
    disk = self.disk
10402

    
10403
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10404
    if not disks_ok:
10405
      raise errors.OpExecError("Cannot activate block device to grow")
10406

    
10407
    # First run all grow ops in dry-run mode
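    # The grow is done in two passes over all nodes: a dry-run pass first, so
    # that a node-local problem is caught before any node has resized its
    # device, and only then the real grow (see the second loop below).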
10408
    for node in instance.all_nodes:
10409
      self.cfg.SetDiskID(disk, node)
10410
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10411
      result.Raise("Grow request failed to node %s" % node)
10412

    
10413
    # We know that (as far as we can test) operations across different
10414
    # nodes will succeed, time to run it for real
10415
    for node in instance.all_nodes:
10416
      self.cfg.SetDiskID(disk, node)
10417
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10418
      result.Raise("Grow request failed to node %s" % node)
10419

    
10420
      # TODO: Rewrite code to work properly
10421
      # DRBD goes into sync mode for a short amount of time after executing the
10422
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10423
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10424
      # time is a work-around.
10425
      time.sleep(5)
10426

    
10427
    disk.RecordGrow(self.op.amount)
10428
    self.cfg.Update(instance, feedback_fn)
10429
    if self.op.wait_for_sync:
10430
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10431
      if disk_abort:
10432
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10433
                             " status; please check the instance")
10434
      if not instance.admin_up:
10435
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10436
    elif not instance.admin_up:
10437
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10438
                           " not supposed to be running because no wait for"
10439
                           " sync mode was requested")
10440

    
10441

    
10442
class LUInstanceQueryData(NoHooksLU):
10443
  """Query runtime instance data.
10444

10445
  """
10446
  REQ_BGL = False
10447

    
10448
  def ExpandNames(self):
10449
    self.needed_locks = {}
10450

    
10451
    # Use locking if requested or when non-static information is wanted
10452
    if not (self.op.static or self.op.use_locking):
10453
      self.LogWarning("Non-static data requested, locks need to be acquired")
10454
      self.op.use_locking = True
10455

    
10456
    if self.op.instances or not self.op.use_locking:
10457
      # Expand instance names right here
10458
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10459
    else:
10460
      # Will use acquired locks
10461
      self.wanted_names = None
10462

    
10463
    if self.op.use_locking:
10464
      self.share_locks = _ShareAll()
10465

    
10466
      if self.wanted_names is None:
10467
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10468
      else:
10469
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10470

    
10471
      self.needed_locks[locking.LEVEL_NODE] = []
10472
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10473

    
10474
  def DeclareLocks(self, level):
10475
    if self.op.use_locking and level == locking.LEVEL_NODE:
10476
      self._LockInstancesNodes()
10477

    
10478
  def CheckPrereq(self):
10479
    """Check prerequisites.
10480

10481
    This only checks the optional instance list against the existing names.
10482

10483
    """
10484
    if self.wanted_names is None:
10485
      assert self.op.use_locking, "Locking was not used"
10486
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10487

    
10488
    self.wanted_instances = \
10489
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10490

    
10491
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10492
    """Returns the status of a block device
10493

10494
    """
10495
    if self.op.static or not node:
10496
      return None
10497

    
10498
    self.cfg.SetDiskID(dev, node)
10499

    
10500
    result = self.rpc.call_blockdev_find(node, dev)
10501
    if result.offline:
10502
      return None
10503

    
10504
    result.Raise("Can't compute disk status for %s" % instance_name)
10505

    
10506
    status = result.payload
10507
    if status is None:
10508
      return None
10509

    
10510
    return (status.dev_path, status.major, status.minor,
10511
            status.sync_percent, status.estimated_time,
10512
            status.is_degraded, status.ldisk_status)
10513

    
10514
  def _ComputeDiskStatus(self, instance, snode, dev):
10515
    """Compute block device status.
10516

10517
    """
10518
    if dev.dev_type in constants.LDS_DRBD:
10519
      # for DRBD, derive snode from logical_id (else use the one passed in)
10520
      if dev.logical_id[0] == instance.primary_node:
10521
        snode = dev.logical_id[1]
10522
      else:
10523
        snode = dev.logical_id[0]
10524

    
10525
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10526
                                              instance.name, dev)
10527
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10528

    
10529
    if dev.children:
10530
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10531
                                        instance, snode),
10532
                         dev.children)
10533
    else:
10534
      dev_children = []
10535

    
10536
    return {
10537
      "iv_name": dev.iv_name,
10538
      "dev_type": dev.dev_type,
10539
      "logical_id": dev.logical_id,
10540
      "physical_id": dev.physical_id,
10541
      "pstatus": dev_pstatus,
10542
      "sstatus": dev_sstatus,
10543
      "children": dev_children,
10544
      "mode": dev.mode,
10545
      "size": dev.size,
10546
      }
10547

    
10548
  def Exec(self, feedback_fn):
10549
    """Gather and return data"""
10550
    result = {}
10551

    
10552
    cluster = self.cfg.GetClusterInfo()
10553

    
10554
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10555
                                          for i in self.wanted_instances)
10556
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10557
      if self.op.static or pnode.offline:
10558
        remote_state = None
10559
        if pnode.offline:
10560
          self.LogWarning("Primary node %s is marked offline, returning static"
10561
                          " information only for instance %s" %
10562
                          (pnode.name, instance.name))
10563
      else:
10564
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10565
                                                  instance.name,
10566
                                                  instance.hypervisor)
10567
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10568
        remote_info = remote_info.payload
10569
        if remote_info and "state" in remote_info:
10570
          remote_state = "up"
10571
        else:
10572
          remote_state = "down"
10573

    
10574
      if instance.admin_up:
10575
        config_state = "up"
10576
      else:
10577
        config_state = "down"
10578

    
10579
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10580
                  instance.disks)
10581

    
10582
      result[instance.name] = {
10583
        "name": instance.name,
10584
        "config_state": config_state,
10585
        "run_state": remote_state,
10586
        "pnode": instance.primary_node,
10587
        "snodes": instance.secondary_nodes,
10588
        "os": instance.os,
10589
        # this happens to be the same format used for hooks
10590
        "nics": _NICListToTuple(self, instance.nics),
10591
        "disk_template": instance.disk_template,
10592
        "disks": disks,
10593
        "hypervisor": instance.hypervisor,
10594
        "network_port": instance.network_port,
10595
        "hv_instance": instance.hvparams,
10596
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10597
        "be_instance": instance.beparams,
10598
        "be_actual": cluster.FillBE(instance),
10599
        "os_instance": instance.osparams,
10600
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10601
        "serial_no": instance.serial_no,
10602
        "mtime": instance.mtime,
10603
        "ctime": instance.ctime,
10604
        "uuid": instance.uuid,
10605
        }
10606

    
10607
    return result
10608

    
10609

    
10610
class LUInstanceSetParams(LogicalUnit):
10611
  """Modifies an instances's parameters.
10612

10613
  """
10614
  HPATH = "instance-modify"
10615
  HTYPE = constants.HTYPE_INSTANCE
10616
  REQ_BGL = False
10617

    
10618
  def CheckArguments(self):
10619
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10620
            self.op.hvparams or self.op.beparams or self.op.os_name):
10621
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10622

    
10623
    if self.op.hvparams:
10624
      _CheckGlobalHvParams(self.op.hvparams)
10625

    
10626
    # Disk validation
10627
    disk_addremove = 0
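    # self.op.disks is a list of (op, params) pairs, where op is either
    # constants.DDM_ADD, constants.DDM_REMOVE or an integer disk index;
    # an "add" entry could look like this (illustrative values only):
    #   (constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
    #                        constants.IDISK_MODE: constants.DISK_RDWR})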
10628
    for disk_op, disk_dict in self.op.disks:
10629
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10630
      if disk_op == constants.DDM_REMOVE:
10631
        disk_addremove += 1
10632
        continue
10633
      elif disk_op == constants.DDM_ADD:
10634
        disk_addremove += 1
10635
      else:
10636
        if not isinstance(disk_op, int):
10637
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10638
        if not isinstance(disk_dict, dict):
10639
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10640
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10641

    
10642
      if disk_op == constants.DDM_ADD:
10643
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10644
        if mode not in constants.DISK_ACCESS_SET:
10645
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10646
                                     errors.ECODE_INVAL)
10647
        size = disk_dict.get(constants.IDISK_SIZE, None)
10648
        if size is None:
10649
          raise errors.OpPrereqError("Required disk parameter size missing",
10650
                                     errors.ECODE_INVAL)
10651
        try:
10652
          size = int(size)
10653
        except (TypeError, ValueError), err:
10654
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10655
                                     str(err), errors.ECODE_INVAL)
10656
        disk_dict[constants.IDISK_SIZE] = size
10657
      else:
10658
        # modification of disk
10659
        if constants.IDISK_SIZE in disk_dict:
10660
          raise errors.OpPrereqError("Disk size change not possible, use"
10661
                                     " grow-disk", errors.ECODE_INVAL)
10662

    
10663
    if disk_addremove > 1:
10664
      raise errors.OpPrereqError("Only one disk add or remove operation"
10665
                                 " supported at a time", errors.ECODE_INVAL)
10666

    
10667
    if self.op.disks and self.op.disk_template is not None:
10668
      raise errors.OpPrereqError("Disk template conversion and other disk"
10669
                                 " changes not supported at the same time",
10670
                                 errors.ECODE_INVAL)
10671

    
10672
    if (self.op.disk_template and
10673
        self.op.disk_template in constants.DTS_INT_MIRROR and
10674
        self.op.remote_node is None):
10675
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10676
                                 " one requires specifying a secondary node",
10677
                                 errors.ECODE_INVAL)
10678

    
10679
    # NIC validation
10680
    nic_addremove = 0
10681
    for nic_op, nic_dict in self.op.nics:
10682
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10683
      if nic_op == constants.DDM_REMOVE:
10684
        nic_addremove += 1
10685
        continue
10686
      elif nic_op == constants.DDM_ADD:
10687
        nic_addremove += 1
10688
      else:
10689
        if not isinstance(nic_op, int):
10690
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10691
        if not isinstance(nic_dict, dict):
10692
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10693
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10694

    
10695
      # nic_dict should be a dict
10696
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10697
      if nic_ip is not None:
10698
        if nic_ip.lower() == constants.VALUE_NONE:
10699
          nic_dict[constants.INIC_IP] = None
10700
        else:
10701
          if not netutils.IPAddress.IsValid(nic_ip):
10702
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10703
                                       errors.ECODE_INVAL)
10704

    
10705
      nic_bridge = nic_dict.get("bridge", None)
10706
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10707
      if nic_bridge and nic_link:
10708
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10709
                                   " at the same time", errors.ECODE_INVAL)
10710
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10711
        nic_dict["bridge"] = None
10712
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10713
        nic_dict[constants.INIC_LINK] = None
10714

    
10715
      if nic_op == constants.DDM_ADD:
10716
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10717
        if nic_mac is None:
10718
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10719

    
10720
      if constants.INIC_MAC in nic_dict:
10721
        nic_mac = nic_dict[constants.INIC_MAC]
10722
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10723
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10724

    
10725
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10726
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10727
                                     " modifying an existing nic",
10728
                                     errors.ECODE_INVAL)
10729

    
10730
    if nic_addremove > 1:
10731
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10732
                                 " supported at a time", errors.ECODE_INVAL)
10733

    
10734
  def ExpandNames(self):
10735
    self._ExpandAndLockInstance()
10736
    self.needed_locks[locking.LEVEL_NODE] = []
10737
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10738

    
10739
  def DeclareLocks(self, level):
10740
    if level == locking.LEVEL_NODE:
10741
      self._LockInstancesNodes()
10742
      if self.op.disk_template and self.op.remote_node:
10743
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10744
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10745

    
10746
  def BuildHooksEnv(self):
10747
    """Build hooks env.
10748

10749
    This runs on the master, primary and secondaries.
10750

10751
    """
10752
    args = dict()
10753
    if constants.BE_MEMORY in self.be_new:
10754
      args["memory"] = self.be_new[constants.BE_MEMORY]
10755
    if constants.BE_VCPUS in self.be_new:
10756
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10757
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10758
    # information at all.
10759
    if self.op.nics:
10760
      args["nics"] = []
10761
      nic_override = dict(self.op.nics)
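      # nic_override maps a NIC index or constants.DDM_ADD/DDM_REMOVE to the
      # parameter dict from the opcode (same (op, dict) pair format as used
      # for disks and NICs in CheckArguments).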
10762
      for idx, nic in enumerate(self.instance.nics):
10763
        if idx in nic_override:
10764
          this_nic_override = nic_override[idx]
10765
        else:
10766
          this_nic_override = {}
10767
        if constants.INIC_IP in this_nic_override:
10768
          ip = this_nic_override[constants.INIC_IP]
10769
        else:
10770
          ip = nic.ip
10771
        if constants.INIC_MAC in this_nic_override:
10772
          mac = this_nic_override[constants.INIC_MAC]
10773
        else:
10774
          mac = nic.mac
10775
        if idx in self.nic_pnew:
10776
          nicparams = self.nic_pnew[idx]
10777
        else:
10778
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10779
        mode = nicparams[constants.NIC_MODE]
10780
        link = nicparams[constants.NIC_LINK]
10781
        args["nics"].append((ip, mac, mode, link))
10782
      if constants.DDM_ADD in nic_override:
10783
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10784
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10785
        nicparams = self.nic_pnew[constants.DDM_ADD]
10786
        mode = nicparams[constants.NIC_MODE]
10787
        link = nicparams[constants.NIC_LINK]
10788
        args["nics"].append((ip, mac, mode, link))
10789
      elif constants.DDM_REMOVE in nic_override:
10790
        del args["nics"][-1]
10791

    
10792
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10793
    if self.op.disk_template:
10794
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10795

    
10796
    return env
10797

    
10798
  def BuildHooksNodes(self):
10799
    """Build hooks nodes.
10800

10801
    """
10802
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10803
    return (nl, nl)
10804

    
10805
  def CheckPrereq(self):
10806
    """Check prerequisites.
10807

10808
    This only checks the instance list against the existing names.
10809

10810
    """
10811
    # checking the new params on the primary/secondary nodes
10812

    
10813
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10814
    cluster = self.cluster = self.cfg.GetClusterInfo()
10815
    assert self.instance is not None, \
10816
      "Cannot retrieve locked instance %s" % self.op.instance_name
10817
    pnode = instance.primary_node
10818
    nodelist = list(instance.all_nodes)
10819

    
10820
    # OS change
10821
    if self.op.os_name and not self.op.force:
10822
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10823
                      self.op.force_variant)
10824
      instance_os = self.op.os_name
10825
    else:
10826
      instance_os = instance.os
10827

    
10828
    if self.op.disk_template:
10829
      if instance.disk_template == self.op.disk_template:
10830
        raise errors.OpPrereqError("Instance already has disk template %s" %
10831
                                   instance.disk_template, errors.ECODE_INVAL)
10832

    
10833
      if (instance.disk_template,
10834
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10835
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10836
                                   " %s to %s" % (instance.disk_template,
10837
                                                  self.op.disk_template),
10838
                                   errors.ECODE_INVAL)
10839
      _CheckInstanceDown(self, instance, "cannot change disk template")
10840
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10841
        if self.op.remote_node == pnode:
10842
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10843
                                     " as the primary node of the instance" %
10844
                                     self.op.remote_node, errors.ECODE_STATE)
10845
        _CheckNodeOnline(self, self.op.remote_node)
10846
        _CheckNodeNotDrained(self, self.op.remote_node)
10847
        # FIXME: here we assume that the old instance type is DT_PLAIN
10848
        assert instance.disk_template == constants.DT_PLAIN
10849
        disks = [{constants.IDISK_SIZE: d.size,
10850
                  constants.IDISK_VG: d.logical_id[0]}
10851
                 for d in instance.disks]
10852
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10853
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10854

    
10855
    # hvparams processing
10856
    if self.op.hvparams:
10857
      hv_type = instance.hypervisor
10858
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10859
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10860
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10861

    
10862
      # local check
10863
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10864
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10865
      self.hv_new = hv_new # the new actual values
10866
      self.hv_inst = i_hvdict # the new dict (without defaults)
10867
    else:
10868
      self.hv_new = self.hv_inst = {}
10869

    
10870
    # beparams processing
10871
    if self.op.beparams:
10872
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10873
                                   use_none=True)
10874
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10875
      be_new = cluster.SimpleFillBE(i_bedict)
10876
      self.be_new = be_new # the new actual values
10877
      self.be_inst = i_bedict # the new dict (without defaults)
10878
    else:
10879
      self.be_new = self.be_inst = {}
10880
    be_old = cluster.FillBE(instance)
10881

    
10882
    # osparams processing
10883
    if self.op.osparams:
10884
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10885
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10886
      self.os_inst = i_osdict # the new dict (without defaults)
10887
    else:
10888
      self.os_inst = {}
10889

    
10890
    self.warn = []
10891

    
10892
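    # If the memory size is being increased (and --force was not given), make
    # sure the primary node, and with auto_balance every secondary node,
    # still has enough free memory for the new value.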
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10893
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10894
      mem_check_list = [pnode]
10895
      if be_new[constants.BE_AUTO_BALANCE]:
10896
        # either we changed auto_balance to yes or it was from before
10897
        mem_check_list.extend(instance.secondary_nodes)
10898
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10899
                                                  instance.hypervisor)
10900
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10901
                                         instance.hypervisor)
10902
      pninfo = nodeinfo[pnode]
10903
      msg = pninfo.fail_msg
10904
      if msg:
10905
        # Assume the primary node is unreachable and go ahead
10906
        self.warn.append("Can't get info from primary node %s: %s" %
10907
                         (pnode, msg))
10908
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10909
        self.warn.append("Node data from primary node %s doesn't contain"
10910
                         " free memory information" % pnode)
10911
      elif instance_info.fail_msg:
10912
        self.warn.append("Can't get instance runtime information: %s" %
10913
                        instance_info.fail_msg)
10914
      else:
10915
        if instance_info.payload:
10916
          current_mem = int(instance_info.payload["memory"])
10917
        else:
10918
          # Assume instance not running
10919
          # (there is a slight race condition here, but it's not very probable,
10920
          # and we have no other way to check)
10921
          current_mem = 0
10922
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10923
                    pninfo.payload["memory_free"])
10924
        if miss_mem > 0:
10925
          raise errors.OpPrereqError("This change will prevent the instance"
10926
                                     " from starting, due to %d MB of memory"
10927
                                     " missing on its primary node" % miss_mem,
10928
                                     errors.ECODE_NORES)
10929

    
10930
      if be_new[constants.BE_AUTO_BALANCE]:
10931
        for node, nres in nodeinfo.items():
10932
          if node not in instance.secondary_nodes:
10933
            continue
10934
          nres.Raise("Can't get info from secondary node %s" % node,
10935
                     prereq=True, ecode=errors.ECODE_STATE)
10936
          if not isinstance(nres.payload.get("memory_free", None), int):
10937
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10938
                                       " memory information" % node,
10939
                                       errors.ECODE_STATE)
10940
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10941
            raise errors.OpPrereqError("This change will prevent the instance"
10942
                                       " from failover to its secondary node"
10943
                                       " %s, due to not enough memory" % node,
10944
                                       errors.ECODE_STATE)
10945

    
10946
    # NIC processing
10947
    self.nic_pnew = {}
10948
    self.nic_pinst = {}
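    # nic_pinst holds the raw per-NIC overrides that get stored on the
    # instance, nic_pnew the same values filled with cluster defaults, used
    # for validation and for the hook environment.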
10949
    for nic_op, nic_dict in self.op.nics:
10950
      if nic_op == constants.DDM_REMOVE:
10951
        if not instance.nics:
10952
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10953
                                     errors.ECODE_INVAL)
10954
        continue
10955
      if nic_op != constants.DDM_ADD:
10956
        # an existing nic
10957
        if not instance.nics:
10958
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10959
                                     " no NICs" % nic_op,
10960
                                     errors.ECODE_INVAL)
10961
        if nic_op < 0 or nic_op >= len(instance.nics):
10962
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10963
                                     " are 0 to %d" %
10964
                                     (nic_op, len(instance.nics) - 1),
10965
                                     errors.ECODE_INVAL)
10966
        old_nic_params = instance.nics[nic_op].nicparams
10967
        old_nic_ip = instance.nics[nic_op].ip
10968
      else:
10969
        old_nic_params = {}
10970
        old_nic_ip = None
10971

    
10972
      update_params_dict = dict([(key, nic_dict[key])
10973
                                 for key in constants.NICS_PARAMETERS
10974
                                 if key in nic_dict])
10975

    
10976
      if "bridge" in nic_dict:
10977
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10978

    
10979
      new_nic_params = _GetUpdatedParams(old_nic_params,
10980
                                         update_params_dict)
10981
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10982
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10983
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10984
      self.nic_pinst[nic_op] = new_nic_params
10985
      self.nic_pnew[nic_op] = new_filled_nic_params
10986
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10987

    
10988
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10989
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10990
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10991
        if msg:
10992
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10993
          if self.op.force:
10994
            self.warn.append(msg)
10995
          else:
10996
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10997
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10998
        if constants.INIC_IP in nic_dict:
10999
          nic_ip = nic_dict[constants.INIC_IP]
11000
        else:
11001
          nic_ip = old_nic_ip
11002
        if nic_ip is None:
11003
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11004
                                     " on a routed nic", errors.ECODE_INVAL)
11005
      if constants.INIC_MAC in nic_dict:
11006
        nic_mac = nic_dict[constants.INIC_MAC]
11007
        if nic_mac is None:
11008
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11009
                                     errors.ECODE_INVAL)
11010
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11011
          # otherwise generate the mac
11012
          nic_dict[constants.INIC_MAC] = \
11013
            self.cfg.GenerateMAC(self.proc.GetECId())
11014
        else:
11015
          # or validate/reserve the current one
11016
          try:
11017
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11018
          except errors.ReservationError:
11019
            raise errors.OpPrereqError("MAC address %s already in use"
11020
                                       " in cluster" % nic_mac,
11021
                                       errors.ECODE_NOTUNIQUE)
11022

    
11023
    # DISK processing
11024
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11025
      raise errors.OpPrereqError("Disk operations not supported for"
11026
                                 " diskless instances",
11027
                                 errors.ECODE_INVAL)
11028
    for disk_op, _ in self.op.disks:
11029
      if disk_op == constants.DDM_REMOVE:
11030
        if len(instance.disks) == 1:
11031
          raise errors.OpPrereqError("Cannot remove the last disk of"
11032
                                     " an instance", errors.ECODE_INVAL)
11033
        _CheckInstanceDown(self, instance, "cannot remove disks")
11034

    
11035
      if (disk_op == constants.DDM_ADD and
11036
          len(instance.disks) >= constants.MAX_DISKS):
11037
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11038
                                   " add more" % constants.MAX_DISKS,
11039
                                   errors.ECODE_STATE)
11040
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11041
        # an existing disk
11042
        if disk_op < 0 or disk_op >= len(instance.disks):
11043
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11044
                                     " are 0 to %d" %
11045
                                     (disk_op, len(instance.disks) - 1),
11046
                                     errors.ECODE_INVAL)
11047

    
11048
    return
11049

    
11050
  def _ConvertPlainToDrbd(self, feedback_fn):
11051
    """Converts an instance from plain to drbd.
11052

11053
    """
11054
    feedback_fn("Converting template to drbd")
11055
    instance = self.instance
11056
    pnode = instance.primary_node
11057
    snode = self.op.remote_node
11058

    
11059
    # create a fake disk info for _GenerateDiskTemplate
11060
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11061
                  constants.IDISK_VG: d.logical_id[0]}
11062
                 for d in instance.disks]
11063
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11064
                                      instance.name, pnode, [snode],
11065
                                      disk_info, None, None, 0, feedback_fn)
11066
    info = _GetInstanceInfoText(instance)
11067
    feedback_fn("Creating aditional volumes...")
11068
    # first, create the missing data and meta devices
11069
    for disk in new_disks:
11070
      # unfortunately this is... not too nice
11071
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11072
                            info, True)
11073
      for child in disk.children:
11074
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11075
    # at this stage, all new LVs have been created, we can rename the
11076
    # old ones
11077
    feedback_fn("Renaming original volumes...")
11078
    rename_list = [(o, n.children[0].logical_id)
11079
                   for (o, n) in zip(instance.disks, new_disks)]
11080
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11081
    result.Raise("Failed to rename original LVs")
11082

    
11083
    feedback_fn("Initializing DRBD devices...")
11084
    # all child devices are in place, we can now create the DRBD devices
11085
    for disk in new_disks:
11086
      for node in [pnode, snode]:
11087
        f_create = node == pnode
11088
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11089

    
11090
    # at this point, the instance has been modified
11091
    instance.disk_template = constants.DT_DRBD8
11092
    instance.disks = new_disks
11093
    self.cfg.Update(instance, feedback_fn)
11094

    
11095
    # disks are created, waiting for sync
11096
    disk_abort = not _WaitForSync(self, instance,
11097
                                  oneshot=not self.op.wait_for_sync)
11098
    if disk_abort:
11099
      raise errors.OpExecError("There are some degraded disks for"
11100
                               " this instance, please cleanup manually")
11101

    
11102
  def _ConvertDrbdToPlain(self, feedback_fn):
11103
    """Converts an instance from drbd to plain.
11104

11105
    """
11106
    instance = self.instance
11107
    assert len(instance.secondary_nodes) == 1
11108
    pnode = instance.primary_node
11109
    snode = instance.secondary_nodes[0]
11110
    feedback_fn("Converting template to plain")
11111

    
11112
    old_disks = instance.disks
11113
    new_disks = [d.children[0] for d in old_disks]
11114

    
11115
    # copy over size and mode
11116
    for parent, child in zip(old_disks, new_disks):
11117
      child.size = parent.size
11118
      child.mode = parent.mode
11119

    
11120
    # update instance structure
11121
    instance.disks = new_disks
11122
    instance.disk_template = constants.DT_PLAIN
11123
    self.cfg.Update(instance, feedback_fn)
11124

    
11125
    feedback_fn("Removing volumes on the secondary node...")
11126
    for disk in old_disks:
11127
      self.cfg.SetDiskID(disk, snode)
11128
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11129
      if msg:
11130
        self.LogWarning("Could not remove block device %s on node %s,"
11131
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11132

    
11133
    feedback_fn("Removing unneeded volumes on the primary node...")
11134
    for idx, disk in enumerate(old_disks):
11135
      meta = disk.children[1]
11136
      self.cfg.SetDiskID(meta, pnode)
11137
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11138
      if msg:
11139
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11140
                        " continuing anyway: %s", idx, pnode, msg)
11141

    
11142
  def Exec(self, feedback_fn):
11143
    """Modifies an instance.
11144

11145
    All parameters take effect only at the next restart of the instance.
11146

11147
    """
11148
    # Process here the warnings from CheckPrereq, as we don't have a
11149
    # feedback_fn there.
11150
    for warn in self.warn:
11151
      feedback_fn("WARNING: %s" % warn)
11152

    
11153
    result = []
11154
    instance = self.instance
11155
    # disk changes
11156
    for disk_op, disk_dict in self.op.disks:
11157
      if disk_op == constants.DDM_REMOVE:
11158
        # remove the last disk
11159
        device = instance.disks.pop()
11160
        device_idx = len(instance.disks)
11161
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11162
          self.cfg.SetDiskID(disk, node)
11163
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11164
          if msg:
11165
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11166
                            " continuing anyway", device_idx, node, msg)
11167
        result.append(("disk/%d" % device_idx, "remove"))
11168
      elif disk_op == constants.DDM_ADD:
11169
        # add a new disk
11170
        if instance.disk_template in (constants.DT_FILE,
11171
                                        constants.DT_SHARED_FILE):
11172
          file_driver, file_path = instance.disks[0].logical_id
11173
          file_path = os.path.dirname(file_path)
11174
        else:
11175
          file_driver = file_path = None
11176
        disk_idx_base = len(instance.disks)
11177
        new_disk = _GenerateDiskTemplate(self,
11178
                                         instance.disk_template,
11179
                                         instance.name, instance.primary_node,
11180
                                         instance.secondary_nodes,
11181
                                         [disk_dict],
11182
                                         file_path,
11183
                                         file_driver,
11184
                                         disk_idx_base, feedback_fn)[0]
11185
        instance.disks.append(new_disk)
11186
        info = _GetInstanceInfoText(instance)
11187

    
11188
        logging.info("Creating volume %s for instance %s",
11189
                     new_disk.iv_name, instance.name)
11190
        # Note: this needs to be kept in sync with _CreateDisks
11191
        #HARDCODE
11192
        for node in instance.all_nodes:
11193
          f_create = node == instance.primary_node
11194
          try:
11195
            _CreateBlockDev(self, node, instance, new_disk,
11196
                            f_create, info, f_create)
11197
          except errors.OpExecError, err:
11198
            self.LogWarning("Failed to create volume %s (%s) on"
11199
                            " node %s: %s",
11200
                            new_disk.iv_name, new_disk, node, err)
11201
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11202
                       (new_disk.size, new_disk.mode)))
11203
      else:
11204
        # change a given disk
11205
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11206
        result.append(("disk.mode/%d" % disk_op,
11207
                       disk_dict[constants.IDISK_MODE]))
11208

    
11209
    if self.op.disk_template:
11210
      r_shut = _ShutdownInstanceDisks(self, instance)
11211
      if not r_shut:
11212
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11213
                                 " proceed with disk template conversion")
11214
      mode = (instance.disk_template, self.op.disk_template)
11215
      try:
11216
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11217
      except:
11218
        self.cfg.ReleaseDRBDMinors(instance.name)
11219
        raise
11220
      result.append(("disk_template", self.op.disk_template))
11221

    
11222
    # NIC changes
11223
    for nic_op, nic_dict in self.op.nics:
11224
      if nic_op == constants.DDM_REMOVE:
11225
        # remove the last nic
11226
        del instance.nics[-1]
11227
        result.append(("nic.%d" % len(instance.nics), "remove"))
11228
      elif nic_op == constants.DDM_ADD:
11229
        # mac and bridge should be set by now
11230
        mac = nic_dict[constants.INIC_MAC]
11231
        ip = nic_dict.get(constants.INIC_IP, None)
11232
        nicparams = self.nic_pinst[constants.DDM_ADD]
11233
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11234
        instance.nics.append(new_nic)
11235
        result.append(("nic.%d" % (len(instance.nics) - 1),
11236
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11237
                       (new_nic.mac, new_nic.ip,
11238
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11239
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11240
                       )))
11241
      else:
11242
        for key in (constants.INIC_MAC, constants.INIC_IP):
11243
          if key in nic_dict:
11244
            setattr(instance.nics[nic_op], key, nic_dict[key])
11245
        if nic_op in self.nic_pinst:
11246
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11247
        for key, val in nic_dict.iteritems():
11248
          result.append(("nic.%s/%d" % (key, nic_op), val))
11249

    
11250
    # hvparams changes
11251
    if self.op.hvparams:
11252
      instance.hvparams = self.hv_inst
11253
      for key, val in self.op.hvparams.iteritems():
11254
        result.append(("hv/%s" % key, val))
11255

    
11256
    # beparams changes
11257
    if self.op.beparams:
11258
      instance.beparams = self.be_inst
11259
      for key, val in self.op.beparams.iteritems():
11260
        result.append(("be/%s" % key, val))
11261

    
11262
    # OS change
11263
    if self.op.os_name:
11264
      instance.os = self.op.os_name
11265

    
11266
    # osparams changes
11267
    if self.op.osparams:
11268
      instance.osparams = self.os_inst
11269
      for key, val in self.op.osparams.iteritems():
11270
        result.append(("os/%s" % key, val))
11271

    
11272
    self.cfg.Update(instance, feedback_fn)
11273

    
11274
    return result
11275

    
11276
  _DISK_CONVERSIONS = {
11277
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11278
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11279
    }
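  # CheckPrereq rejects any (old, new) disk template pair not listed above,
  # so only plain <-> drbd conversions are supported for now.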
11280

    
11281

    
11282
class LUInstanceChangeGroup(LogicalUnit):
11283
  HPATH = "instance-change-group"
11284
  HTYPE = constants.HTYPE_INSTANCE
11285
  REQ_BGL = False
11286

    
11287
  def ExpandNames(self):
11288
    self.share_locks = _ShareAll()
11289
    self.needed_locks = {
11290
      locking.LEVEL_NODEGROUP: [],
11291
      locking.LEVEL_NODE: [],
11292
      }
11293

    
11294
    self._ExpandAndLockInstance()
11295

    
11296
    if self.op.target_groups:
11297
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11298
                                  self.op.target_groups)
11299
    else:
11300
      self.req_target_uuids = None
11301

    
11302
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11303

    
11304
  def DeclareLocks(self, level):
11305
    if level == locking.LEVEL_NODEGROUP:
11306
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11307

    
11308
      if self.req_target_uuids:
11309
        lock_groups = set(self.req_target_uuids)
11310

    
11311
        # Lock all groups used by instance optimistically; this requires going
11312
        # via the node before it's locked, requiring verification later on
11313
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11314
        lock_groups.update(instance_groups)
11315
      else:
11316
        # No target groups, need to lock all of them
11317
        lock_groups = locking.ALL_SET
11318

    
11319
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11320

    
11321
    elif level == locking.LEVEL_NODE:
11322
      if self.req_target_uuids:
11323
        # Lock all nodes used by instances
11324
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11325
        self._LockInstancesNodes()
11326

    
11327
        # Lock all nodes in all potential target groups
11328
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11329
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11330
        member_nodes = [node_name
11331
                        for group in lock_groups
11332
                        for node_name in self.cfg.GetNodeGroup(group).members]
11333
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11334
      else:
11335
        # Lock all nodes as all groups are potential targets
11336
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11337

    
11338
  def CheckPrereq(self):
11339
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11340
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11341
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11342

    
11343
    assert (self.req_target_uuids is None or
11344
            owned_groups.issuperset(self.req_target_uuids))
11345
    assert owned_instances == set([self.op.instance_name])
11346

    
11347
    # Get instance information
11348
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11349

    
11350
    # Check if node groups for locked instance are still correct
11351
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11352
      ("Instance %s's nodes changed while we kept the lock" %
11353
       self.op.instance_name)
11354

    
11355
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11356
                                           owned_groups)
11357

    
11358
    if self.req_target_uuids:
11359
      # User requested specific target groups
11360
      self.target_uuids = self.req_target_uuids
11361
    else:
11362
      # All groups except those used by the instance are potential targets
11363
      self.target_uuids = owned_groups - inst_groups
11364

    
11365
    conflicting_groups = self.target_uuids & inst_groups
11366
    if conflicting_groups:
11367
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11368
                                 " used by the instance '%s'" %
11369
                                 (utils.CommaJoin(conflicting_groups),
11370
                                  self.op.instance_name),
11371
                                 errors.ECODE_INVAL)
11372

    
11373
    if not self.target_uuids:
11374
      raise errors.OpPrereqError("There are no possible target groups",
11375
                                 errors.ECODE_INVAL)
11376

    
11377
  def BuildHooksEnv(self):
11378
    """Build hooks env.
11379

11380
    """
11381
    assert self.target_uuids
11382

    
11383
    env = {
11384
      "TARGET_GROUPS": " ".join(self.target_uuids),
11385
      }
11386

    
11387
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11388

    
11389
    return env
11390

    
11391
  def BuildHooksNodes(self):
11392
    """Build hooks nodes.
11393

11394
    """
11395
    mn = self.cfg.GetMasterNode()
11396
    return ([mn], [mn])
11397

    
11398
  def Exec(self, feedback_fn):
11399
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11400

    
11401
    assert instances == [self.op.instance_name], "Instance not locked"
11402

    
11403
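    # Ask the iallocator (in CHG_GROUP mode) for a plan moving the instance
    # to one of the target groups and turn the returned evacuation plan into
    # follow-up jobs.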
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11404
                     instances=instances, target_groups=list(self.target_uuids))
11405

    
11406
    ial.Run(self.op.iallocator)
11407

    
11408
    if not ial.success:
11409
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11410
                                 " instance '%s' using iallocator '%s': %s" %
11411
                                 (self.op.instance_name, self.op.iallocator,
11412
                                  ial.info),
11413
                                 errors.ECODE_NORES)
11414

    
11415
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11416

    
11417
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11418
                 " instance '%s'", len(jobs), self.op.instance_name)
11419

    
11420
    return ResultWithJobs(jobs)
11421

    
11422

    
11423
class LUBackupQuery(NoHooksLU):
11424
  """Query the exports list
11425

11426
  """
11427
  REQ_BGL = False
11428

    
11429
  def ExpandNames(self):
11430
    self.needed_locks = {}
11431
    self.share_locks[locking.LEVEL_NODE] = 1
11432
    if not self.op.nodes:
11433
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11434
    else:
11435
      self.needed_locks[locking.LEVEL_NODE] = \
11436
        _GetWantedNodes(self, self.op.nodes)
11437

    
11438
  def Exec(self, feedback_fn):
11439
    """Compute the list of all the exported system images.
11440

11441
    @rtype: dict
11442
    @return: a dictionary with the structure node->(export-list)
11443
        where export-list is a list of the instances exported on
11444
        that node.
11445

11446
    """
11447
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11448
    rpcresult = self.rpc.call_export_list(self.nodes)
11449
    result = {}
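    # Nodes whose RPC call failed are reported as False instead of a list.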
11450
    for node in rpcresult:
11451
      if rpcresult[node].fail_msg:
11452
        result[node] = False
11453
      else:
11454
        result[node] = rpcresult[node].payload
11455

    
11456
    return result
11457

    
11458

    
11459
class LUBackupPrepare(NoHooksLU):
11460
  """Prepares an instance for an export and returns useful information.
11461

11462
  """
11463
  REQ_BGL = False
11464

    
11465
  def ExpandNames(self):
11466
    self._ExpandAndLockInstance()
11467

    
11468
  def CheckPrereq(self):
11469
    """Check prerequisites.
11470

11471
    """
11472
    instance_name = self.op.instance_name
11473

    
11474
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11475
    assert self.instance is not None, \
11476
          "Cannot retrieve locked instance %s" % self.op.instance_name
11477
    _CheckNodeOnline(self, self.instance.primary_node)
11478

    
11479
    self._cds = _GetClusterDomainSecret()
11480

    
11481
  def Exec(self, feedback_fn):
11482
    """Prepares an instance for an export.
11483

11484
    """
11485
    instance = self.instance
11486

    
11487
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11488
      salt = utils.GenerateSecret(8)
11489

    
11490
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11491
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11492
                                              constants.RIE_CERT_VALIDITY)
11493
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11494

    
11495
      (name, cert_pem) = result.payload
11496

    
11497
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11498
                                             cert_pem)
11499

    
11500
      return {
11501
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11502
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11503
                          salt),
11504
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11505
        }
11506

    
11507
    return None
11508

    
11509

    
11510
class LUBackupExport(LogicalUnit):
11511
  """Export an instance to an image in the cluster.
11512

11513
  """
11514
  HPATH = "instance-export"
11515
  HTYPE = constants.HTYPE_INSTANCE
11516
  REQ_BGL = False
11517

    
11518
  def CheckArguments(self):
11519
    """Check the arguments.
11520

11521
    """
11522
    self.x509_key_name = self.op.x509_key_name
11523
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11524

    
11525
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11526
      if not self.x509_key_name:
11527
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11528
                                   errors.ECODE_INVAL)
11529

    
11530
      if not self.dest_x509_ca_pem:
11531
        raise errors.OpPrereqError("Missing destination X509 CA",
11532
                                   errors.ECODE_INVAL)
11533

    
11534
  def ExpandNames(self):
11535
    self._ExpandAndLockInstance()
11536

    
11537
    # Lock all nodes for local exports
11538
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11539
      # FIXME: lock only instance primary and destination node
11540
      #
11541
      # Sad but true, for now we have to lock all nodes, as we don't know where
11542
      # the previous export might be, and in this LU we search for it and
11543
      # remove it from its current node. In the future we could fix this by:
11544
      #  - making a tasklet to search (share-lock all), then create the
11545
      #    new one, then one to remove, after
11546
      #  - removing the removal operation altogether
11547
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11548

    
11549
  def DeclareLocks(self, level):
11550
    """Last minute lock declaration."""
11551
    # All nodes are locked anyway, so nothing to do here.
11552

    
11553
  def BuildHooksEnv(self):
11554
    """Build hooks env.
11555

11556
    This will run on the master, primary node and target node.
11557

11558
    """
11559
    env = {
11560
      "EXPORT_MODE": self.op.mode,
11561
      "EXPORT_NODE": self.op.target_node,
11562
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11563
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11564
      # TODO: Generic function for boolean env variables
11565
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11566
      }
11567

    
11568
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11569

    
11570
    return env
11571

    
11572
  def BuildHooksNodes(self):
11573
    """Build hooks nodes.
11574

11575
    """
11576
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11577

    
11578
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11579
      nl.append(self.op.target_node)
11580

    
11581
    return (nl, nl)
11582

    
11583
  def CheckPrereq(self):
11584
    """Check prerequisites.
11585

11586
    This checks that the instance and node names are valid.
11587

11588
    """
11589
    instance_name = self.op.instance_name
11590

    
11591
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11592
    assert self.instance is not None, \
11593
          "Cannot retrieve locked instance %s" % self.op.instance_name
11594
    _CheckNodeOnline(self, self.instance.primary_node)
11595

    
11596
    if (self.op.remove_instance and self.instance.admin_up and
11597
        not self.op.shutdown):
11598
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11599
                                 " down before")
11600

    
11601
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11602
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11603
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11604
      assert self.dst_node is not None
11605

    
11606
      _CheckNodeOnline(self, self.dst_node.name)
11607
      _CheckNodeNotDrained(self, self.dst_node.name)
11608

    
11609
      self._cds = None
11610
      self.dest_disk_info = None
11611
      self.dest_x509_ca = None
11612

    
11613
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11614
      self.dst_node = None
11615

    
11616
      if len(self.op.target_node) != len(self.instance.disks):
11617
        raise errors.OpPrereqError(("Received destination information for %s"
11618
                                    " disks, but instance %s has %s disks") %
11619
                                   (len(self.op.target_node), instance_name,
11620
                                    len(self.instance.disks)),
11621
                                   errors.ECODE_INVAL)
11622

    
11623
      cds = _GetClusterDomainSecret()
11624

    
11625
      # Check X509 key name
11626
      try:
11627
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11628
      except (TypeError, ValueError), err:
11629
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11630

    
11631
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11632
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11633
                                   errors.ECODE_INVAL)
11634

    
11635
      # Load and verify CA
11636
      try:
11637
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11638
      except OpenSSL.crypto.Error, err:
11639
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11640
                                   (err, ), errors.ECODE_INVAL)
11641

    
11642
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11643
      if errcode is not None:
11644
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11645
                                   (msg, ), errors.ECODE_INVAL)
11646

    
11647
      self.dest_x509_ca = cert
11648

    
11649
      # Verify target information
11650
      disk_info = []
11651
      for idx, disk_data in enumerate(self.op.target_node):
11652
        try:
11653
          (host, port, magic) = \
11654
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11655
        except errors.GenericError, err:
11656
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11657
                                     (idx, err), errors.ECODE_INVAL)
11658

    
11659
        disk_info.append((host, port, magic))
11660

    
11661
      assert len(disk_info) == len(self.op.target_node)
11662
      self.dest_disk_info = disk_info
11663

    
11664
    else:
11665
      raise errors.ProgrammerError("Unhandled export mode %r" %
11666
                                   self.op.mode)
11667

    
11668
    # instance disk type verification
11669
    # TODO: Implement export support for file-based disks
11670
    for disk in self.instance.disks:
11671
      if disk.dev_type == constants.LD_FILE:
11672
        raise errors.OpPrereqError("Export not supported for instances with"
11673
                                   " file-based disks", errors.ECODE_INVAL)
11674

    
11675
  def _CleanupExports(self, feedback_fn):
11676
    """Removes exports of current instance from all other nodes.
11677

11678
    If an instance in a cluster with nodes A..D was exported to node C, its
11679
    exports will be removed from the nodes A, B and D.
11680

11681
    """
11682
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11683

    
11684
    nodelist = self.cfg.GetNodeList()
11685
    nodelist.remove(self.dst_node.name)
11686

    
11687
    # on one-node clusters nodelist will be empty after the removal
11688
    # if we proceed the backup would be removed because OpBackupQuery
11689
    # substitutes an empty list with the full cluster node list.
11690
    iname = self.instance.name
11691
    if nodelist:
11692
      feedback_fn("Removing old exports for instance %s" % iname)
11693
      exportlist = self.rpc.call_export_list(nodelist)
11694
      for node in exportlist:
11695
        if exportlist[node].fail_msg:
11696
          continue
11697
        if iname in exportlist[node].payload:
11698
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11699
          if msg:
11700
            self.LogWarning("Could not remove older export for instance %s"
11701
                            " on node %s: %s", iname, node, msg)
11702

    
11703
  def Exec(self, feedback_fn):
11704
    """Export an instance to an image in the cluster.
11705

11706
    """
11707
    assert self.op.mode in constants.EXPORT_MODES
11708

    
11709
    instance = self.instance
11710
    src_node = instance.primary_node
11711

    
11712
    if self.op.shutdown:
11713
      # shutdown the instance, but not the disks
11714
      feedback_fn("Shutting down instance %s" % instance.name)
11715
      result = self.rpc.call_instance_shutdown(src_node, instance,
11716
                                               self.op.shutdown_timeout)
11717
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11718
      result.Raise("Could not shutdown instance %s on"
11719
                   " node %s" % (instance.name, src_node))
11720

    
11721
    # set the disks ID correctly since call_instance_start needs the
11722
    # correct drbd minor to create the symlinks
11723
    for disk in instance.disks:
11724
      self.cfg.SetDiskID(disk, src_node)
11725

    
11726
    activate_disks = (not instance.admin_up)
11727

    
11728
    if activate_disks:
11729
      # Activate the instance disks if we're exporting a stopped instance
11730
      feedback_fn("Activating disks for %s" % instance.name)
11731
      _StartInstanceDisks(self, instance, None)
11732

    
11733
    try:
11734
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11735
                                                     instance)
11736

    
11737
      helper.CreateSnapshots()
11738
      try:
11739
        if (self.op.shutdown and instance.admin_up and
11740
            not self.op.remove_instance):
11741
          assert not activate_disks
11742
          feedback_fn("Starting instance %s" % instance.name)
11743
          result = self.rpc.call_instance_start(src_node, instance,
11744
                                                None, None, False)
11745
          msg = result.fail_msg
11746
          if msg:
11747
            feedback_fn("Failed to start instance: %s" % msg)
11748
            _ShutdownInstanceDisks(self, instance)
11749
            raise errors.OpExecError("Could not start instance: %s" % msg)
11750

    
11751
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11752
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11753
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11754
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11755
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11756

    
11757
          (key_name, _, _) = self.x509_key_name
11758

    
11759
          dest_ca_pem = \
11760
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11761
                                            self.dest_x509_ca)
11762

    
11763
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11764
                                                     key_name, dest_ca_pem,
11765
                                                     timeouts)
11766
      finally:
11767
        helper.Cleanup()
11768

    
11769
      # Check for backwards compatibility
11770
      assert len(dresults) == len(instance.disks)
11771
      assert compat.all(isinstance(i, bool) for i in dresults), \
11772
             "Not all results are boolean: %r" % dresults
11773

    
11774
    finally:
11775
      if activate_disks:
11776
        feedback_fn("Deactivating disks for %s" % instance.name)
11777
        _ShutdownInstanceDisks(self, instance)
11778

    
11779
    if not (compat.all(dresults) and fin_resu):
11780
      failures = []
11781
      if not fin_resu:
11782
        failures.append("export finalization")
11783
      if not compat.all(dresults):
11784
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11785
                               if not dsk)
11786
        failures.append("disk export: disk(s) %s" % fdsk)
11787

    
11788
      raise errors.OpExecError("Export failed, errors in %s" %
11789
                               utils.CommaJoin(failures))
11790

    
11791
    # At this point, the export was successful, we can cleanup/finish
11792

    
11793
    # Remove instance if requested
11794
    if self.op.remove_instance:
11795
      feedback_fn("Removing instance %s" % instance.name)
11796
      _RemoveInstance(self, feedback_fn, instance,
11797
                      self.op.ignore_remove_failures)
11798

    
11799
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11800
      self._CleanupExports(feedback_fn)
11801

    
11802
    return fin_resu, dresults
11803

    
11804

    
11805
class LUBackupRemove(NoHooksLU):
11806
  """Remove exports related to the named instance.
11807

11808
  """
11809
  REQ_BGL = False
11810

    
11811
  def ExpandNames(self):
11812
    self.needed_locks = {}
11813
    # We need all nodes to be locked in order for RemoveExport to work, but we
11814
    # don't need to lock the instance itself, as nothing will happen to it (and
11815
    # we can remove exports also for a removed instance)
11816
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11817

    
11818
  def Exec(self, feedback_fn):
11819
    """Remove any export.
11820

11821
    """
11822
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11823
    # If the instance was not found we'll try with the name that was passed in.
11824
    # This will only work if it was an FQDN, though.
11825
    fqdn_warn = False
11826
    if not instance_name:
11827
      fqdn_warn = True
11828
      instance_name = self.op.instance_name
11829

    
11830
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11831
    exportlist = self.rpc.call_export_list(locked_nodes)
11832
    found = False
11833
    for node in exportlist:
11834
      msg = exportlist[node].fail_msg
11835
      if msg:
11836
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11837
        continue
11838
      if instance_name in exportlist[node].payload:
11839
        found = True
11840
        result = self.rpc.call_export_remove(node, instance_name)
11841
        msg = result.fail_msg
11842
        if msg:
11843
          logging.error("Could not remove export for instance %s"
11844
                        " on node %s: %s", instance_name, node, msg)
11845

    
11846
    if fqdn_warn and not found:
11847
      feedback_fn("Export not found. If trying to remove an export belonging"
11848
                  " to a deleted instance please use its Fully Qualified"
11849
                  " Domain Name.")
11850

    
11851

    
11852
class LUGroupAdd(LogicalUnit):
11853
  """Logical unit for creating node groups.
11854

11855
  """
11856
  HPATH = "group-add"
11857
  HTYPE = constants.HTYPE_GROUP
11858
  REQ_BGL = False
11859

    
11860
  def ExpandNames(self):
11861
    # We need the new group's UUID here so that we can create and acquire the
11862
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11863
    # that it should not check whether the UUID exists in the configuration.
11864
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11865
    self.needed_locks = {}
11866
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11867

    
11868
  def CheckPrereq(self):
11869
    """Check prerequisites.
11870

11871
    This checks that the given group name is not an existing node group
11872
    already.
11873

11874
    """
11875
    try:
11876
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11877
    except errors.OpPrereqError:
11878
      pass
11879
    else:
11880
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11881
                                 " node group (UUID: %s)" %
11882
                                 (self.op.group_name, existing_uuid),
11883
                                 errors.ECODE_EXISTS)
11884

    
11885
    if self.op.ndparams:
11886
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11887

    
11888
  def BuildHooksEnv(self):
11889
    """Build hooks env.
11890

11891
    """
11892
    return {
11893
      "GROUP_NAME": self.op.group_name,
11894
      }
11895

    
11896
  def BuildHooksNodes(self):
11897
    """Build hooks nodes.
11898

11899
    """
11900
    mn = self.cfg.GetMasterNode()
11901
    return ([mn], [mn])
11902

    
11903
  def Exec(self, feedback_fn):
11904
    """Add the node group to the cluster.
11905

11906
    """
11907
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11908
                                  uuid=self.group_uuid,
11909
                                  alloc_policy=self.op.alloc_policy,
11910
                                  ndparams=self.op.ndparams)
11911

    
11912
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11913
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11914

    
11915

    
11916
class LUGroupAssignNodes(NoHooksLU):
11917
  """Logical unit for assigning nodes to groups.
11918

11919
  """
11920
  REQ_BGL = False
11921

    
11922
  def ExpandNames(self):
11923
    # These raise errors.OpPrereqError on their own:
11924
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11925
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11926

    
11927
    # We want to lock all the affected nodes and groups. We have readily
11928
    # available the list of nodes, and the *destination* group. To gather the
11929
    # list of "source" groups, we need to fetch node information later on.
11930
    self.needed_locks = {
11931
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11932
      locking.LEVEL_NODE: self.op.nodes,
11933
      }
11934

    
11935
  def DeclareLocks(self, level):
11936
    if level == locking.LEVEL_NODEGROUP:
11937
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11938

    
11939
      # Try to get all affected nodes' groups without having the group or node
11940
      # lock yet. Needs verification later in the code flow.
11941
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11942

    
11943
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11944

    
11945
  def CheckPrereq(self):
11946
    """Check prerequisites.
11947

11948
    """
11949
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11950
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11951
            frozenset(self.op.nodes))
11952

    
11953
    expected_locks = (set([self.group_uuid]) |
11954
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11955
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11956
    if actual_locks != expected_locks:
11957
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11958
                               " current groups are '%s', used to be '%s'" %
11959
                               (utils.CommaJoin(expected_locks),
11960
                                utils.CommaJoin(actual_locks)))
11961

    
11962
    self.node_data = self.cfg.GetAllNodesInfo()
11963
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11964
    instance_data = self.cfg.GetAllInstancesInfo()
11965

    
11966
    if self.group is None:
11967
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11968
                               (self.op.group_name, self.group_uuid))
11969

    
11970
    (new_splits, previous_splits) = \
11971
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11972
                                             for node in self.op.nodes],
11973
                                            self.node_data, instance_data)
11974

    
11975
    if new_splits:
11976
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11977

    
11978
      if not self.op.force:
11979
        raise errors.OpExecError("The following instances get split by this"
11980
                                 " change and --force was not given: %s" %
11981
                                 fmt_new_splits)
11982
      else:
11983
        self.LogWarning("This operation will split the following instances: %s",
11984
                        fmt_new_splits)
11985

    
11986
        if previous_splits:
11987
          self.LogWarning("In addition, these already-split instances continue"
11988
                          " to be split across groups: %s",
11989
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11990

    
11991
  def Exec(self, feedback_fn):
11992
    """Assign nodes to a new group.
11993

11994
    """
11995
    for node in self.op.nodes:
11996
      self.node_data[node].group = self.group_uuid
11997

    
11998
    # FIXME: Depends on side-effects of modifying the result of
11999
    # C{cfg.GetAllNodesInfo}
12000

    
12001
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12002

    
12003
  @staticmethod
12004
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12005
    """Check for split instances after a node assignment.
12006

12007
    This method considers a series of node assignments as an atomic operation,
12008
    and returns information about split instances after applying the set of
12009
    changes.
12010

12011
    In particular, it returns information about newly split instances, and
12012
    instances that were already split, and remain so after the change.
12013

12014
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12015
    considered.
12016

12017
    @type changes: list of (node_name, new_group_uuid) pairs.
12018
    @param changes: list of node assignments to consider.
12019
    @param node_data: a dict with data for all nodes
12020
    @param instance_data: a dict with all instances to consider
12021
    @rtype: a two-tuple
12022
    @return: a list of instances that were previously okay and become split as a
12023
      consequence of this change, and a list of instances that were previously
12024
      split and this change does not fix.
12025

12026
    """
12027
    changed_nodes = dict((node, group) for node, group in changes
12028
                         if node_data[node].group != group)
12029

    
12030
    all_split_instances = set()
12031
    previously_split_instances = set()
12032

    
12033
    def InstanceNodes(instance):
12034
      return [instance.primary_node] + list(instance.secondary_nodes)
12035

    
12036
    for inst in instance_data.values():
12037
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12038
        continue
12039

    
12040
      instance_nodes = InstanceNodes(inst)
12041

    
12042
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12043
        previously_split_instances.add(inst.name)
12044

    
12045
      if len(set(changed_nodes.get(node, node_data[node].group)
12046
                 for node in instance_nodes)) > 1:
12047
        all_split_instances.add(inst.name)
12048

    
12049
    return (list(all_split_instances - previously_split_instances),
12050
            list(previously_split_instances & all_split_instances))
12051

    
12052

    
12053
class _GroupQuery(_QueryBase):
12054
  FIELDS = query.GROUP_FIELDS
12055

    
12056
  def ExpandNames(self, lu):
12057
    lu.needed_locks = {}
12058

    
12059
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12060
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12061

    
12062
    if not self.names:
12063
      self.wanted = [name_to_uuid[name]
12064
                     for name in utils.NiceSort(name_to_uuid.keys())]
12065
    else:
12066
      # Accept names to be either names or UUIDs.
12067
      missing = []
12068
      self.wanted = []
12069
      all_uuid = frozenset(self._all_groups.keys())
12070

    
12071
      for name in self.names:
12072
        if name in all_uuid:
12073
          self.wanted.append(name)
12074
        elif name in name_to_uuid:
12075
          self.wanted.append(name_to_uuid[name])
12076
        else:
12077
          missing.append(name)
12078

    
12079
      if missing:
12080
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12081
                                   utils.CommaJoin(missing),
12082
                                   errors.ECODE_NOENT)
12083

    
12084
  def DeclareLocks(self, lu, level):
12085
    pass
12086

    
12087
  def _GetQueryData(self, lu):
12088
    """Computes the list of node groups and their attributes.
12089

12090
    """
12091
    do_nodes = query.GQ_NODE in self.requested_data
12092
    do_instances = query.GQ_INST in self.requested_data
12093

    
12094
    group_to_nodes = None
12095
    group_to_instances = None
12096

    
12097
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12098
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12099
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12100
    # instance->node. Hence, we will need to process nodes even if we only need
12101
    # instance information.
12102
    if do_nodes or do_instances:
12103
      all_nodes = lu.cfg.GetAllNodesInfo()
12104
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12105
      node_to_group = {}
12106

    
12107
      for node in all_nodes.values():
12108
        if node.group in group_to_nodes:
12109
          group_to_nodes[node.group].append(node.name)
12110
          node_to_group[node.name] = node.group
12111

    
12112
      if do_instances:
12113
        all_instances = lu.cfg.GetAllInstancesInfo()
12114
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12115

    
12116
        for instance in all_instances.values():
12117
          node = instance.primary_node
12118
          if node in node_to_group:
12119
            group_to_instances[node_to_group[node]].append(instance.name)
12120

    
12121
        if not do_nodes:
12122
          # Do not pass on node information if it was not requested.
12123
          group_to_nodes = None
12124

    
12125
    return query.GroupQueryData([self._all_groups[uuid]
12126
                                 for uuid in self.wanted],
12127
                                group_to_nodes, group_to_instances)
12128

    
12129

    
12130
class LUGroupQuery(NoHooksLU):
12131
  """Logical unit for querying node groups.
12132

12133
  """
12134
  REQ_BGL = False
12135

    
12136
  def CheckArguments(self):
12137
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12138
                          self.op.output_fields, False)
12139

    
12140
  def ExpandNames(self):
12141
    self.gq.ExpandNames(self)
12142

    
12143
  def DeclareLocks(self, level):
12144
    self.gq.DeclareLocks(self, level)
12145

    
12146
  def Exec(self, feedback_fn):
12147
    return self.gq.OldStyleQuery(self)
12148

    
12149

    
12150
class LUGroupSetParams(LogicalUnit):
12151
  """Modifies the parameters of a node group.
12152

12153
  """
12154
  HPATH = "group-modify"
12155
  HTYPE = constants.HTYPE_GROUP
12156
  REQ_BGL = False
12157

    
12158
  def CheckArguments(self):
12159
    all_changes = [
12160
      self.op.ndparams,
12161
      self.op.alloc_policy,
12162
      ]
12163

    
12164
    if all_changes.count(None) == len(all_changes):
12165
      raise errors.OpPrereqError("Please pass at least one modification",
12166
                                 errors.ECODE_INVAL)
12167

    
12168
  def ExpandNames(self):
12169
    # This raises errors.OpPrereqError on its own:
12170
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12171

    
12172
    self.needed_locks = {
12173
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12174
      }
12175

    
12176
  def CheckPrereq(self):
12177
    """Check prerequisites.
12178

12179
    """
12180
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12181

    
12182
    if self.group is None:
12183
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12184
                               (self.op.group_name, self.group_uuid))
12185

    
12186
    if self.op.ndparams:
12187
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12188
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12189
      self.new_ndparams = new_ndparams
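      # Sketch with hypothetical values: passing {"oob_program": "/bin/oob"}
      # only overrides that key; any other ndparams already set on the group
      # keep their current values in the merged result.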
12190

    
12191
  def BuildHooksEnv(self):
12192
    """Build hooks env.
12193

12194
    """
12195
    return {
12196
      "GROUP_NAME": self.op.group_name,
12197
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12198
      }
12199

    
12200
  def BuildHooksNodes(self):
12201
    """Build hooks nodes.
12202

12203
    """
12204
    mn = self.cfg.GetMasterNode()
12205
    return ([mn], [mn])
12206

    
12207
  def Exec(self, feedback_fn):
12208
    """Modifies the node group.
12209

12210
    """
12211
    result = []
12212

    
12213
    if self.op.ndparams:
12214
      self.group.ndparams = self.new_ndparams
12215
      result.append(("ndparams", str(self.group.ndparams)))
12216

    
12217
    if self.op.alloc_policy:
12218
      self.group.alloc_policy = self.op.alloc_policy
12219

    
12220
    self.cfg.Update(self.group, feedback_fn)
12221
    return result
12222

    
12223

    
12224
class LUGroupRemove(LogicalUnit):
12225
  HPATH = "group-remove"
12226
  HTYPE = constants.HTYPE_GROUP
12227
  REQ_BGL = False
12228

    
12229
  def ExpandNames(self):
12230
    # This will raise errors.OpPrereqError on its own:
12231
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12232
    self.needed_locks = {
12233
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12234
      }
12235

    
12236
  def CheckPrereq(self):
12237
    """Check prerequisites.
12238

12239
    This checks that the given group name exists as a node group, that it is
12240
    empty (i.e., contains no nodes), and that it is not the last group of the
12241
    cluster.
12242

12243
    """
12244
    # Verify that the group is empty.
12245
    group_nodes = [node.name
12246
                   for node in self.cfg.GetAllNodesInfo().values()
12247
                   if node.group == self.group_uuid]
12248

    
12249
    if group_nodes:
12250
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12251
                                 " nodes: %s" %
12252
                                 (self.op.group_name,
12253
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12254
                                 errors.ECODE_STATE)
12255

    
12256
    # Verify the cluster would not be left group-less.
12257
    if len(self.cfg.GetNodeGroupList()) == 1:
12258
      raise errors.OpPrereqError("Group '%s' is the only group,"
12259
                                 " cannot be removed" %
12260
                                 self.op.group_name,
12261
                                 errors.ECODE_STATE)
12262

    
12263
  def BuildHooksEnv(self):
12264
    """Build hooks env.
12265

12266
    """
12267
    return {
12268
      "GROUP_NAME": self.op.group_name,
12269
      }
12270

    
12271
  def BuildHooksNodes(self):
12272
    """Build hooks nodes.
12273

12274
    """
12275
    mn = self.cfg.GetMasterNode()
12276
    return ([mn], [mn])
12277

    
12278
  def Exec(self, feedback_fn):
12279
    """Remove the node group.
12280

12281
    """
12282
    try:
12283
      self.cfg.RemoveNodeGroup(self.group_uuid)
12284
    except errors.ConfigurationError:
12285
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12286
                               (self.op.group_name, self.group_uuid))
12287

    
12288
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12289

    
12290

    
12291
class LUGroupRename(LogicalUnit):
12292
  HPATH = "group-rename"
12293
  HTYPE = constants.HTYPE_GROUP
12294
  REQ_BGL = False
12295

    
12296
  def ExpandNames(self):
12297
    # This raises errors.OpPrereqError on its own:
12298
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12299

    
12300
    self.needed_locks = {
12301
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12302
      }
12303

    
12304
  def CheckPrereq(self):
12305
    """Check prerequisites.
12306

12307
    Ensures requested new name is not yet used.
12308

12309
    """
12310
    try:
12311
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12312
    except errors.OpPrereqError:
12313
      pass
12314
    else:
12315
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12316
                                 " node group (UUID: %s)" %
12317
                                 (self.op.new_name, new_name_uuid),
12318
                                 errors.ECODE_EXISTS)
12319

    
12320
  def BuildHooksEnv(self):
12321
    """Build hooks env.
12322

12323
    """
12324
    return {
12325
      "OLD_NAME": self.op.group_name,
12326
      "NEW_NAME": self.op.new_name,
12327
      }
12328

    
12329
  def BuildHooksNodes(self):
12330
    """Build hooks nodes.
12331

12332
    """
12333
    mn = self.cfg.GetMasterNode()
12334

    
12335
    all_nodes = self.cfg.GetAllNodesInfo()
12336
    all_nodes.pop(mn, None)
12337

    
12338
    run_nodes = [mn]
12339
    run_nodes.extend(node.name for node in all_nodes.values()
12340
                     if node.group == self.group_uuid)
12341

    
12342
    return (run_nodes, run_nodes)
12343

    
12344
  def Exec(self, feedback_fn):
12345
    """Rename the node group.
12346

12347
    """
12348
    group = self.cfg.GetNodeGroup(self.group_uuid)
12349

    
12350
    if group is None:
12351
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12352
                               (self.op.group_name, self.group_uuid))
12353

    
12354
    group.name = self.op.new_name
12355
    self.cfg.Update(group, feedback_fn)
12356

    
12357
    return self.op.new_name
12358

    
12359

    
12360
class LUGroupEvacuate(LogicalUnit):
12361
  HPATH = "group-evacuate"
12362
  HTYPE = constants.HTYPE_GROUP
12363
  REQ_BGL = False
12364

    
12365
  def ExpandNames(self):
12366
    # This raises errors.OpPrereqError on its own:
12367
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12368

    
12369
    if self.op.target_groups:
12370
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12371
                                  self.op.target_groups)
12372
    else:
12373
      self.req_target_uuids = []
12374

    
12375
    if self.group_uuid in self.req_target_uuids:
12376
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12377
                                 " as a target group (targets are %s)" %
12378
                                 (self.group_uuid,
12379
                                  utils.CommaJoin(self.req_target_uuids)),
12380
                                 errors.ECODE_INVAL)
12381

    
12382
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12383

    
12384
    self.share_locks = _ShareAll()
12385
    self.needed_locks = {
12386
      locking.LEVEL_INSTANCE: [],
12387
      locking.LEVEL_NODEGROUP: [],
12388
      locking.LEVEL_NODE: [],
12389
      }
12390

    
12391
  def DeclareLocks(self, level):
12392
    if level == locking.LEVEL_INSTANCE:
12393
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12394

    
12395
      # Lock instances optimistically, needs verification once node and group
12396
      # locks have been acquired
12397
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12398
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12399

    
12400
    elif level == locking.LEVEL_NODEGROUP:
12401
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12402

    
12403
      if self.req_target_uuids:
12404
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12405

    
12406
        # Lock all groups used by instances optimistically; this requires going
12407
        # via the node before it's locked, requiring verification later on
12408
        lock_groups.update(group_uuid
12409
                           for instance_name in
12410
                             self.owned_locks(locking.LEVEL_INSTANCE)
12411
                           for group_uuid in
12412
                             self.cfg.GetInstanceNodeGroups(instance_name))
12413
      else:
12414
        # No target groups, need to lock all of them
12415
        lock_groups = locking.ALL_SET
12416

    
12417
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12418

    
12419
    elif level == locking.LEVEL_NODE:
12420
      # This will only lock the nodes in the group to be evacuated which
12421
      # contain actual instances
12422
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12423
      self._LockInstancesNodes()
12424

    
12425
      # Lock all nodes in group to be evacuated and target groups
12426
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12427
      assert self.group_uuid in owned_groups
12428
      member_nodes = [node_name
12429
                      for group in owned_groups
12430
                      for node_name in self.cfg.GetNodeGroup(group).members]
12431
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
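      # Net effect (sketch): locks are acquired in the order instance ->
      # nodegroup -> node, based optimistically on the configuration as seen
      # here; CheckPrereq below re-verifies that the locked instances and
      # their node groups did not change in the meantime.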
12432

    
12433
  def CheckPrereq(self):
12434
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12435
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12436
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12437

    
12438
    assert owned_groups.issuperset(self.req_target_uuids)
12439
    assert self.group_uuid in owned_groups
12440

    
12441
    # Check if locked instances are still correct
12442
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12443

    
12444
    # Get instance information
12445
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12446

    
12447
    # Check if node groups for locked instances are still correct
12448
    for instance_name in owned_instances:
12449
      inst = self.instances[instance_name]
12450
      assert owned_nodes.issuperset(inst.all_nodes), \
12451
        "Instance %s's nodes changed while we kept the lock" % instance_name
12452

    
12453
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12454
                                             owned_groups)
12455

    
12456
      assert self.group_uuid in inst_groups, \
12457
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12458

    
12459
    if self.req_target_uuids:
12460
      # User requested specific target groups
12461
      self.target_uuids = self.req_target_uuids
12462
    else:
12463
      # All groups except the one to be evacuated are potential targets
12464
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12465
                           if group_uuid != self.group_uuid]
12466

    
12467
      if not self.target_uuids:
12468
        raise errors.OpPrereqError("There are no possible target groups",
12469
                                   errors.ECODE_INVAL)
12470

    
12471
  def BuildHooksEnv(self):
12472
    """Build hooks env.
12473

12474
    """
12475
    return {
12476
      "GROUP_NAME": self.op.group_name,
12477
      "TARGET_GROUPS": " ".join(self.target_uuids),
12478
      }
12479

    
12480
  def BuildHooksNodes(self):
12481
    """Build hooks nodes.
12482

12483
    """
12484
    mn = self.cfg.GetMasterNode()
12485

    
12486
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12487

    
12488
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12489

    
12490
    return (run_nodes, run_nodes)
12491

    
12492
  def Exec(self, feedback_fn):
12493
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12494

    
12495
    assert self.group_uuid not in self.target_uuids
12496

    
12497
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12498
                     instances=instances, target_groups=self.target_uuids)
12499

    
12500
    ial.Run(self.op.iallocator)
12501

    
12502
    if not ial.success:
12503
      raise errors.OpPrereqError("Can't compute group evacuation using"
12504
                                 " iallocator '%s': %s" %
12505
                                 (self.op.iallocator, ial.info),
12506
                                 errors.ECODE_NORES)
12507

    
12508
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12509

    
12510
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12511
                 len(jobs), self.op.group_name)
12512

    
12513
    return ResultWithJobs(jobs)
12514

    
12515

    
12516
class TagsLU(NoHooksLU): # pylint: disable=W0223
12517
  """Generic tags LU.
12518

12519
  This is an abstract class which is the parent of all the other tags LUs.
12520

12521
  """
12522
  def ExpandNames(self):
12523
    self.group_uuid = None
12524
    self.needed_locks = {}
12525
    if self.op.kind == constants.TAG_NODE:
12526
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12527
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12528
    elif self.op.kind == constants.TAG_INSTANCE:
12529
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12530
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12531
    elif self.op.kind == constants.TAG_NODEGROUP:
12532
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12533

    
12534
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12535
    # not possible to acquire the BGL based on opcode parameters)
12536

    
12537
  def CheckPrereq(self):
12538
    """Check prerequisites.
12539

12540
    """
12541
    if self.op.kind == constants.TAG_CLUSTER:
12542
      self.target = self.cfg.GetClusterInfo()
12543
    elif self.op.kind == constants.TAG_NODE:
12544
      self.target = self.cfg.GetNodeInfo(self.op.name)
12545
    elif self.op.kind == constants.TAG_INSTANCE:
12546
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12547
    elif self.op.kind == constants.TAG_NODEGROUP:
12548
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12549
    else:
12550
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12551
                                 str(self.op.kind), errors.ECODE_INVAL)
12552

    
12553

    
12554
class LUTagsGet(TagsLU):
12555
  """Returns the tags of a given object.
12556

12557
  """
12558
  REQ_BGL = False
12559

    
12560
  def ExpandNames(self):
12561
    TagsLU.ExpandNames(self)
12562

    
12563
    # Share locks as this is only a read operation
12564
    self.share_locks = _ShareAll()
12565

    
12566
  def Exec(self, feedback_fn):
12567
    """Returns the tag list.
12568

12569
    """
12570
    return list(self.target.GetTags())
12571

    
12572

    
12573
class LUTagsSearch(NoHooksLU):
12574
  """Searches the tags for a given pattern.
12575

12576
  """
12577
  REQ_BGL = False
12578

    
12579
  def ExpandNames(self):
12580
    self.needed_locks = {}
12581

    
12582
  def CheckPrereq(self):
12583
    """Check prerequisites.
12584

12585
    This checks the pattern passed for validity by compiling it.
12586

12587
    """
12588
    try:
12589
      self.re = re.compile(self.op.pattern)
12590
    except re.error, err:
12591
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12592
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12593

    
12594
  def Exec(self, feedback_fn):
12595
    """Returns the tag list.
12596

12597
    """
12598
    cfg = self.cfg
12599
    tgts = [("/cluster", cfg.GetClusterInfo())]
12600
    ilist = cfg.GetAllInstancesInfo().values()
12601
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12602
    nlist = cfg.GetAllNodesInfo().values()
12603
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12604
    tgts.extend(("/nodegroup/%s" % n.name, n)
12605
                for n in cfg.GetAllNodeGroupsInfo().values())
12606
    results = []
12607
    for path, target in tgts:
12608
      for tag in target.GetTags():
12609
        if self.re.search(tag):
12610
          results.append((path, tag))
12611
    return results
12612

    
12613

    
12614
class LUTagsSet(TagsLU):
12615
  """Sets a tag on a given object.
12616

12617
  """
12618
  REQ_BGL = False
12619

    
12620
  def CheckPrereq(self):
12621
    """Check prerequisites.
12622

12623
    This checks the type and length of the tag name and value.
12624

12625
    """
12626
    TagsLU.CheckPrereq(self)
12627
    for tag in self.op.tags:
12628
      objects.TaggableObject.ValidateTag(tag)
12629

    
12630
  def Exec(self, feedback_fn):
12631
    """Sets the tag.
12632

12633
    """
12634
    try:
12635
      for tag in self.op.tags:
12636
        self.target.AddTag(tag)
12637
    except errors.TagError, err:
12638
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12639
    self.cfg.Update(self.target, feedback_fn)
12640

    
12641

    
12642
class LUTagsDel(TagsLU):
12643
  """Delete a list of tags from a given object.
12644

12645
  """
12646
  REQ_BGL = False
12647

    
12648
  def CheckPrereq(self):
12649
    """Check prerequisites.
12650

12651
    This checks that we have the given tag.
12652

12653
    """
12654
    TagsLU.CheckPrereq(self)
12655
    for tag in self.op.tags:
12656
      objects.TaggableObject.ValidateTag(tag)
12657
    del_tags = frozenset(self.op.tags)
12658
    cur_tags = self.target.GetTags()
12659

    
12660
    diff_tags = del_tags - cur_tags
12661
    if diff_tags:
12662
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12663
      raise errors.OpPrereqError("Tag(s) %s not found" %
12664
                                 (utils.CommaJoin(diff_names), ),
12665
                                 errors.ECODE_NOENT)
12666

    
12667
  def Exec(self, feedback_fn):
12668
    """Remove the tag from the object.
12669

12670
    """
12671
    for tag in self.op.tags:
12672
      self.target.RemoveTag(tag)
12673
    self.cfg.Update(self.target, feedback_fn)
12674

    
12675

    
12676
class LUTestDelay(NoHooksLU):
12677
  """Sleep for a specified amount of time.
12678

12679
  This LU sleeps on the master and/or nodes for a specified amount of
12680
  time.
12681

12682
  """
12683
  REQ_BGL = False
12684

    
12685
  def ExpandNames(self):
12686
    """Expand names and set required locks.
12687

12688
    This expands the node list, if any.
12689

12690
    """
12691
    self.needed_locks = {}
12692
    if self.op.on_nodes:
12693
      # _GetWantedNodes can be used here, but is not always appropriate to use
12694
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12695
      # more information.
12696
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12697
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12698

    
12699
  def _TestDelay(self):
12700
    """Do the actual sleep.
12701

12702
    """
12703
    if self.op.on_master:
12704
      if not utils.TestDelay(self.op.duration):
12705
        raise errors.OpExecError("Error during master delay test")
12706
    if self.op.on_nodes:
12707
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12708
      for node, node_result in result.items():
12709
        node_result.Raise("Failure during rpc call to node %s" % node)
12710

    
12711
  def Exec(self, feedback_fn):
12712
    """Execute the test delay opcode, with the wanted repetitions.
12713

12714
    """
12715
    if self.op.repeat == 0:
12716
      self._TestDelay()
12717
    else:
12718
      top_value = self.op.repeat - 1
12719
      for i in range(self.op.repeat):
12720
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12721
        self._TestDelay()
12722

    
12723

    
12724
class LUTestJqueue(NoHooksLU):
12725
  """Utility LU to test some aspects of the job queue.
12726

12727
  """
12728
  REQ_BGL = False
12729

    
12730
  # Must be lower than default timeout for WaitForJobChange to see whether it
12731
  # notices changed jobs
12732
  _CLIENT_CONNECT_TIMEOUT = 20.0
12733
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12734

    
12735
  @classmethod
12736
  def _NotifyUsingSocket(cls, cb, errcls):
12737
    """Opens a Unix socket and waits for another program to connect.
12738

12739
    @type cb: callable
12740
    @param cb: Callback to send socket name to client
12741
    @type errcls: class
12742
    @param errcls: Exception class to use for errors
12743

12744
    """
12745
    # Using a temporary directory as there's no easy way to create temporary
12746
    # sockets without writing a custom loop around tempfile.mktemp and
12747
    # socket.bind
12748
    tmpdir = tempfile.mkdtemp()
12749
    try:
12750
      tmpsock = utils.PathJoin(tmpdir, "sock")
12751

    
12752
      logging.debug("Creating temporary socket at %s", tmpsock)
12753
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12754
      try:
12755
        sock.bind(tmpsock)
12756
        sock.listen(1)
12757

    
12758
        # Send details to client
12759
        cb(tmpsock)
12760

    
12761
        # Wait for client to connect before continuing
12762
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12763
        try:
12764
          (conn, _) = sock.accept()
12765
        except socket.error, err:
12766
          raise errcls("Client didn't connect in time (%s)" % err)
12767
      finally:
12768
        sock.close()
12769
    finally:
12770
      # Remove as soon as client is connected
12771
      shutil.rmtree(tmpdir)
12772

    
12773
    # Wait for client to close
12774
    try:
12775
      try:
12776
        # pylint: disable=E1101
12777
        # Instance of '_socketobject' has no ... member
12778
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12779
        conn.recv(1)
12780
      except socket.error, err:
12781
        raise errcls("Client failed to confirm notification (%s)" % err)
12782
    finally:
12783
      conn.close()
12784

    
12785
  def _SendNotification(self, test, arg, sockname):
12786
    """Sends a notification to the client.
12787

12788
    @type test: string
12789
    @param test: Test name
12790
    @param arg: Test argument (depends on test)
12791
    @type sockname: string
12792
    @param sockname: Socket path
12793

12794
    """
12795
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12796

    
12797
  def _Notify(self, prereq, test, arg):
12798
    """Notifies the client of a test.
12799

12800
    @type prereq: bool
12801
    @param prereq: Whether this is a prereq-phase test
12802
    @type test: string
12803
    @param test: Test name
12804
    @param arg: Test argument (depends on test)
12805

12806
    """
12807
    if prereq:
12808
      errcls = errors.OpPrereqError
12809
    else:
12810
      errcls = errors.OpExecError
12811

    
12812
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12813
                                                  test, arg),
12814
                                   errcls)
12815

    
12816
  def CheckArguments(self):
12817
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12818
    self.expandnames_calls = 0
12819

    
12820
  def ExpandNames(self):
12821
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12822
    if checkargs_calls < 1:
12823
      raise errors.ProgrammerError("CheckArguments was not called")
12824

    
12825
    self.expandnames_calls += 1
12826

    
12827
    if self.op.notify_waitlock:
12828
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12829

    
12830
    self.LogInfo("Expanding names")
12831

    
12832
    # Get lock on master node (just to get a lock, not for a particular reason)
12833
    self.needed_locks = {
12834
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12835
      }
12836

    
12837
  def Exec(self, feedback_fn):
12838
    if self.expandnames_calls < 1:
12839
      raise errors.ProgrammerError("ExpandNames was not called")
12840

    
12841
    if self.op.notify_exec:
12842
      self._Notify(False, constants.JQT_EXEC, None)
12843

    
12844
    self.LogInfo("Executing")
12845

    
12846
    if self.op.log_messages:
12847
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12848
      for idx, msg in enumerate(self.op.log_messages):
12849
        self.LogInfo("Sending log message %s", idx + 1)
12850
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12851
        # Report how many test messages have been sent
12852
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12853

    
12854
    if self.op.fail:
12855
      raise errors.OpExecError("Opcode failure was requested")
12856

    
12857
    return True
12858

    
12859

    
12860
class IAllocator(object):
12861
  """IAllocator framework.
12862

12863
  An IAllocator instance has four sets of attributes:
12864
    - cfg that is needed to query the cluster
12865
    - input data (all members of the _KEYS class attribute are required)
12866
    - four buffer attributes (in|out_data|text), that represent the
12867
      input (to the external script) in text and data structure format,
12868
      and the output from it, again in two formats
12869
    - the result variables from the script (success, info, nodes) for
12870
      easy usage
12871

12872
  """
12873
  # pylint: disable=R0902
12874
  # lots of instance attributes
12875

    
12876
  def __init__(self, cfg, rpc, mode, **kwargs):
12877
    self.cfg = cfg
12878
    self.rpc = rpc
12879
    # init buffer variables
12880
    self.in_text = self.out_text = self.in_data = self.out_data = None
12881
    # init all input fields so that pylint is happy
12882
    self.mode = mode
12883
    self.memory = self.disks = self.disk_template = None
12884
    self.os = self.tags = self.nics = self.vcpus = None
12885
    self.hypervisor = None
12886
    self.relocate_from = None
12887
    self.name = None
12888
    self.instances = None
12889
    self.evac_mode = None
12890
    self.target_groups = []
12891
    # computed fields
12892
    self.required_nodes = None
12893
    # init result fields
12894
    self.success = self.info = self.result = None
12895

    
12896
    try:
12897
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12898
    except KeyError:
12899
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12900
                                   " IAllocator" % self.mode)
12901

    
12902
    keyset = [n for (n, _) in keydata]
12903

    
12904
    for key in kwargs:
12905
      if key not in keyset:
12906
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12907
                                     " IAllocator" % key)
12908
      setattr(self, key, kwargs[key])
12909

    
12910
    for key in keyset:
12911
      if key not in kwargs:
12912
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12913
                                     " IAllocator" % key)
12914
    self._BuildInputData(compat.partial(fn, self), keydata)
12915

    
12916
  def _ComputeClusterData(self):
12917
    """Compute the generic allocator input data.
12918

12919
    This is the data that is independent of the actual operation.
12920

12921
    """
12922
    cfg = self.cfg
12923
    cluster_info = cfg.GetClusterInfo()
12924
    # cluster data
12925
    data = {
12926
      "version": constants.IALLOCATOR_VERSION,
12927
      "cluster_name": cfg.GetClusterName(),
12928
      "cluster_tags": list(cluster_info.GetTags()),
12929
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12930
      # we don't have job IDs
12931
      }
12932
    ninfo = cfg.GetAllNodesInfo()
12933
    iinfo = cfg.GetAllInstancesInfo().values()
12934
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12935

    
12936
    # node data
12937
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12938

    
12939
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12940
      hypervisor_name = self.hypervisor
12941
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12942
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12943
    else:
12944
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12945

    
12946
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12947
                                        hypervisor_name)
12948
    node_iinfo = \
12949
      self.rpc.call_all_instances_info(node_list,
12950
                                       cluster_info.enabled_hypervisors)
12951

    
12952
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12953

    
12954
    config_ndata = self._ComputeBasicNodeData(ninfo)
12955
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12956
                                                 i_list, config_ndata)
12957
    assert len(data["nodes"]) == len(ninfo), \
12958
        "Incomplete node data computed"
12959

    
12960
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12961

    
12962
    self.in_data = data
12963

    
12964
  @staticmethod
12965
  def _ComputeNodeGroupData(cfg):
12966
    """Compute node groups data.
12967

12968
    """
12969
    ng = dict((guuid, {
12970
      "name": gdata.name,
12971
      "alloc_policy": gdata.alloc_policy,
12972
      })
12973
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12974

    
12975
    return ng
12976

    
12977
  @staticmethod
12978
  def _ComputeBasicNodeData(node_cfg):
12979
    """Compute global node data.
12980

12981
    @rtype: dict
12982
    @return: a dict mapping node names to dicts of their static attributes
12983

12984
    """
12985
    # fill in static (config-based) values
12986
    node_results = dict((ninfo.name, {
12987
      "tags": list(ninfo.GetTags()),
12988
      "primary_ip": ninfo.primary_ip,
12989
      "secondary_ip": ninfo.secondary_ip,
12990
      "offline": ninfo.offline,
12991
      "drained": ninfo.drained,
12992
      "master_candidate": ninfo.master_candidate,
12993
      "group": ninfo.group,
12994
      "master_capable": ninfo.master_capable,
12995
      "vm_capable": ninfo.vm_capable,
12996
      })
12997
      for ninfo in node_cfg.values())
12998

    
12999
    return node_results
13000

    
13001
  @staticmethod
13002
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13003
                              node_results):
13004
    """Compute global node data.
13005

13006
    @param node_results: the basic node structures as filled from the config
13007

13008
    """
13009
    # make a copy of the current dict
13010
    node_results = dict(node_results)
13011
    for nname, nresult in node_data.items():
13012
      assert nname in node_results, "Missing basic data for node %s" % nname
13013
      ninfo = node_cfg[nname]
13014

    
13015
      if not (ninfo.offline or ninfo.drained):
13016
        nresult.Raise("Can't get data for node %s" % nname)
13017
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13018
                                nname)
13019
        remote_info = nresult.payload
13020

    
13021
        for attr in ["memory_total", "memory_free", "memory_dom0",
13022
                     "vg_size", "vg_free", "cpu_total"]:
13023
          if attr not in remote_info:
13024
            raise errors.OpExecError("Node '%s' didn't return attribute"
13025
                                     " '%s'" % (nname, attr))
13026
          if not isinstance(remote_info[attr], int):
13027
            raise errors.OpExecError("Node '%s' returned invalid value"
13028
                                     " for '%s': %s" %
13029
                                     (nname, attr, remote_info[attr]))
13030
        # compute memory used by primary instances
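        # Worked example (hypothetical numbers): an instance with
        # BE_MEMORY=1024 that the hypervisor reports as using only 512 MiB
        # makes the loop below subtract the remaining 512 MiB from
        # "memory_free", so the allocator sees the node as if the instance
        # used its full allotment.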
13031
        i_p_mem = i_p_up_mem = 0
13032
        for iinfo, beinfo in i_list:
13033
          if iinfo.primary_node == nname:
13034
            i_p_mem += beinfo[constants.BE_MEMORY]
13035
            if iinfo.name not in node_iinfo[nname].payload:
13036
              i_used_mem = 0
13037
            else:
13038
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13039
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13040
            remote_info["memory_free"] -= max(0, i_mem_diff)
13041

    
13042
            if iinfo.admin_up:
13043
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13044

    
13045
        # compute memory used by instances
13046
        pnr_dyn = {
13047
          "total_memory": remote_info["memory_total"],
13048
          "reserved_memory": remote_info["memory_dom0"],
13049
          "free_memory": remote_info["memory_free"],
13050
          "total_disk": remote_info["vg_size"],
13051
          "free_disk": remote_info["vg_free"],
13052
          "total_cpus": remote_info["cpu_total"],
13053
          "i_pri_memory": i_p_mem,
13054
          "i_pri_up_memory": i_p_up_mem,
13055
          }
13056
        pnr_dyn.update(node_results[nname])
13057
        node_results[nname] = pnr_dyn
13058

    
13059
    return node_results
13060

    
13061
  @staticmethod
13062
  def _ComputeInstanceData(cluster_info, i_list):
13063
    """Compute global instance data.
13064

13065
    """
13066
    instance_data = {}
13067
    for iinfo, beinfo in i_list:
13068
      nic_data = []
13069
      for nic in iinfo.nics:
13070
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13071
        nic_dict = {
13072
          "mac": nic.mac,
13073
          "ip": nic.ip,
13074
          "mode": filled_params[constants.NIC_MODE],
13075
          "link": filled_params[constants.NIC_LINK],
13076
          }
13077
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13078
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13079
        nic_data.append(nic_dict)
13080
      pir = {
13081
        "tags": list(iinfo.GetTags()),
13082
        "admin_up": iinfo.admin_up,
13083
        "vcpus": beinfo[constants.BE_VCPUS],
13084
        "memory": beinfo[constants.BE_MEMORY],
13085
        "os": iinfo.os,
13086
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13087
        "nics": nic_data,
13088
        "disks": [{constants.IDISK_SIZE: dsk.size,
13089
                   constants.IDISK_MODE: dsk.mode}
13090
                  for dsk in iinfo.disks],
13091
        "disk_template": iinfo.disk_template,
13092
        "hypervisor": iinfo.hypervisor,
13093
        }
13094
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13095
                                                 pir["disks"])
13096
      instance_data[iinfo.name] = pir
13097

    
13098
    return instance_data
13099

    
13100
  def _AddNewInstance(self):
13101
    """Add new instance data to allocator structure.
13102

13103
    This, in combination with _ComputeClusterData, will create the
13104
    correct structure needed as input for the allocator.
13105

13106
    The checks for the completeness of the opcode must have already been
13107
    done.
13108

13109
    """
13110
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13111

    
13112
    if self.disk_template in constants.DTS_INT_MIRROR:
13113
      self.required_nodes = 2
13114
    else:
13115
      self.required_nodes = 1
13116

    
13117
    request = {
13118
      "name": self.name,
13119
      "disk_template": self.disk_template,
13120
      "tags": self.tags,
13121
      "os": self.os,
13122
      "vcpus": self.vcpus,
13123
      "memory": self.memory,
13124
      "disks": self.disks,
13125
      "disk_space_total": disk_space,
13126
      "nics": self.nics,
13127
      "required_nodes": self.required_nodes,
13128
      "hypervisor": self.hypervisor,
13129
      }
13130

    
13131
    return request
13132

    
13133
  def _AddRelocateInstance(self):
13134
    """Add relocate instance data to allocator structure.
13135

13136
    This, in combination with _ComputeClusterData, will create the
13137
    correct structure needed as input for the allocator.
13138

13139
    The checks for the completeness of the opcode must have already been
13140
    done.
13141

13142
    """
13143
    instance = self.cfg.GetInstanceInfo(self.name)
13144
    if instance is None:
13145
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13146
                                   " IAllocator" % self.name)
13147

    
13148
    if instance.disk_template not in constants.DTS_MIRRORED:
13149
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13150
                                 errors.ECODE_INVAL)
13151

    
13152
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13153
        len(instance.secondary_nodes) != 1:
13154
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13155
                                 errors.ECODE_STATE)
13156

    
13157
    self.required_nodes = 1
13158
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13159
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13160

    
13161
    request = {
13162
      "name": self.name,
13163
      "disk_space_total": disk_space,
13164
      "required_nodes": self.required_nodes,
13165
      "relocate_from": self.relocate_from,
13166
      }
13167
    return request
13168

    
13169
  def _AddNodeEvacuate(self):
13170
    """Get data for node-evacuate requests.
13171

13172
    """
13173
    return {
13174
      "instances": self.instances,
13175
      "evac_mode": self.evac_mode,
13176
      }
13177

    
13178
  def _AddChangeGroup(self):
13179
    """Get data for node-evacuate requests.
13180

13181
    """
13182
    return {
13183
      "instances": self.instances,
13184
      "target_groups": self.target_groups,
13185
      }
13186

    
13187
  def _BuildInputData(self, fn, keydata):
13188
    """Build input data structures.
13189

13190
    """
13191
    self._ComputeClusterData()
13192

    
13193
    request = fn()
13194
    request["type"] = self.mode
13195
    for keyname, keytype in keydata:
13196
      if keyname not in request:
13197
        raise errors.ProgrammerError("Request parameter %s is missing" %
13198
                                     keyname)
13199
      val = request[keyname]
13200
      if not keytype(val):
13201
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13202
                                     " validation, value %s, expected"
13203
                                     " type %s" % (keyname, val, keytype))
13204
    self.in_data["request"] = request
13205

    
13206
    self.in_text = serializer.Dump(self.in_data)
13207

    
13208
  _STRING_LIST = ht.TListOf(ht.TString)
13209
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13210
     # pylint: disable=E1101
13211
     # Class '...' has no 'OP_ID' member
13212
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13213
                          opcodes.OpInstanceMigrate.OP_ID,
13214
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13215
     })))
13216

    
13217
  _NEVAC_MOVED = \
13218
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13219
                       ht.TItems([ht.TNonEmptyString,
13220
                                  ht.TNonEmptyString,
13221
                                  ht.TListOf(ht.TNonEmptyString),
13222
                                 ])))
13223
  _NEVAC_FAILED = \
13224
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13225
                       ht.TItems([ht.TNonEmptyString,
13226
                                  ht.TMaybeString,
13227
                                 ])))
13228
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13229
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
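  # Illustrative value matching _NEVAC_RESULT (hypothetical names/UUIDs):
  #   ([("inst1", "group-uuid", ["node3"])],            # moved instances
  #    [("inst2", "disk template not mirrored")],       # failed instances
  #    [[{"OP_ID": opcodes.OpInstanceMigrate.OP_ID}]])  # jobs (simplified)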
13230

    
13231
  _MODE_DATA = {
13232
    constants.IALLOCATOR_MODE_ALLOC:
13233
      (_AddNewInstance,
13234
       [
13235
        ("name", ht.TString),
13236
        ("memory", ht.TInt),
13237
        ("disks", ht.TListOf(ht.TDict)),
13238
        ("disk_template", ht.TString),
13239
        ("os", ht.TString),
13240
        ("tags", _STRING_LIST),
13241
        ("nics", ht.TListOf(ht.TDict)),
13242
        ("vcpus", ht.TInt),
13243
        ("hypervisor", ht.TString),
13244
        ], ht.TList),
13245
    constants.IALLOCATOR_MODE_RELOC:
13246
      (_AddRelocateInstance,
13247
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13248
       ht.TList),
13249
     constants.IALLOCATOR_MODE_NODE_EVAC:
13250
      (_AddNodeEvacuate, [
13251
        ("instances", _STRING_LIST),
13252
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13253
        ], _NEVAC_RESULT),
13254
     constants.IALLOCATOR_MODE_CHG_GROUP:
13255
      (_AddChangeGroup, [
13256
        ("instances", _STRING_LIST),
13257
        ("target_groups", _STRING_LIST),
13258
        ], _NEVAC_RESULT),
13259
    }
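  # How the table above is consumed (sketch): __init__ looks up the
  # (fn, keydata, result_check) triple for self.mode, _BuildInputData calls
  # fn to build the "request" dict and checks it contains every key listed
  # in keydata, and _ValidateResult later validates the allocator's reply
  # against result_check.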
13260

    
13261
  def Run(self, name, validate=True, call_fn=None):
13262
    """Run an instance allocator and return the results.
13263

13264
    """
13265
    if call_fn is None:
13266
      call_fn = self.rpc.call_iallocator_runner
13267

    
13268
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13269
    result.Raise("Failure while running the iallocator script")
13270

    
13271
    self.out_text = result.payload
13272
    if validate:
13273
      self._ValidateResult()
13274

    
13275
  def _ValidateResult(self):
13276
    """Process the allocator results.
13277

13278
    This will process and if successful save the result in
13279
    self.out_data and the other parameters.
13280

13281
    """
13282
    try:
13283
      rdict = serializer.Load(self.out_text)
13284
    except Exception, err:
13285
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13286

    
13287
    if not isinstance(rdict, dict):
13288
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13289

    
13290
    # TODO: remove backwards compatibility in later versions
13291
    if "nodes" in rdict and "result" not in rdict:
13292
      rdict["result"] = rdict["nodes"]
13293
      del rdict["nodes"]
13294

    
13295
    for key in "success", "info", "result":
13296
      if key not in rdict:
13297
        raise errors.OpExecError("Can't parse iallocator results:"
13298
                                 " missing key '%s'" % key)
13299
      setattr(self, key, rdict[key])
13300

    
13301
    if not self._result_check(self.result):
13302
      raise errors.OpExecError("Iallocator returned invalid result,"
13303
                               " expected %s, got %s" %
13304
                               (self._result_check, self.result),
13305
                               errors.ECODE_INVAL)
13306

    
13307
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13308
      assert self.relocate_from is not None
13309
      assert self.required_nodes == 1
13310

    
13311
      node2group = dict((name, ndata["group"])
13312
                        for (name, ndata) in self.in_data["nodes"].items())
13313

    
13314
      fn = compat.partial(self._NodesToGroups, node2group,
13315
                          self.in_data["nodegroups"])
13316

    
13317
      instance = self.cfg.GetInstanceInfo(self.name)
13318
      request_groups = fn(self.relocate_from + [instance.primary_node])
13319
      result_groups = fn(rdict["result"] + [instance.primary_node])
13320

    
13321
      if self.success and not set(result_groups).issubset(request_groups):
13322
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13323
                                 " differ from original groups (%s)" %
13324
                                 (utils.CommaJoin(result_groups),
13325
                                  utils.CommaJoin(request_groups)))
13326

    
13327
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13328
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13329

    
13330
    self.out_data = rdict
13331

    
13332
  @staticmethod
13333
  def _NodesToGroups(node2group, groups, nodes):
13334
    """Returns a list of unique group names for a list of nodes.
13335

13336
    @type node2group: dict
13337
    @param node2group: Map from node name to group UUID
13338
    @type groups: dict
13339
    @param groups: Group information
13340
    @type nodes: list
13341
    @param nodes: Node names
13342

13343
    """
13344
    result = set()
13345

    
13346
    for node in nodes:
13347
      try:
13348
        group_uuid = node2group[node]
13349
      except KeyError:
13350
        # Ignore unknown node
13351
        pass
13352
      else:
13353
        try:
13354
          group = groups[group_uuid]
13355
        except KeyError:
13356
          # Can't find group, let's use UUID
13357
          group_name = group_uuid
13358
        else:
13359
          group_name = group["name"]
13360

    
13361
        result.add(group_name)
13362

    
13363
    return sorted(result)
13364

    
13365

    
13366
class LUTestAllocator(NoHooksLU):
13367
  """Run allocator tests.
13368

13369
  This LU runs the allocator tests
13370

13371
  """
13372
  def CheckPrereq(self):
13373
    """Check prerequisites.
13374

13375
    This checks the opcode parameters depending on the test direction and mode.
13376

13377
    """
13378
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13379
      for attr in ["memory", "disks", "disk_template",
13380
                   "os", "tags", "nics", "vcpus"]:
13381
        if not hasattr(self.op, attr):
13382
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13383
                                     attr, errors.ECODE_INVAL)
13384
      iname = self.cfg.ExpandInstanceName(self.op.name)
13385
      if iname is not None:
13386
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13387
                                   iname, errors.ECODE_EXISTS)
13388
      if not isinstance(self.op.nics, list):
13389
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13390
                                   errors.ECODE_INVAL)
13391
      if not isinstance(self.op.disks, list):
13392
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13393
                                   errors.ECODE_INVAL)
13394
      for row in self.op.disks:
13395
        if (not isinstance(row, dict) or
13396
            constants.IDISK_SIZE not in row or
13397
            not isinstance(row[constants.IDISK_SIZE], int) or
13398
            constants.IDISK_MODE not in row or
13399
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13400
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13401
                                     " parameter", errors.ECODE_INVAL)
13402
      if self.op.hypervisor is None:
13403
        self.op.hypervisor = self.cfg.GetHypervisorType()
13404
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13405
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13406
      self.op.name = fname
13407
      self.relocate_from = \
13408
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13409
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13410
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13411
      if not self.op.instances:
13412
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13413
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13414
    else:
13415
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13416
                                 self.op.mode, errors.ECODE_INVAL)
13417

    
13418
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13419
      if self.op.allocator is None:
13420
        raise errors.OpPrereqError("Missing allocator name",
13421
                                   errors.ECODE_INVAL)
13422
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13423
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13424
                                 self.op.direction, errors.ECODE_INVAL)
13425

    
13426
  def Exec(self, feedback_fn):
13427
    """Run the allocator test.
13428

13429
    """
13430
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13431
      ial = IAllocator(self.cfg, self.rpc,
13432
                       mode=self.op.mode,
13433
                       name=self.op.name,
13434
                       memory=self.op.memory,
13435
                       disks=self.op.disks,
13436
                       disk_template=self.op.disk_template,
13437
                       os=self.op.os,
13438
                       tags=self.op.tags,
13439
                       nics=self.op.nics,
13440
                       vcpus=self.op.vcpus,
13441
                       hypervisor=self.op.hypervisor,
13442
                       )
13443
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13444
      ial = IAllocator(self.cfg, self.rpc,
13445
                       mode=self.op.mode,
13446
                       name=self.op.name,
13447
                       relocate_from=list(self.relocate_from),
13448
                       )
13449
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13450
      ial = IAllocator(self.cfg, self.rpc,
13451
                       mode=self.op.mode,
13452
                       instances=self.op.instances,
13453
                       target_groups=self.op.target_groups)
13454
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13455
      ial = IAllocator(self.cfg, self.rpc,
13456
                       mode=self.op.mode,
13457
                       instances=self.op.instances,
13458
                       evac_mode=self.op.evac_mode)
13459
    else:
13460
      raise errors.ProgrammerError("Uncatched mode %s in"
13461
                                   " LUTestAllocator.Exec", self.op.mode)
13462

    
13463
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13464
      result = ial.in_text
13465
    else:
13466
      ial.Run(self.op.allocator, validate=False)
13467
      result = ial.out_text
13468
    return result
13469

    
13470

    
13471
#: Query type implementations
13472
_QUERY_IMPL = {
13473
  constants.QR_INSTANCE: _InstanceQuery,
13474
  constants.QR_NODE: _NodeQuery,
13475
  constants.QR_GROUP: _GroupQuery,
13476
  constants.QR_OS: _OsQuery,
13477
  }
13478

    
13479
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13480

    
13481

    
13482
def _GetQueryImplementation(name):
13483
  """Returns the implemtnation for a query type.
13484

13485
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
13486

13487
  """
13488
  try:
13489
    return _QUERY_IMPL[name]
13490
  except KeyError:
13491
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13492
                               errors.ECODE_INVAL)