#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
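
  # Illustrative sketch (not part of the original module): an LU's Exec could
  # hand follow-up jobs back to the master daemon by returning, for instance,
  #
  #   return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
  #                         summary="config check queued")
  #
  # where "summary" is a hypothetical extra return value; the processor then
  # submits each inner opcode list as a separate job and adds the job IDs to
  # the opcode result, as described in the class docstring above.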


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer have to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
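
  # Illustrative sketch (not present in the original file): a minimal tasklet
  # could look roughly like this, with all locking handled by the owning LU:
  #
  #   class _ExampleNoopTasklet(Tasklet):
  #     def CheckPrereq(self):
  #       pass                      # nothing to verify in this sketch
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("noop")       # real tasklets do the actual work here
  #
  # The LU would then set, e.g., self.tasklets = [_ExampleNoopTasklet(self)]
  # in its ExpandNames, as described in LogicalUnit.ExpandNames above.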


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
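
  # Illustrative sketch (not in the original file): a concrete query helper
  # subclasses _QueryBase by supplying FIELDS and the three abstract methods,
  # roughly along these lines:
  #
  #   class _ExampleQuery(_QueryBase):
  #     FIELDS = None                   # would hold the query field definitions
  #
  #     def ExpandNames(self, lu):
  #       lu.needed_locks = {}          # declare whatever locks are needed
  #
  #     def DeclareLocks(self, lu, level):
  #       pass
  #
  #     def _GetQueryData(self, lu):
  #       return ...                    # gather the data the fields refer to
  #
  # NewStyleQuery/OldStyleQuery then format that data via the query machinery.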


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
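
# Worked example (illustrative only): with
#   old_params  = {"vcpus": 2, "memory": 512}
#   update_dict = {"vcpus": 4, "memory": constants.VALUE_DEFAULT}
# _GetUpdatedParams returns {"vcpus": 4}: the "memory" key is dropped so the
# cluster-level default applies again, while "vcpus" is overridden.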


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose locks should be released
  @type level: member of locking.LEVELS
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
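
# Illustrative usage (not from the original code): after an LU has narrowed
# down the nodes it actually works on, it might drop the surplus node locks
# with something like
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[kept_node_name])
#
# where kept_node_name is a hypothetical variable holding the node to retain;
# passing neither names nor keep releases every lock held at that level.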


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
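
# Illustrative result shape (hypothetical names): for one instance
# "inst1.example.com" with a single logical volume on "node1.example.com",
# the returned mapping would look like
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com"}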


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
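
# Illustrative excerpt of the resulting environment (hypothetical values) for
# an instance with one NIC and one disk:
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     ...
#   }
#
# The hooks runner later prefixes each key with "GANETI_", as noted in
# LogicalUnit.BuildHooksEnv.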


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
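
# Illustrative return value (hypothetical data): for a cluster with KVM
# enabled and one instance overriding its parameters, the list could contain
# entries such as
#
#   [("cluster", "kvm", {...cluster-level defaults...}),
#    ("os debian-image", "kvm", {...os-specific overrides merged in...}),
#    ("instance inst1.example.com", "kvm", {...fully filled parameters...})]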


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
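
  # Illustrative output (hypothetical values): for ecode=ENODESSH,
  # item="node1.example.com" and msg="ssh problems", _Error feeds back either
  #
  #   ERROR:ENODESSH:node:node1.example.com:ssh problems
  #
  # when the opcode's error_codes flag is set (machine-parseable), or otherwise
  #
  #   ERROR: node node1.example.com: ssh problems
  #
  # both prefixed with "  - " by the feedback call above.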

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # only ERROR-type messages mark the operation as failed; WARN cases do not
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1552

    
1553

    
1554
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555
  """Verifies the cluster config.
1556

1557
  """
1558
  REQ_BGL = True
1559

    
1560
  def _VerifyHVP(self, hvp_data):
1561
    """Verifies locally the syntax of the hypervisor parameters.
1562

1563
    """
1564
    for item, hv_name, hv_params in hvp_data:
1565
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1566
             (item, hv_name))
1567
      try:
1568
        hv_class = hypervisor.GetHypervisor(hv_name)
1569
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570
        hv_class.CheckParameterSyntax(hv_params)
1571
      except errors.GenericError, err:
1572
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1573

    
1574
  def ExpandNames(self):
1575
    # Information can be safely retrieved as the BGL is acquired in exclusive
1576
    # mode
1577
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579
    self.all_node_info = self.cfg.GetAllNodesInfo()
1580
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581
    self.needed_locks = {}
1582

    
1583
  def Exec(self, feedback_fn):
1584
    """Verify integrity of cluster, performing various test on nodes.
1585

1586
    """
1587
    self.bad = False
1588
    self._feedback_fn = feedback_fn
1589

    
1590
    feedback_fn("* Verifying cluster config")
1591

    
1592
    for msg in self.cfg.VerifyConfig():
1593
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1594

    
1595
    feedback_fn("* Verifying cluster certificate files")
1596

    
1597
    for cert_filename in constants.ALL_CERT_FILES:
1598
      (errcode, msg) = _VerifyCertificate(cert_filename)
1599
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1600

    
1601
    feedback_fn("* Verifying hypervisor parameters")
1602

    
1603
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604
                                                self.all_inst_info.values()))
1605

    
1606
    feedback_fn("* Verifying all nodes belong to an existing group")
1607

    
1608
    # We do this verification here because, should this bogus circumstance
1609
    # occur, it would never be caught by VerifyGroup, which only acts on
1610
    # nodes/instances reachable from existing node groups.
1611

    
1612
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1613
                         if node.group not in self.all_group_info)
1614

    
1615
    dangling_instances = {}
1616
    no_node_instances = []
1617

    
1618
    for inst in self.all_inst_info.values():
1619
      if inst.primary_node in dangling_nodes:
1620
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621
      elif inst.primary_node not in self.all_node_info:
1622
        no_node_instances.append(inst.name)
1623

    
1624
    pretty_dangling = [
1625
        "%s (%s)" %
1626
        (node.name,
1627
         utils.CommaJoin(dangling_instances.get(node.name,
1628
                                                ["no instances"])))
1629
        for node in dangling_nodes]
1630

    
1631
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632
                  "the following nodes (and their instances) belong to a non"
1633
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1634

    
1635
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636
                  "the following instances have a non-existing primary-node:"
1637
                  " %s", utils.CommaJoin(no_node_instances))
1638

    
1639
    return not self.bad
1640

    
1641

    
1642
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643
  """Verifies the status of a node group.
1644

1645
  """
1646
  HPATH = "cluster-verify"
1647
  HTYPE = constants.HTYPE_CLUSTER
1648
  REQ_BGL = False
1649

    
1650
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1651

    
1652
  class NodeImage(object):
1653
    """A class representing the logical and physical status of a node.
1654

1655
    @type name: string
1656
    @ivar name: the node name to which this object refers
1657
    @ivar volumes: a structure as returned from
1658
        L{ganeti.backend.GetVolumeList} (runtime)
1659
    @ivar instances: a list of running instances (runtime)
1660
    @ivar pinst: list of configured primary instances (config)
1661
    @ivar sinst: list of configured secondary instances (config)
1662
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1663
        instances for which this node is secondary (config)
1664
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1665
    @ivar dfree: free disk, as reported by the node (runtime)
1666
    @ivar offline: the offline status (config)
1667
    @type rpc_fail: boolean
1668
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1669
        not whether the individual keys were correct) (runtime)
1670
    @type lvm_fail: boolean
1671
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672
    @type hyp_fail: boolean
1673
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1674
    @type ghost: boolean
1675
    @ivar ghost: whether this is a known node or not (config)
1676
    @type os_fail: boolean
1677
    @ivar os_fail: whether the RPC call didn't return valid OS data
1678
    @type oslist: list
1679
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680
    @type vm_capable: boolean
1681
    @ivar vm_capable: whether the node can host instances
1682

1683
    """
1684
    def __init__(self, offline=False, name=None, vm_capable=True):
1685
      self.name = name
1686
      self.volumes = {}
1687
      self.instances = []
1688
      self.pinst = []
1689
      self.sinst = []
1690
      self.sbp = {}
1691
      self.mfree = 0
1692
      self.dfree = 0
1693
      self.offline = offline
1694
      self.vm_capable = vm_capable
1695
      self.rpc_fail = False
1696
      self.lvm_fail = False
1697
      self.hyp_fail = False
1698
      self.ghost = False
1699
      self.os_fail = False
1700
      self.oslist = {}
1701

    
1702
  def ExpandNames(self):
1703
    # This raises errors.OpPrereqError on its own:
1704
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1705

    
1706
    # Get instances in node group; this is unsafe and needs verification later
1707
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1708

    
1709
    self.needed_locks = {
1710
      locking.LEVEL_INSTANCE: inst_names,
1711
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1712
      locking.LEVEL_NODE: [],
1713
      }
1714

    
1715
    self.share_locks = _ShareAll()
1716

    
1717
  def DeclareLocks(self, level):
1718
    if level == locking.LEVEL_NODE:
1719
      # Get members of node group; this is unsafe and needs verification later
1720
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1721

    
1722
      all_inst_info = self.cfg.GetAllInstancesInfo()
1723

    
1724
      # In Exec(), we warn about mirrored instances that have primary and
1725
      # secondary living in separate node groups. To fully verify that
1726
      # volumes for these instances are healthy, we will need to do an
1727
      # extra call to their secondaries. We ensure here those nodes will
1728
      # be locked.
1729
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730
        # Important: access only the instances whose lock is owned
1731
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732
          nodes.update(all_inst_info[inst].secondary_nodes)
1733

    
1734
      self.needed_locks[locking.LEVEL_NODE] = nodes
1735

    
1736
  def CheckPrereq(self):
1737
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1739

    
1740
    group_nodes = set(self.group_info.members)
1741
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1742

    
1743
    unlocked_nodes = \
1744
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1745

    
1746
    unlocked_instances = \
1747
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1748

    
1749
    if unlocked_nodes:
1750
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751
                                 utils.CommaJoin(unlocked_nodes))
1752

    
1753
    if unlocked_instances:
1754
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1755
                                 utils.CommaJoin(unlocked_instances))
1756

    
1757
    self.all_node_info = self.cfg.GetAllNodesInfo()
1758
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1759

    
1760
    self.my_node_names = utils.NiceSort(group_nodes)
1761
    self.my_inst_names = utils.NiceSort(group_instances)
1762

    
1763
    self.my_node_info = dict((name, self.all_node_info[name])
1764
                             for name in self.my_node_names)
1765

    
1766
    self.my_inst_info = dict((name, self.all_inst_info[name])
1767
                             for name in self.my_inst_names)
1768

    
1769
    # We detect here the nodes that will need the extra RPC calls for verifying
1770
    # split LV volumes; they should be locked.
1771
    extra_lv_nodes = set()
1772

    
1773
    for inst in self.my_inst_info.values():
1774
      if inst.disk_template in constants.DTS_INT_MIRROR:
1775
        group = self.my_node_info[inst.primary_node].group
1776
        for nname in inst.secondary_nodes:
1777
          if self.all_node_info[nname].group != group:
1778
            extra_lv_nodes.add(nname)
1779

    
1780
    unlocked_lv_nodes = \
1781
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1782

    
1783
    if unlocked_lv_nodes:
1784
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1785
                                 utils.CommaJoin(unlocked_lv_nodes))
1786
    self.extra_lv_nodes = list(extra_lv_nodes)
1787

    
1788
  def _VerifyNode(self, ninfo, nresult):
1789
    """Perform some basic validation on data returned from a node.
1790

1791
      - check the result data structure is well formed and has all the
1792
        mandatory fields
1793
      - check ganeti version
1794

1795
    @type ninfo: L{objects.Node}
1796
    @param ninfo: the node to check
1797
    @param nresult: the results from the node
1798
    @rtype: boolean
1799
    @return: whether overall this call was successful (and we can expect
1800
         reasonable values in the respose)
1801

1802
    """
1803
    node = ninfo.name
1804
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1805

    
1806
    # main result, nresult should be a non-empty dict
1807
    test = not nresult or not isinstance(nresult, dict)
1808
    _ErrorIf(test, self.ENODERPC, node,
1809
                  "unable to verify node: no data returned")
1810
    if test:
1811
      return False
1812

    
1813
    # compares ganeti version
1814
    local_version = constants.PROTOCOL_VERSION
1815
    remote_version = nresult.get("version", None)
1816
    test = not (remote_version and
1817
                isinstance(remote_version, (list, tuple)) and
1818
                len(remote_version) == 2)
1819
    _ErrorIf(test, self.ENODERPC, node,
1820
             "connection to node returned invalid data")
1821
    if test:
1822
      return False
1823

    
1824
    test = local_version != remote_version[0]
1825
    _ErrorIf(test, self.ENODEVERSION, node,
1826
             "incompatible protocol versions: master %s,"
1827
             " node %s", local_version, remote_version[0])
1828
    if test:
1829
      return False
1830

    
1831
    # node seems compatible, we can actually try to look into its results
1832

    
1833
    # full package version
1834
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835
                  self.ENODEVERSION, node,
1836
                  "software version mismatch: master %s, node %s",
1837
                  constants.RELEASE_VERSION, remote_version[1],
1838
                  code=self.ETYPE_WARNING)
1839

    
1840
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1842
      for hv_name, hv_result in hyp_result.iteritems():
1843
        test = hv_result is not None
1844
        _ErrorIf(test, self.ENODEHV, node,
1845
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1846

    
1847
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848
    if ninfo.vm_capable and isinstance(hvp_result, list):
1849
      for item, hv_name, hv_result in hvp_result:
1850
        _ErrorIf(True, self.ENODEHV, node,
1851
                 "hypervisor %s parameter verify failure (source %s): %s",
1852
                 hv_name, item, hv_result)
1853

    
1854
    test = nresult.get(constants.NV_NODESETUP,
1855
                       ["Missing NODESETUP results"])
1856
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1857
             "; ".join(test))
1858

    
1859
    return True
1860

    
1861
  def _VerifyNodeTime(self, ninfo, nresult,
1862
                      nvinfo_starttime, nvinfo_endtime):
1863
    """Check the node time.
1864

1865
    @type ninfo: L{objects.Node}
1866
    @param ninfo: the node to check
1867
    @param nresult: the remote results for the node
1868
    @param nvinfo_starttime: the start time of the RPC call
1869
    @param nvinfo_endtime: the end time of the RPC call
1870

1871
    """
1872
    node = ninfo.name
1873
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1874

    
1875
    ntime = nresult.get(constants.NV_TIME, None)
1876
    try:
1877
      ntime_merged = utils.MergeTime(ntime)
1878
    except (ValueError, TypeError):
1879
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1880
      return
1881

    
1882
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1886
    else:
1887
      ntime_diff = None
1888

    
1889
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890
             "Node time diverges by at least %s from master node time",
1891
             ntime_diff)
1892

    
1893
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894
    """Check the node LVM results.
1895

1896
    @type ninfo: L{objects.Node}
1897
    @param ninfo: the node to check
1898
    @param nresult: the remote results for the node
1899
    @param vg_name: the configured VG name
1900

1901
    """
1902
    if vg_name is None:
1903
      return
1904

    
1905
    node = ninfo.name
1906
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1907

    
1908
    # checks vg existence and size > 20G
1909
    vglist = nresult.get(constants.NV_VGLIST, None)
1910
    test = not vglist
1911
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1912
    if not test:
1913
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914
                                            constants.MIN_VG_SIZE)
1915
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1916

    
1917
    # check pv names
1918
    pvlist = nresult.get(constants.NV_PVLIST, None)
1919
    test = pvlist is None
1920
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1921
    if not test:
1922
      # check that ':' is not present in PV names, since it's a
1923
      # special character for lvcreate (denotes the range of PEs to
1924
      # use on the PV)
1925
      for _, pvname, owner_vg in pvlist:
1926
        test = ":" in pvname
1927
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928
                 " '%s' of VG '%s'", pvname, owner_vg)
1929

    
1930
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931
    """Check the node bridges.
1932

1933
    @type ninfo: L{objects.Node}
1934
    @param ninfo: the node to check
1935
    @param nresult: the remote results for the node
1936
    @param bridges: the expected list of bridges
1937

1938
    """
1939
    if not bridges:
1940
      return
1941

    
1942
    node = ninfo.name
1943
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1944

    
1945
    missing = nresult.get(constants.NV_BRIDGES, None)
1946
    test = not isinstance(missing, list)
1947
    _ErrorIf(test, self.ENODENET, node,
1948
             "did not return valid bridge information")
1949
    if not test:
1950
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951
               utils.CommaJoin(sorted(missing)))
1952

    
1953
  def _VerifyNodeNetwork(self, ninfo, nresult):
1954
    """Check the node network connectivity results.
1955

1956
    @type ninfo: L{objects.Node}
1957
    @param ninfo: the node to check
1958
    @param nresult: the remote results for the node
1959

1960
    """
1961
    node = ninfo.name
1962
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1963

    
1964
    test = constants.NV_NODELIST not in nresult
1965
    _ErrorIf(test, self.ENODESSH, node,
1966
             "node hasn't returned node ssh connectivity data")
1967
    if not test:
1968
      if nresult[constants.NV_NODELIST]:
1969
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970
          _ErrorIf(True, self.ENODESSH, node,
1971
                   "ssh communication with node '%s': %s", a_node, a_msg)
1972

    
1973
    test = constants.NV_NODENETTEST not in nresult
1974
    _ErrorIf(test, self.ENODENET, node,
1975
             "node hasn't returned node tcp connectivity data")
1976
    if not test:
1977
      if nresult[constants.NV_NODENETTEST]:
1978
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979
        for anode in nlist:
1980
          _ErrorIf(True, self.ENODENET, node,
1981
                   "tcp communication with node '%s': %s",
1982
                   anode, nresult[constants.NV_NODENETTEST][anode])
1983

    
1984
    test = constants.NV_MASTERIP not in nresult
1985
    _ErrorIf(test, self.ENODENET, node,
1986
             "node hasn't returned node master IP reachability data")
1987
    if not test:
1988
      if not nresult[constants.NV_MASTERIP]:
1989
        if node == self.master_node:
1990
          msg = "the master node cannot reach the master IP (not configured?)"
1991
        else:
1992
          msg = "cannot reach the master IP"
1993
        _ErrorIf(True, self.ENODENET, node, msg)
1994

    
1995
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1996
                      diskstatus):
1997
    """Verify an instance.
1998

1999
    This function checks to see if the required block devices are
2000
    available on the instance's node.
2001

2002
    """
2003
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004
    node_current = instanceconfig.primary_node
2005

    
2006
    node_vol_should = {}
2007
    instanceconfig.MapLVsByNode(node_vol_should)
2008

    
2009
    for node in node_vol_should:
2010
      n_img = node_image[node]
2011
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012
        # ignore missing volumes on offline or broken nodes
2013
        continue
2014
      for volume in node_vol_should[node]:
2015
        test = volume not in n_img.volumes
2016
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017
                 "volume %s missing on node %s", volume, node)
2018

    
2019
    if instanceconfig.admin_up:
2020
      pri_img = node_image[node_current]
2021
      test = instance not in pri_img.instances and not pri_img.offline
2022
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023
               "instance not running on its primary node %s",
2024
               node_current)
2025

    
2026
    diskdata = [(nname, success, status, idx)
2027
                for (nname, disks) in diskstatus.items()
2028
                for idx, (success, status) in enumerate(disks)]
2029

    
2030
    for nname, success, bdev_status, idx in diskdata:
2031
      # the 'ghost node' construction in Exec() ensures that we have a
2032
      # node here
2033
      snode = node_image[nname]
2034
      bad_snode = snode.ghost or snode.offline
2035
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036
               self.EINSTANCEFAULTYDISK, instance,
2037
               "couldn't retrieve status for disk/%s on %s: %s",
2038
               idx, nname, bdev_status)
2039
      _ErrorIf((instanceconfig.admin_up and success and
2040
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2041
               self.EINSTANCEFAULTYDISK, instance,
2042
               "disk/%s on %s is faulty", idx, nname)
2043

    
2044
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045
    """Verify if there are any unknown volumes in the cluster.
2046

2047
    The .os, .swap and backup volumes are ignored. All other volumes are
2048
    reported as unknown.
2049

2050
    @type reserved: L{ganeti.utils.FieldSet}
2051
    @param reserved: a FieldSet of reserved volume names
2052

2053
    """
2054
    for node, n_img in node_image.items():
2055
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056
        # skip non-healthy nodes
2057
        continue
2058
      for volume in n_img.volumes:
2059
        test = ((node not in node_vol_should or
2060
                volume not in node_vol_should[node]) and
2061
                not reserved.Matches(volume))
2062
        self._ErrorIf(test, self.ENODEORPHANLV, node,
2063
                      "volume %s is unknown", volume)
2064

    
2065
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066
    """Verify N+1 Memory Resilience.
2067

2068
    Check that if one single node dies we can still start all the
2069
    instances it was primary for.
2070

2071
    """
2072
    cluster_info = self.cfg.GetClusterInfo()
2073
    for node, n_img in node_image.items():
2074
      # This code checks that every node which is now listed as
2075
      # secondary has enough memory to host all instances it is
2076
      # supposed to should a single other node in the cluster fail.
2077
      # FIXME: not ready for failover to an arbitrary node
2078
      # FIXME: does not support file-backed instances
2079
      # WARNING: we currently take into account down instances as well
2080
      # as up ones, considering that even if they're down someone
2081
      # might want to start them even in the event of a node failure.
2082
      if n_img.offline:
2083
        # we're skipping offline nodes from the N+1 warning, since
2084
        # most likely we don't have good memory infromation from them;
2085
        # we already list instances living on such nodes, and that's
2086
        # enough warning
2087
        continue
2088
      for prinode, instances in n_img.sbp.items():
2089
        needed_mem = 0
2090
        for instance in instances:
2091
          bep = cluster_info.FillBE(instance_cfg[instance])
2092
          if bep[constants.BE_AUTO_BALANCE]:
2093
            needed_mem += bep[constants.BE_MEMORY]
2094
        test = n_img.mfree < needed_mem
2095
        self._ErrorIf(test, self.ENODEN1, node,
2096
                      "not enough memory to accomodate instance failovers"
2097
                      " should node %s fail (%dMiB needed, %dMiB available)",
2098
                      prinode, needed_mem, n_img.mfree)
2099

    
2100
  @classmethod
2101
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102
                   (files_all, files_all_opt, files_mc, files_vm)):
2103
    """Verifies file checksums collected from all nodes.
2104

2105
    @param errorif: Callback for reporting errors
2106
    @param nodeinfo: List of L{objects.Node} objects
2107
    @param master_node: Name of master node
2108
    @param all_nvinfo: RPC results
2109

2110
    """
2111
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113
           "Found file listed in more than one file list"
2114

    
2115
    # Define functions determining which nodes to consider for a file
2116
    files2nodefn = [
2117
      (files_all, None),
2118
      (files_all_opt, None),
2119
      (files_mc, lambda node: (node.master_candidate or
2120
                               node.name == master_node)),
2121
      (files_vm, lambda node: node.vm_capable),
2122
      ]
2123

    
2124
    # Build mapping from filename to list of nodes which should have the file
2125
    nodefiles = {}
2126
    for (files, fn) in files2nodefn:
2127
      if fn is None:
2128
        filenodes = nodeinfo
2129
      else:
2130
        filenodes = filter(fn, nodeinfo)
2131
      nodefiles.update((filename,
2132
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2133
                       for filename in files)
2134

    
2135
    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2136

    
2137
    fileinfo = dict((filename, {}) for filename in nodefiles)
2138
    ignore_nodes = set()
2139

    
2140
    for node in nodeinfo:
2141
      if node.offline:
2142
        ignore_nodes.add(node.name)
2143
        continue
2144

    
2145
      nresult = all_nvinfo[node.name]
2146

    
2147
      if nresult.fail_msg or not nresult.payload:
2148
        node_files = None
2149
      else:
2150
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2151

    
2152
      test = not (node_files and isinstance(node_files, dict))
2153
      errorif(test, cls.ENODEFILECHECK, node.name,
2154
              "Node did not return file checksum data")
2155
      if test:
2156
        ignore_nodes.add(node.name)
2157
        continue
2158

    
2159
      # Build per-checksum mapping from filename to nodes having it
2160
      for (filename, checksum) in node_files.items():
2161
        assert filename in nodefiles
2162
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2163

    
2164
    for (filename, checksums) in fileinfo.items():
2165
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2166

    
2167
      # Nodes having the file
2168
      with_file = frozenset(node_name
2169
                            for nodes in fileinfo[filename].values()
2170
                            for node_name in nodes) - ignore_nodes
2171

    
2172
      expected_nodes = nodefiles[filename] - ignore_nodes
2173

    
2174
      # Nodes missing file
2175
      missing_file = expected_nodes - with_file
2176

    
2177
      if filename in files_all_opt:
2178
        # All or no nodes
2179
        errorif(missing_file and missing_file != expected_nodes,
2180
                cls.ECLUSTERFILECHECK, None,
2181
                "File %s is optional, but it must exist on all or no"
2182
                " nodes (not found on %s)",
2183
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2184
      else:
2185
        # Non-optional files
2186
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187
                "File %s is missing from node(s) %s", filename,
2188
                utils.CommaJoin(utils.NiceSort(missing_file)))
2189

    
2190
        # Warn if a node has a file it shouldn't
2191
        unexpected = with_file - expected_nodes
2192
        errorif(unexpected,
2193
                cls.ECLUSTERFILECHECK, None,
2194
                "File %s should not exist on node(s) %s",
2195
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2196

    
2197
      # See if there are multiple versions of the file
2198
      test = len(checksums) > 1
2199
      if test:
2200
        variants = ["variant %s on %s" %
2201
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202
                    for (idx, (checksum, nodes)) in
2203
                      enumerate(sorted(checksums.items()))]
2204
      else:
2205
        variants = []
2206

    
2207
      errorif(test, cls.ECLUSTERFILECHECK, None,
2208
              "File %s found with %s different checksums (%s)",
2209
              filename, len(checksums), "; ".join(variants))
2210

    
2211
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2212
                      drbd_map):
2213
    """Verifies and the node DRBD status.
2214

2215
    @type ninfo: L{objects.Node}
2216
    @param ninfo: the node to check
2217
    @param nresult: the remote results for the node
2218
    @param instanceinfo: the dict of instances
2219
    @param drbd_helper: the configured DRBD usermode helper
2220
    @param drbd_map: the DRBD map as returned by
2221
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2222

2223
    """
2224
    node = ninfo.name
2225
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2226

    
2227
    if drbd_helper:
2228
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229
      test = (helper_result == None)
2230
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231
               "no drbd usermode helper returned")
2232
      if helper_result:
2233
        status, payload = helper_result
2234
        test = not status
2235
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236
                 "drbd usermode helper check unsuccessful: %s", payload)
2237
        test = status and (payload != drbd_helper)
2238
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239
                 "wrong drbd usermode helper: %s", payload)
2240

    
2241
    # compute the DRBD minors
2242
    node_drbd = {}
2243
    for minor, instance in drbd_map[node].items():
2244
      test = instance not in instanceinfo
2245
      _ErrorIf(test, self.ECLUSTERCFG, None,
2246
               "ghost instance '%s' in temporary DRBD map", instance)
2247
        # ghost instance should not be running, but otherwise we
2248
        # don't give double warnings (both ghost instance and
2249
        # unallocated minor in use)
2250
      if test:
2251
        node_drbd[minor] = (instance, False)
2252
      else:
2253
        instance = instanceinfo[instance]
2254
        node_drbd[minor] = (instance.name, instance.admin_up)
2255

    
2256
    # and now check them
2257
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258
    test = not isinstance(used_minors, (tuple, list))
2259
    _ErrorIf(test, self.ENODEDRBD, node,
2260
             "cannot parse drbd status file: %s", str(used_minors))
2261
    if test:
2262
      # we cannot check drbd status
2263
      return
2264

    
2265
    for minor, (iname, must_exist) in node_drbd.items():
2266
      test = minor not in used_minors and must_exist
2267
      _ErrorIf(test, self.ENODEDRBD, node,
2268
               "drbd minor %d of instance %s is not active", minor, iname)
2269
    for minor in used_minors:
2270
      test = minor not in node_drbd
2271
      _ErrorIf(test, self.ENODEDRBD, node,
2272
               "unallocated drbd minor %d is in use", minor)
2273

    
2274
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275
    """Builds the node OS structures.
2276

2277
    @type ninfo: L{objects.Node}
2278
    @param ninfo: the node to check
2279
    @param nresult: the remote results for the node
2280
    @param nimg: the node image object
2281

2282
    """
2283
    node = ninfo.name
2284
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2285

    
2286
    remote_os = nresult.get(constants.NV_OSLIST, None)
2287
    test = (not isinstance(remote_os, list) or
2288
            not compat.all(isinstance(v, list) and len(v) == 7
2289
                           for v in remote_os))
2290

    
2291
    _ErrorIf(test, self.ENODEOS, node,
2292
             "node hasn't returned valid OS data")
2293

    
2294
    nimg.os_fail = test
2295

    
2296
    if test:
2297
      return
2298

    
2299
    os_dict = {}
2300

    
2301
    for (name, os_path, status, diagnose,
2302
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2303

    
2304
      if name not in os_dict:
2305
        os_dict[name] = []
2306

    
2307
      # parameters is a list of lists instead of list of tuples due to
2308
      # JSON lacking a real tuple type, fix it:
2309
      parameters = [tuple(v) for v in parameters]
2310
      os_dict[name].append((os_path, status, diagnose,
2311
                            set(variants), set(parameters), set(api_ver)))
2312

    
2313
    nimg.oslist = os_dict
2314

    
2315
  def _VerifyNodeOS(self, ninfo, nimg, base):
2316
    """Verifies the node OS list.
2317

2318
    @type ninfo: L{objects.Node}
2319
    @param ninfo: the node to check
2320
    @param nimg: the node image object
2321
    @param base: the 'template' node we match against (e.g. from the master)
2322

2323
    """
2324
    node = ninfo.name
2325
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2326

    
2327
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2328

    
2329
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330
    for os_name, os_data in nimg.oslist.items():
2331
      assert os_data, "Empty OS status for OS %s?!" % os_name
2332
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333
      _ErrorIf(not f_status, self.ENODEOS, node,
2334
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2337
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2338
      # comparisons with the 'base' image
2339
      test = os_name not in base.oslist
2340
      _ErrorIf(test, self.ENODEOS, node,
2341
               "Extra OS %s not present on reference node (%s)",
2342
               os_name, base.name)
2343
      if test:
2344
        continue
2345
      assert base.oslist[os_name], "Base node has empty OS status?"
2346
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2347
      if not b_status:
2348
        # base OS is invalid, skipping
2349
        continue
2350
      for kind, a, b in [("API version", f_api, b_api),
2351
                         ("variants list", f_var, b_var),
2352
                         ("parameters", beautify_params(f_param),
2353
                          beautify_params(b_param))]:
2354
        _ErrorIf(a != b, self.ENODEOS, node,
2355
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356
                 kind, os_name, base.name,
2357
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2358

    
2359
    # check any missing OSes
2360
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361
    _ErrorIf(missing, self.ENODEOS, node,
2362
             "OSes present on reference node %s but missing on this node: %s",
2363
             base.name, utils.CommaJoin(missing))
2364

    
2365
  def _VerifyOob(self, ninfo, nresult):
2366
    """Verifies out of band functionality of a node.
2367

2368
    @type ninfo: L{objects.Node}
2369
    @param ninfo: the node to check
2370
    @param nresult: the remote results for the node
2371

2372
    """
2373
    node = ninfo.name
2374
    # We just have to verify the paths on master and/or master candidates
2375
    # as the oob helper is invoked on the master
2376
    if ((ninfo.master_candidate or ninfo.master_capable) and
2377
        constants.NV_OOB_PATHS in nresult):
2378
      for path_result in nresult[constants.NV_OOB_PATHS]:
2379
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2380

    
2381
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382
    """Verifies and updates the node volume data.
2383

2384
    This function will update a L{NodeImage}'s internal structures
2385
    with data from the remote call.
2386

2387
    @type ninfo: L{objects.Node}
2388
    @param ninfo: the node to check
2389
    @param nresult: the remote results for the node
2390
    @param nimg: the node image object
2391
    @param vg_name: the configured VG name
2392

2393
    """
2394
    node = ninfo.name
2395
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2396

    
2397
    nimg.lvm_fail = True
2398
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2399
    if vg_name is None:
2400
      pass
2401
    elif isinstance(lvdata, basestring):
2402
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403
               utils.SafeEncode(lvdata))
2404
    elif not isinstance(lvdata, dict):
2405
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2406
    else:
2407
      nimg.volumes = lvdata
2408
      nimg.lvm_fail = False
2409

    
2410
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411
    """Verifies and updates the node instance list.
2412

2413
    If the listing was successful, then updates this node's instance
2414
    list. Otherwise, it marks the RPC call as failed for the instance
2415
    list key.
2416

2417
    @type ninfo: L{objects.Node}
2418
    @param ninfo: the node to check
2419
    @param nresult: the remote results for the node
2420
    @param nimg: the node image object
2421

2422
    """
2423
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2424
    test = not isinstance(idata, list)
2425
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2427
    if test:
2428
      nimg.hyp_fail = True
2429
    else:
2430
      nimg.instances = idata
2431

    
2432
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433
    """Verifies and computes a node information map
2434

2435
    @type ninfo: L{objects.Node}
2436
    @param ninfo: the node to check
2437
    @param nresult: the remote results for the node
2438
    @param nimg: the node image object
2439
    @param vg_name: the configured VG name
2440

2441
    """
2442
    node = ninfo.name
2443
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444

    
2445
    # try to read free memory (from the hypervisor)
2446
    hv_info = nresult.get(constants.NV_HVINFO, None)
2447
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2449
    if not test:
2450
      try:
2451
        nimg.mfree = int(hv_info["memory_free"])
2452
      except (ValueError, TypeError):
2453
        _ErrorIf(True, self.ENODERPC, node,
2454
                 "node returned invalid nodeinfo, check hypervisor")
2455

    
2456
    # FIXME: devise a free space model for file based instances as well
2457
    if vg_name is not None:
2458
      test = (constants.NV_VGLIST not in nresult or
2459
              vg_name not in nresult[constants.NV_VGLIST])
2460
      _ErrorIf(test, self.ENODELVM, node,
2461
               "node didn't return data for the volume group '%s'"
2462
               " - it is either missing or broken", vg_name)
2463
      if not test:
2464
        try:
2465
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466
        except (ValueError, TypeError):
2467
          _ErrorIf(True, self.ENODERPC, node,
2468
                   "node returned invalid LVM info, check LVM status")
2469

    
2470
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471
    """Gets per-disk status information for all instances.
2472

2473
    @type nodelist: list of strings
2474
    @param nodelist: Node names
2475
    @type node_image: dict of (name, L{objects.Node})
2476
    @param node_image: Node objects
2477
    @type instanceinfo: dict of (name, L{objects.Instance})
2478
    @param instanceinfo: Instance objects
2479
    @rtype: {instance: {node: [(succes, payload)]}}
2480
    @return: a dictionary of per-instance dictionaries with nodes as
2481
        keys and disk information as values; the disk information is a
2482
        list of tuples (success, payload)
2483

2484
    """
2485
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2486

    
2487
    node_disks = {}
2488
    node_disks_devonly = {}
2489
    diskless_instances = set()
2490
    diskless = constants.DT_DISKLESS
2491

    
2492
    for nname in nodelist:
2493
      node_instances = list(itertools.chain(node_image[nname].pinst,
2494
                                            node_image[nname].sinst))
2495
      diskless_instances.update(inst for inst in node_instances
2496
                                if instanceinfo[inst].disk_template == diskless)
2497
      disks = [(inst, disk)
2498
               for inst in node_instances
2499
               for disk in instanceinfo[inst].disks]
2500

    
2501
      if not disks:
2502
        # No need to collect data
2503
        continue
2504

    
2505
      node_disks[nname] = disks
2506

    
2507
      # Creating copies as SetDiskID below will modify the objects and that can
2508
      # lead to incorrect data returned from nodes
2509
      devonly = [dev.Copy() for (_, dev) in disks]
2510

    
2511
      for dev in devonly:
2512
        self.cfg.SetDiskID(dev, nname)
2513

    
2514
      node_disks_devonly[nname] = devonly
2515

    
2516
    assert len(node_disks) == len(node_disks_devonly)
2517

    
2518
    # Collect data from all nodes with disks
2519
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2520
                                                          node_disks_devonly)
2521

    
2522
    assert len(result) == len(node_disks)
2523

    
2524
    instdisk = {}
2525

    
2526
    for (nname, nres) in result.items():
2527
      disks = node_disks[nname]
2528

    
2529
      if nres.offline:
2530
        # No data from this node
2531
        data = len(disks) * [(False, "node offline")]
2532
      else:
2533
        msg = nres.fail_msg
2534
        _ErrorIf(msg, self.ENODERPC, nname,
2535
                 "while getting disk information: %s", msg)
2536
        if msg:
2537
          # No data from this node
2538
          data = len(disks) * [(False, msg)]
2539
        else:
2540
          data = []
2541
          for idx, i in enumerate(nres.payload):
2542
            if isinstance(i, (tuple, list)) and len(i) == 2:
2543
              data.append(i)
2544
            else:
2545
              logging.warning("Invalid result from node %s, entry %d: %s",
2546
                              nname, idx, i)
2547
              data.append((False, "Invalid result from the remote node"))
2548

    
2549
      for ((inst, _), status) in zip(disks, data):
2550
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2551

    
2552
    # Add empty entries for diskless instances.
2553
    for inst in diskless_instances:
2554
      assert inst not in instdisk
2555
      instdisk[inst] = {}
2556

    
2557
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559
                      compat.all(isinstance(s, (tuple, list)) and
2560
                                 len(s) == 2 for s in statuses)
2561
                      for inst, nnames in instdisk.items()
2562
                      for nname, statuses in nnames.items())
2563
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2564

    
2565
    return instdisk
2566

    
2567
  @staticmethod
2568
  def _SshNodeSelector(group_uuid, all_nodes):
2569
    """Create endless iterators for all potential SSH check hosts.
2570

2571
    """
2572
    nodes = [node for node in all_nodes
2573
             if (node.group != group_uuid and
2574
                 not node.offline)]
2575
    keyfunc = operator.attrgetter("group")
2576

    
2577
    return map(itertools.cycle,
2578
               [sorted(map(operator.attrgetter("name"), names))
2579
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2580
                                                  keyfunc)])
2581

    
2582
  @classmethod
2583
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584
    """Choose which nodes should talk to which other nodes.
2585

2586
    We will make nodes contact all nodes in their group, and one node from
2587
    every other group.
2588

2589
    @warning: This algorithm has a known issue if one node group is much
2590
      smaller than others (e.g. just one node). In such a case all other
2591
      nodes will talk to the single node.
2592

2593
    """
2594
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2596

    
2597
    return (online_nodes,
2598
            dict((name, sorted([i.next() for i in sel]))
2599
                 for name in online_nodes))
2600

    
2601
  def BuildHooksEnv(self):
2602
    """Build hooks env.
2603

2604
    Cluster-Verify hooks just ran in the post phase and their failure makes
2605
    the output be logged in the verify output and the verification to fail.
2606

2607
    """
2608
    env = {
2609
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2610
      }
2611

    
2612
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613
               for node in self.my_node_info.values())
2614

    
2615
    return env
2616

    
2617
  def BuildHooksNodes(self):
2618
    """Build hooks nodes.
2619

2620
    """
2621
    return ([], self.my_node_names)
2622

    
2623
  def Exec(self, feedback_fn):
2624
    """Verify integrity of the node group, performing various test on nodes.
2625

2626
    """
2627
    # This method has too many local variables. pylint: disable=R0914
2628
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2629

    
2630
    if not self.my_node_names:
2631
      # empty node group
2632
      feedback_fn("* Empty node group, skipping verification")
2633
      return True
2634

    
2635
    self.bad = False
2636
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637
    verbose = self.op.verbose
2638
    self._feedback_fn = feedback_fn
2639

    
2640
    vg_name = self.cfg.GetVGName()
2641
    drbd_helper = self.cfg.GetDRBDHelper()
2642
    cluster = self.cfg.GetClusterInfo()
2643
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644
    hypervisors = cluster.enabled_hypervisors
2645
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2646

    
2647
    i_non_redundant = [] # Non redundant instances
2648
    i_non_a_balanced = [] # Non auto-balanced instances
2649
    n_offline = 0 # Count of offline nodes
2650
    n_drained = 0 # Count of nodes being drained
2651
    node_vol_should = {}
2652

    
2653
    # FIXME: verify OS list
2654

    
2655
    # File verification
2656
    filemap = _ComputeAncillaryFiles(cluster, False)
2657

    
2658
    # do local checksums
2659
    master_node = self.master_node = self.cfg.GetMasterNode()
2660
    master_ip = self.cfg.GetMasterIP()
2661

    
2662
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2663

    
2664
    node_verify_param = {
2665
      constants.NV_FILELIST:
2666
        utils.UniqueSequence(filename
2667
                             for files in filemap
2668
                             for filename in files),
2669
      constants.NV_NODELIST:
2670
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671
                                  self.all_node_info.values()),
2672
      constants.NV_HYPERVISOR: hypervisors,
2673
      constants.NV_HVPARAMS:
2674
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676
                                 for node in node_data_list
2677
                                 if not node.offline],
2678
      constants.NV_INSTANCELIST: hypervisors,
2679
      constants.NV_VERSION: None,
2680
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681
      constants.NV_NODESETUP: None,
2682
      constants.NV_TIME: None,
2683
      constants.NV_MASTERIP: (master_node, master_ip),
2684
      constants.NV_OSLIST: None,
2685
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2686
      }
2687

    
2688
    if vg_name is not None:
2689
      node_verify_param[constants.NV_VGLIST] = None
2690
      node_verify_param[constants.NV_LVLIST] = vg_name
2691
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2692
      node_verify_param[constants.NV_DRBDLIST] = None
2693

    
2694
    if drbd_helper:
2695
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2696

    
2697
    # bridge checks
2698
    # FIXME: this needs to be changed per node-group, not cluster-wide
2699
    bridges = set()
2700
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702
      bridges.add(default_nicpp[constants.NIC_LINK])
2703
    for instance in self.my_inst_info.values():
2704
      for nic in instance.nics:
2705
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707
          bridges.add(full_nic[constants.NIC_LINK])
2708

    
2709
    if bridges:
2710
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2711

    
2712
    # Build our expected cluster state
2713
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2714
                                                 name=node.name,
2715
                                                 vm_capable=node.vm_capable))
2716
                      for node in node_data_list)
2717

    
2718
    # Gather OOB paths
2719
    oob_paths = []
2720
    for node in self.all_node_info.values():
2721
      path = _SupportsOob(self.cfg, node)
2722
      if path and path not in oob_paths:
2723
        oob_paths.append(path)
2724

    
2725
    if oob_paths:
2726
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2727

    
2728
    for instance in self.my_inst_names:
2729
      inst_config = self.my_inst_info[instance]
2730

    
2731
      for nname in inst_config.all_nodes:
2732
        if nname not in node_image:
2733
          gnode = self.NodeImage(name=nname)
2734
          gnode.ghost = (nname not in self.all_node_info)
2735
          node_image[nname] = gnode
2736

    
2737
      inst_config.MapLVsByNode(node_vol_should)
2738

    
2739
      pnode = inst_config.primary_node
2740
      node_image[pnode].pinst.append(instance)
2741

    
2742
      for snode in inst_config.secondary_nodes:
2743
        nimg = node_image[snode]
2744
        nimg.sinst.append(instance)
2745
        if pnode not in nimg.sbp:
2746
          nimg.sbp[pnode] = []
2747
        nimg.sbp[pnode].append(instance)
2748

    
2749
    # At this point, we have the in-memory data structures complete,
2750
    # except for the runtime information, which we'll gather next
2751

    
2752
    # Due to the way our RPC system works, exact response times cannot be
2753
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754
    # time before and after executing the request, we can at least have a time
2755
    # window.
2756
    nvinfo_starttime = time.time()
2757
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2758
                                           node_verify_param,
2759
                                           self.cfg.GetClusterName())
2760
    nvinfo_endtime = time.time()
2761

    
2762
    if self.extra_lv_nodes and vg_name is not None:
2763
      extra_lv_nvinfo = \
2764
          self.rpc.call_node_verify(self.extra_lv_nodes,
2765
                                    {constants.NV_LVLIST: vg_name},
2766
                                    self.cfg.GetClusterName())
2767
    else:
2768
      extra_lv_nvinfo = {}
2769

    
2770
    all_drbd_map = self.cfg.ComputeDRBDMap()
2771

    
2772
    feedback_fn("* Gathering disk information (%s nodes)" %
2773
                len(self.my_node_names))
2774
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2775
                                     self.my_inst_info)
2776

    
2777
    feedback_fn("* Verifying configuration file consistency")
2778

    
2779
    # If not all nodes are being checked, we need to make sure the master node
2780
    # and a non-checked vm_capable node are in the list.
2781
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2782
    if absent_nodes:
2783
      vf_nvinfo = all_nvinfo.copy()
2784
      vf_node_info = list(self.my_node_info.values())
2785
      additional_nodes = []
2786
      if master_node not in self.my_node_info:
2787
        additional_nodes.append(master_node)
2788
        vf_node_info.append(self.all_node_info[master_node])
2789
      # Add the first vm_capable node we find which is not included
2790
      for node in absent_nodes:
2791
        nodeinfo = self.all_node_info[node]
2792
        if nodeinfo.vm_capable and not nodeinfo.offline:
2793
          additional_nodes.append(node)
2794
          vf_node_info.append(self.all_node_info[node])
2795
          break
2796
      key = constants.NV_FILELIST
2797
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798
                                                 {key: node_verify_param[key]},
2799
                                                 self.cfg.GetClusterName()))
2800
    else:
2801
      vf_nvinfo = all_nvinfo
2802
      vf_node_info = self.my_node_info.values()
2803

    
2804
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2805

    
2806
    feedback_fn("* Verifying node status")
2807

    
2808
    refos_img = None
2809

    
2810
    for node_i in node_data_list:
2811
      node = node_i.name
2812
      nimg = node_image[node]
2813

    
2814
      if node_i.offline:
2815
        if verbose:
2816
          feedback_fn("* Skipping offline node %s" % (node,))
2817
        n_offline += 1
2818
        continue
2819

    
2820
      if node == master_node:
2821
        ntype = "master"
2822
      elif node_i.master_candidate:
2823
        ntype = "master candidate"
2824
      elif node_i.drained:
2825
        ntype = "drained"
2826
        n_drained += 1
2827
      else:
2828
        ntype = "regular"
2829
      if verbose:
2830
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2831

    
2832
      msg = all_nvinfo[node].fail_msg
2833
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2834
      if msg:
2835
        nimg.rpc_fail = True
2836
        continue
2837

    
2838
      nresult = all_nvinfo[node].payload
2839

    
2840
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2841
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842
      self._VerifyNodeNetwork(node_i, nresult)
2843
      self._VerifyOob(node_i, nresult)
2844

    
2845
      if nimg.vm_capable:
2846
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2847
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2848
                             all_drbd_map)
2849

    
2850
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851
        self._UpdateNodeInstances(node_i, nresult, nimg)
2852
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853
        self._UpdateNodeOS(node_i, nresult, nimg)
2854

    
2855
        if not nimg.os_fail:
2856
          if refos_img is None:
2857
            refos_img = nimg
2858
          self._VerifyNodeOS(node_i, nimg, refos_img)
2859
        self._VerifyNodeBridges(node_i, nresult, bridges)
2860

    
2861
        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


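# Note (illustrative, values invented): each entry of res.payload handled in
# HooksCallBack above is a (script, hkr, output) tuple; a hypothetical failing
# hook would surface roughly as ("99-check-something", constants.HKR_FAIL,
# "error text"), and its output is re-indented via _HOOKS_INDENT_RE before
# being fed back to the user.
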
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


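# Sketch (assumed example, not executed): the ResultWithJobs returned above
# expands into one single-opcode job per node group, e.g. for two groups:
#   [[opcodes.OpGroupVerifyDisks(group_name="group-A")],
#    [opcodes.OpGroupVerifyDisks(group_name="group-B")]]
# The group names here are made up; the real ones come from the owned locks.
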
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


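# Shape of the tuple returned by LUGroupVerifyDisks.Exec above (example
# values are invented for illustration):
#   res_nodes:     {"node1": "error enumerating LVs ..."}  # per-node errors
#   res_instances: [inst, ...]      # instances which need activate-disks
#   res_missing:   {inst: [["node2", "lv-name"]], ...}     # missing volumes
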
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


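# Illustration (values invented): the hidden_os/blacklisted_os modifications
# handled by helper_os above are (action, os_name) pairs, e.g.:
#   [(constants.DDM_ADD, "debian-image"), (constants.DDM_REMOVE, "old-os")]
# Unknown actions raise errors.ProgrammerError.
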
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


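# The helper above returns four disjoint file sets; a minimal sketch of how a
# caller consumes them (see _RedistributeAncillaryFiles below):
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
# With redist=True the cluster config file is not included and, of the
# certificate files, only the RAPI certificate is shipped.
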
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


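# Timing behaviour of _WaitForSync above (informal summary of the code): RPC
# failures are retried every 6 seconds up to 10 times before raising
# RemoteError; while syncing, the loop sleeps min(60, estimated_time) between
# polls, and when the mirror looks done but degraded it performs up to 10
# one-second re-checks before reporting the degraded state.
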
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


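# Hypothetical usage of the helper above (a sketch, not taken from a specific
# caller): checking a disk on its primary node, with children covered by the
# recursive call:
#   ok = _CheckDiskConsistency(lu, dev, instance.primary_node, True)
# With ldisk=True only the local storage status (constants.LDS_OKAY) is
# considered instead of the overall is_degraded flag.
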
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


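# Expected out-of-band payload shapes, as enforced by _CheckPayload above
# (the item name "disk0" is invented for illustration):
#   OOB_HEALTH:              [("disk0", constants.OOB_STATUS_WARNING), ...]
#   OOB_POWER_STATUS:        a dict containing at least
#                            constants.OOB_POWER_STATUS_POWERED (boolean)
#   OOB_POWER_ON/OFF/CYCLE:  no payload (None)
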
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
4201
    """Computes the list of nodes and their attributes.
4202

4203
    """
4204
    # Locking is not used
4205
    assert not (compat.any(lu.glm.is_owned(level)
4206
                           for level in locking.LEVELS
4207
                           if level != locking.LEVEL_CLUSTER) or
4208
                self.do_locking or self.use_locking)
4209

    
4210
    valid_nodes = [node.name
4211
                   for node in lu.cfg.GetAllNodesInfo().values()
4212
                   if not node.offline and node.vm_capable]
4213
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4214
    cluster = lu.cfg.GetClusterInfo()
4215

    
4216
    data = {}
4217

    
4218
    for (os_name, os_data) in pol.items():
4219
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4220
                          hidden=(os_name in cluster.hidden_os),
4221
                          blacklisted=(os_name in cluster.blacklisted_os))
4222

    
4223
      variants = set()
4224
      parameters = set()
4225
      api_versions = set()
4226

    
4227
      for idx, osl in enumerate(os_data.values()):
4228
        info.valid = bool(info.valid and osl and osl[0][1])
4229
        if not info.valid:
4230
          break
4231

    
4232
        (node_variants, node_params, node_api) = osl[0][3:6]
4233
        if idx == 0:
4234
          # First entry
4235
          variants.update(node_variants)
4236
          parameters.update(node_params)
4237
          api_versions.update(node_api)
4238
        else:
4239
          # Filter out inconsistent values
4240
          variants.intersection_update(node_variants)
4241
          parameters.intersection_update(node_params)
4242
          api_versions.intersection_update(node_api)
4243

    
4244
      info.variants = list(variants)
4245
      info.parameters = list(parameters)
4246
      info.api_versions = list(api_versions)
4247

    
4248
      data[os_name] = info
4249

    
4250
    # Prepare data in requested order
4251
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4252
            if name in data]
4253

    
4254

    
4255
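# Illustrative sketch only (node names, paths and versions below are
# hypothetical, not taken from a real cluster): this is roughly the shape of
# the dictionary _DiagnoseByOS builds and _GetQueryData then iterates over::
#
#   pol = {
#     "debian-etch": {
#       "node1": [("/srv/ganeti/os/debian-etch", True, "", ["default"], [],
#                  [20])],
#       "node2": [("/srv/ganeti/os/debian-etch", True, "", ["default"], [],
#                  [20])],
#       },
#     }
#
# Each per-node list holds (path, status, diagnose, variants, parameters,
# api_versions) tuples; an OS is only reported as valid if the first entry on
# every node has status == True.
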
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


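# Illustrative sketch only: for a plain "list OS names" request (fields ==
# ["name"], no names given), _BuildFilter above should return just the legacy
# status filter, i.e. something equivalent to::
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# so hidden, blacklisted and invalid OSes stay out of the result unless the
# caller explicitly asks for those fields.
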
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


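# Illustrative sketch only (the node name is hypothetical): the old-style node
# query is a thin wrapper, CheckArguments builds a name filter and _NodeQuery
# decides on locking.  A query for static fields, e.g.::
#
#   filter_ = qlang.MakeSimpleFilter("name", ["node1.example.com"])
#   nq = _NodeQuery(filter_, ["name", "pip", "sip"], False)
#
# ends up with do_locking == False, so the answer comes purely from the
# configuration without any RPC to the node daemons.
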
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


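# Illustrative sketch only (all values below are made up, they merely show the
# shape of the result): with output_fields == ["node", "name", "size",
# "instance"], Exec above returns one stringified row per logical volume,
# e.g.::
#
#   [["node1.example.com", "0f3d4bcd.disk0", "10240", "instance1"],
#    ["node1.example.com", "4c390722.disk0", "2048", "-"]]
#
# Volumes not mapped to any instance get "-" in the instance column; offline
# nodes are skipped silently and failing nodes are skipped with a warning.
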
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


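# Illustrative sketch only (node and storage unit names are hypothetical): an
# example of the field juggling above.  For op.output_fields ==
# [constants.SF_NODE, constants.SF_NAME, constants.SF_FREE], the RPC only asks
# the backend for the name and free-space columns; the node and type columns
# are filled in locally by the LU, so a returned row could look like::
#
#   ["node1.example.com", "xenvg", 51200]
#
# The name column is always requested internally (even if not asked for) so
# that rows can be sorted by it.
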
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


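# Illustrative sketch only (all names are made up): a condensed example of the
# live-data bookkeeping above.  If the hypervisor on "node2" reports instance
# "inst1" while the configuration says inst1's primary node is "node1", then
# inst1 goes into wrongnode_inst instead of live_data, and the query layer can
# flag it as running on the wrong node.  Nodes whose RPC failed end up in
# bad_nodes; nodes marked offline appear in both offline_nodes and bad_nodes.
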
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


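# Illustrative sketch only (node names are hypothetical): BuildHooksNodes in
# LUNodeAdd means that, for a three-node cluster gaining a fourth node, the
# pre-phase hooks run on the existing nodes only while the post-phase hooks
# also include the newcomer::
#
#   pre_nodes  == ["node1", "node2", "node3"]
#   post_nodes == ["node1", "node2", "node3", "node4"]
#
# so a failure on the node being added cannot veto its own addition in the
# pre phase.
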
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


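# Illustrative sketch only: the role computation in LUNodeSetParams boils down
# to the _F2R/_R2F tables above, e.g.::
#
#   LUNodeSetParams._F2R[(True, False, False)] == LUNodeSetParams._ROLE_CANDIDATE
#   LUNodeSetParams._F2R[(False, False, False)] == LUNodeSetParams._ROLE_REGULAR
#   LUNodeSetParams._R2F[LUNodeSetParams._ROLE_OFFLINE] == (False, False, True)
#
# The (master_candidate, drained, offline) flag tuple and the node role are
# two views of the same state, which is why Exec can apply a role change by
# assigning the corresponding flag tuple back onto the node object.
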
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


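# Illustrative sketch only (OS name, hypervisor names and parameter values are
# made up): the os_hvp filtering in LUClusterQuery.Exec keeps only enabled
# hypervisors.  Given::
#
#   cluster.enabled_hypervisors == ["kvm"]
#   cluster.os_hvp == {"debian-etch": {"kvm": {"kernel_path": ""},
#                                      "xen-pvm": {"kernel_path": "/boot/x"}}}
#
# the reported dictionary becomes::
#
#   {"debian-etch": {"kvm": {"kernel_path": ""}}}
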
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5600
                           ignore_size=False):
5601
  """Prepare the block devices for an instance.
5602

5603
  This sets up the block devices on all nodes.
5604

5605
  @type lu: L{LogicalUnit}
5606
  @param lu: the logical unit on whose behalf we execute
5607
  @type instance: L{objects.Instance}
5608
  @param instance: the instance for whose disks we assemble
5609
  @type disks: list of L{objects.Disk} or None
5610
  @param disks: which disks to assemble (or all, if None)
5611
  @type ignore_secondaries: boolean
5612
  @param ignore_secondaries: if true, errors on secondary nodes
5613
      won't result in an error return from the function
5614
  @type ignore_size: boolean
5615
  @param ignore_size: if true, the current known size of the disk
5616
      will not be used during the disk activation, useful for cases
5617
      when the size is wrong
5618
  @return: False if the operation failed, otherwise a list of
5619
      (host, instance_visible_name, node_visible_name)
5620
      with the mapping from node devices to instance devices
5621

5622
  """
5623
  device_info = []
5624
  disks_ok = True
5625
  iname = instance.name
5626
  disks = _ExpandCheckDisks(instance, disks)
5627

    
5628
  # With the two passes mechanism we try to reduce the window of
5629
  # opportunity for the race condition of switching DRBD to primary
5630
  # before handshaking occured, but we do not eliminate it
5631

    
5632
  # The proper fix would be to wait (with some limits) until the
5633
  # connection has been made and drbd transitions from WFConnection
5634
  # into any other network-connected state (Connected, SyncTarget,
5635
  # SyncSource, etc.)
5636

    
5637
  # 1st pass, assemble on all nodes in secondary mode
5638
  for idx, inst_disk in enumerate(disks):
5639
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5640
      if ignore_size:
5641
        node_disk = node_disk.Copy()
5642
        node_disk.UnsetSize()
5643
      lu.cfg.SetDiskID(node_disk, node)
5644
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5645
      msg = result.fail_msg
5646
      if msg:
5647
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5648
                           " (is_primary=False, pass=1): %s",
5649
                           inst_disk.iv_name, node, msg)
5650
        if not ignore_secondaries:
5651
          disks_ok = False
5652

    
5653
  # FIXME: race condition on drbd migration to primary
5654

    
5655
  # 2nd pass, do only the primary node
5656
  for idx, inst_disk in enumerate(disks):
5657
    dev_path = None
5658

    
5659
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5660
      if node != instance.primary_node:
5661
        continue
5662
      if ignore_size:
5663
        node_disk = node_disk.Copy()
5664
        node_disk.UnsetSize()
5665
      lu.cfg.SetDiskID(node_disk, node)
5666
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5667
      msg = result.fail_msg
5668
      if msg:
5669
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5670
                           " (is_primary=True, pass=2): %s",
5671
                           inst_disk.iv_name, node, msg)
5672
        disks_ok = False
5673
      else:
5674
        dev_path = result.payload
5675

    
5676
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5677

    
5678
  # leave the disks configured for the primary node
5679
  # this is a workaround that would be fixed better by
5680
  # improving the logical/physical id handling
5681
  for disk in disks:
5682
    lu.cfg.SetDiskID(disk, instance.primary_node)
5683

    
5684
  return disks_ok, device_info
5685

    
5686

    
5687
def _StartInstanceDisks(lu, instance, force):
5688
  """Start the disks of an instance.
5689

5690
  """
5691
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5692
                                           ignore_secondaries=force)
5693
  if not disks_ok:
5694
    _ShutdownInstanceDisks(lu, instance)
5695
    if force is not None and not force:
5696
      lu.proc.LogWarning("", hint="If the message above refers to a"
5697
                         " secondary node,"
5698
                         " you can retry the operation using '--force'.")
5699
    raise errors.OpExecError("Disk consistency error")
5700

    
5701

    
5702
class LUInstanceDeactivateDisks(NoHooksLU):
5703
  """Shutdown an instance's disks.
5704

5705
  """
5706
  REQ_BGL = False
5707

    
5708
  def ExpandNames(self):
5709
    self._ExpandAndLockInstance()
5710
    self.needed_locks[locking.LEVEL_NODE] = []
5711
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5712

    
5713
  def DeclareLocks(self, level):
5714
    if level == locking.LEVEL_NODE:
5715
      self._LockInstancesNodes()
5716

    
5717
  def CheckPrereq(self):
5718
    """Check prerequisites.
5719

5720
    This checks that the instance is in the cluster.
5721

5722
    """
5723
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5724
    assert self.instance is not None, \
5725
      "Cannot retrieve locked instance %s" % self.op.instance_name
5726

    
5727
  def Exec(self, feedback_fn):
5728
    """Deactivate the disks
5729

5730
    """
5731
    instance = self.instance
5732
    if self.op.force:
5733
      _ShutdownInstanceDisks(self, instance)
5734
    else:
5735
      _SafeShutdownInstanceDisks(self, instance)
5736

    
5737

    
5738
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5739
  """Shutdown block devices of an instance.
5740

5741
  This function checks if an instance is running, before calling
5742
  _ShutdownInstanceDisks.
5743

5744
  """
5745
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5746
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5747

    
5748

    
5749
def _ExpandCheckDisks(instance, disks):
5750
  """Return the instance disks selected by the disks list
5751

5752
  @type disks: list of L{objects.Disk} or None
5753
  @param disks: selected disks
5754
  @rtype: list of L{objects.Disk}
5755
  @return: selected instance disks to act on
5756

5757
  """
5758
  if disks is None:
5759
    return instance.disks
5760
  else:
5761
    if not set(disks).issubset(instance.disks):
5762
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5763
                                   " target instance")
5764
    return disks
5765

    
5766

    
5767
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5768
  """Shutdown block devices of an instance.
5769

5770
  This does the shutdown on all nodes of the instance.
5771

5772
  If the ignore_primary is false, errors on the primary node are
5773
  ignored.
5774

5775
  """
5776
  all_result = True
5777
  disks = _ExpandCheckDisks(instance, disks)
5778

    
5779
  for disk in disks:
5780
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5781
      lu.cfg.SetDiskID(top_disk, node)
5782
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5783
      msg = result.fail_msg
5784
      if msg:
5785
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5786
                      disk.iv_name, node, msg)
5787
        if ((node == instance.primary_node and not ignore_primary) or
5788
            (node != instance.primary_node and not result.offline)):
5789
          all_result = False
5790
  return all_result
5791

    
5792

    
5793
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5794
  """Checks if a node has enough free memory.
5795

5796
  This function check if a given node has the needed amount of free
5797
  memory. In case the node has less memory or we cannot get the
5798
  information from the node, this function raise an OpPrereqError
5799
  exception.
5800

5801
  @type lu: C{LogicalUnit}
5802
  @param lu: a logical unit from which we get configuration data
5803
  @type node: C{str}
5804
  @param node: the node to check
5805
  @type reason: C{str}
5806
  @param reason: string to use in the error message
5807
  @type requested: C{int}
5808
  @param requested: the amount of memory in MiB to check for
5809
  @type hypervisor_name: C{str}
5810
  @param hypervisor_name: the hypervisor to ask for memory stats
5811
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5812
      we cannot check the node
5813

5814
  """
5815
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5816
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5817
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5818
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5819
  if not isinstance(free_mem, int):
5820
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5821
                               " was '%s'" % (node, free_mem),
5822
                               errors.ECODE_ENVIRON)
5823
  if requested > free_mem:
5824
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5825
                               " needed %s MiB, available %s MiB" %
5826
                               (node, reason, requested, free_mem),
5827
                               errors.ECODE_NORES)
5828

    
5829

    
5830
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5831
  """Checks if nodes have enough free disk space in the all VGs.
5832

5833
  This function check if all given nodes have the needed amount of
5834
  free disk. In case any node has less disk or we cannot get the
5835
  information from the node, this function raise an OpPrereqError
5836
  exception.
5837

5838
  @type lu: C{LogicalUnit}
5839
  @param lu: a logical unit from which we get configuration data
5840
  @type nodenames: C{list}
5841
  @param nodenames: the list of node names to check
5842
  @type req_sizes: C{dict}
5843
  @param req_sizes: the hash of vg and corresponding amount of disk in
5844
      MiB to check for
5845
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5846
      or we cannot check the node
5847

5848
  """
5849
  for vg, req_size in req_sizes.items():
5850
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5851

    
5852

    
5853
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5854
  """Checks if nodes have enough free disk space in the specified VG.
5855

5856
  This function check if all given nodes have the needed amount of
5857
  free disk. In case any node has less disk or we cannot get the
5858
  information from the node, this function raise an OpPrereqError
5859
  exception.
5860

5861
  @type lu: C{LogicalUnit}
5862
  @param lu: a logical unit from which we get configuration data
5863
  @type nodenames: C{list}
5864
  @param nodenames: the list of node names to check
5865
  @type vg: C{str}
5866
  @param vg: the volume group to check
5867
  @type requested: C{int}
5868
  @param requested: the amount of disk in MiB to check for
5869
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5870
      or we cannot check the node
5871

5872
  """
5873
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5874
  for node in nodenames:
5875
    info = nodeinfo[node]
5876
    info.Raise("Cannot get current information from node %s" % node,
5877
               prereq=True, ecode=errors.ECODE_ENVIRON)
5878
    vg_free = info.payload.get("vg_free", None)
5879
    if not isinstance(vg_free, int):
5880
      raise errors.OpPrereqError("Can't compute free disk space on node"
5881
                                 " %s for vg %s, result was '%s'" %
5882
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5883
    if requested > vg_free:
5884
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5885
                                 " vg %s: required %d MiB, available %d MiB" %
5886
                                 (node, vg, requested, vg_free),
5887
                                 errors.ECODE_NORES)
5888

    
5889

    
5890
class LUInstanceStartup(LogicalUnit):
5891
  """Starts an instance.
5892

5893
  """
5894
  HPATH = "instance-start"
5895
  HTYPE = constants.HTYPE_INSTANCE
5896
  REQ_BGL = False
5897

    
5898
  def CheckArguments(self):
5899
    # extra beparams
5900
    if self.op.beparams:
5901
      # fill the beparams dict
5902
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5903

    
5904
  def ExpandNames(self):
5905
    self._ExpandAndLockInstance()
5906

    
5907
  def BuildHooksEnv(self):
5908
    """Build hooks env.
5909

5910
    This runs on master, primary and secondary nodes of the instance.
5911

5912
    """
5913
    env = {
5914
      "FORCE": self.op.force,
5915
      }
5916

    
5917
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5918

    
5919
    return env
5920

    
5921
  def BuildHooksNodes(self):
5922
    """Build hooks nodes.
5923

5924
    """
5925
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5926
    return (nl, nl)
5927

    
5928
  def CheckPrereq(self):
5929
    """Check prerequisites.
5930

5931
    This checks that the instance is in the cluster.
5932

5933
    """
5934
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5935
    assert self.instance is not None, \
5936
      "Cannot retrieve locked instance %s" % self.op.instance_name
5937

    
5938
    # extra hvparams
5939
    if self.op.hvparams:
5940
      # check hypervisor parameter syntax (locally)
5941
      cluster = self.cfg.GetClusterInfo()
5942
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5943
      filled_hvp = cluster.FillHV(instance)
5944
      filled_hvp.update(self.op.hvparams)
5945
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5946
      hv_type.CheckParameterSyntax(filled_hvp)
5947
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5948

    
5949
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5950

    
5951
    if self.primary_offline and self.op.ignore_offline_nodes:
5952
      self.proc.LogWarning("Ignoring offline primary node")
5953

    
5954
      if self.op.hvparams or self.op.beparams:
5955
        self.proc.LogWarning("Overridden parameters are ignored")
5956
    else:
5957
      _CheckNodeOnline(self, instance.primary_node)
5958

    
5959
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5960

    
5961
      # check bridges existence
5962
      _CheckInstanceBridgesExist(self, instance)
5963

    
5964
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5965
                                                instance.name,
5966
                                                instance.hypervisor)
5967
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5968
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5969
      if not remote_info.payload: # not running already
5970
        _CheckNodeFreeMemory(self, instance.primary_node,
5971
                             "starting instance %s" % instance.name,
5972
                             bep[constants.BE_MEMORY], instance.hypervisor)
5973

    
5974
  def Exec(self, feedback_fn):
5975
    """Start the instance.
5976

5977
    """
5978
    instance = self.instance
5979
    force = self.op.force
5980

    
5981
    if not self.op.no_remember:
5982
      self.cfg.MarkInstanceUp(instance.name)
5983

    
5984
    if self.primary_offline:
5985
      assert self.op.ignore_offline_nodes
5986
      self.proc.LogInfo("Primary node offline, marked instance as started")
5987
    else:
5988
      node_current = instance.primary_node
5989

    
5990
      _StartInstanceDisks(self, instance, force)
5991

    
5992
      result = self.rpc.call_instance_start(node_current, instance,
5993
                                            self.op.hvparams, self.op.beparams,
5994
                                            self.op.startup_paused)
5995
      msg = result.fail_msg
5996
      if msg:
5997
        _ShutdownInstanceDisks(self, instance)
5998
        raise errors.OpExecError("Could not start instance: %s" % msg)
5999

    
6000

    
6001
class LUInstanceReboot(LogicalUnit):
6002
  """Reboot an instance.
6003

6004
  """
6005
  HPATH = "instance-reboot"
6006
  HTYPE = constants.HTYPE_INSTANCE
6007
  REQ_BGL = False
6008

    
6009
  def ExpandNames(self):
6010
    self._ExpandAndLockInstance()
6011

    
6012
  def BuildHooksEnv(self):
6013
    """Build hooks env.
6014

6015
    This runs on master, primary and secondary nodes of the instance.
6016

6017
    """
6018
    env = {
6019
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6020
      "REBOOT_TYPE": self.op.reboot_type,
6021
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6022
      }
6023

    
6024
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6025

    
6026
    return env
6027

    
6028
  def BuildHooksNodes(self):
6029
    """Build hooks nodes.
6030

6031
    """
6032
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6033
    return (nl, nl)
6034

    
6035
  def CheckPrereq(self):
6036
    """Check prerequisites.
6037

6038
    This checks that the instance is in the cluster.
6039

6040
    """
6041
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6042
    assert self.instance is not None, \
6043
      "Cannot retrieve locked instance %s" % self.op.instance_name
6044

    
6045
    _CheckNodeOnline(self, instance.primary_node)
6046

    
6047
    # check bridges existence
6048
    _CheckInstanceBridgesExist(self, instance)
6049

    
6050
  def Exec(self, feedback_fn):
6051
    """Reboot the instance.
6052

6053
    """
6054
    instance = self.instance
6055
    ignore_secondaries = self.op.ignore_secondaries
6056
    reboot_type = self.op.reboot_type
6057

    
6058
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6059
                                              instance.name,
6060
                                              instance.hypervisor)
6061
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6062
    instance_running = bool(remote_info.payload)
6063

    
6064
    node_current = instance.primary_node
6065

    
6066
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6067
                                            constants.INSTANCE_REBOOT_HARD]:
6068
      for disk in instance.disks:
6069
        self.cfg.SetDiskID(disk, node_current)
6070
      result = self.rpc.call_instance_reboot(node_current, instance,
6071
                                             reboot_type,
6072
                                             self.op.shutdown_timeout)
6073
      result.Raise("Could not reboot instance")
6074
    else:
6075
      if instance_running:
6076
        result = self.rpc.call_instance_shutdown(node_current, instance,
6077
                                                 self.op.shutdown_timeout)
6078
        result.Raise("Could not shutdown instance for full reboot")
6079
        _ShutdownInstanceDisks(self, instance)
6080
      else:
6081
        self.LogInfo("Instance %s was already stopped, starting now",
6082
                     instance.name)
6083
      _StartInstanceDisks(self, instance, ignore_secondaries)
6084
      result = self.rpc.call_instance_start(node_current, instance,
6085
                                            None, None, False)
6086
      msg = result.fail_msg
6087
      if msg:
6088
        _ShutdownInstanceDisks(self, instance)
6089
        raise errors.OpExecError("Could not start instance for"
6090
                                 " full reboot: %s" % msg)
6091

    
6092
    self.cfg.MarkInstanceUp(instance.name)
6093

    
6094

    
6095
class LUInstanceShutdown(LogicalUnit):
6096
  """Shutdown an instance.
6097

6098
  """
6099
  HPATH = "instance-stop"
6100
  HTYPE = constants.HTYPE_INSTANCE
6101
  REQ_BGL = False
6102

    
6103
  def ExpandNames(self):
6104
    self._ExpandAndLockInstance()
6105

    
6106
  def BuildHooksEnv(self):
6107
    """Build hooks env.
6108

6109
    This runs on master, primary and secondary nodes of the instance.
6110

6111
    """
6112
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6113
    env["TIMEOUT"] = self.op.timeout
6114
    return env
6115

    
6116
  def BuildHooksNodes(self):
6117
    """Build hooks nodes.
6118

6119
    """
6120
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6121
    return (nl, nl)
6122

    
6123
  def CheckPrereq(self):
6124
    """Check prerequisites.
6125

6126
    This checks that the instance is in the cluster.
6127

6128
    """
6129
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6130
    assert self.instance is not None, \
6131
      "Cannot retrieve locked instance %s" % self.op.instance_name
6132

    
6133
    self.primary_offline = \
6134
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6135

    
6136
    if self.primary_offline and self.op.ignore_offline_nodes:
6137
      self.proc.LogWarning("Ignoring offline primary node")
6138
    else:
6139
      _CheckNodeOnline(self, self.instance.primary_node)
6140

    
6141
  def Exec(self, feedback_fn):
6142
    """Shutdown the instance.
6143

6144
    """
6145
    instance = self.instance
6146
    node_current = instance.primary_node
6147
    timeout = self.op.timeout
6148

    
6149
    if not self.op.no_remember:
6150
      self.cfg.MarkInstanceDown(instance.name)
6151

    
6152
    if self.primary_offline:
6153
      assert self.op.ignore_offline_nodes
6154
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6155
    else:
6156
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6157
      msg = result.fail_msg
6158
      if msg:
6159
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6160

    
6161
      _ShutdownInstanceDisks(self, instance)
6162

    
6163

    
6164
class LUInstanceReinstall(LogicalUnit):
6165
  """Reinstall an instance.
6166

6167
  """
6168
  HPATH = "instance-reinstall"
6169
  HTYPE = constants.HTYPE_INSTANCE
6170
  REQ_BGL = False
6171

    
6172
  def ExpandNames(self):
6173
    self._ExpandAndLockInstance()
6174

    
6175
  def BuildHooksEnv(self):
6176
    """Build hooks env.
6177

6178
    This runs on master, primary and secondary nodes of the instance.
6179

6180
    """
6181
    return _BuildInstanceHookEnvByObject(self, self.instance)
6182

    
6183
  def BuildHooksNodes(self):
6184
    """Build hooks nodes.
6185

6186
    """
6187
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6188
    return (nl, nl)
6189

    
6190
  def CheckPrereq(self):
6191
    """Check prerequisites.
6192

6193
    This checks that the instance is in the cluster and is not running.
6194

6195
    """
6196
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6197
    assert instance is not None, \
6198
      "Cannot retrieve locked instance %s" % self.op.instance_name
6199
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6200
                     " offline, cannot reinstall")
6201
    for node in instance.secondary_nodes:
6202
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6203
                       " cannot reinstall")
6204

    
6205
    if instance.disk_template == constants.DT_DISKLESS:
6206
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6207
                                 self.op.instance_name,
6208
                                 errors.ECODE_INVAL)
6209
    _CheckInstanceDown(self, instance, "cannot reinstall")
6210

    
6211
    if self.op.os_type is not None:
6212
      # OS verification
6213
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6214
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6215
      instance_os = self.op.os_type
6216
    else:
6217
      instance_os = instance.os
6218

    
6219
    nodelist = list(instance.all_nodes)
6220

    
6221
    if self.op.osparams:
6222
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6223
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6224
      self.os_inst = i_osdict # the new dict (without defaults)
6225
    else:
6226
      self.os_inst = None
6227

    
6228
    self.instance = instance
6229

    
6230
  def Exec(self, feedback_fn):
6231
    """Reinstall the instance.
6232

6233
    """
6234
    inst = self.instance
6235

    
6236
    if self.op.os_type is not None:
6237
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6238
      inst.os = self.op.os_type
6239
      # Write to configuration
6240
      self.cfg.Update(inst, feedback_fn)
6241

    
6242
    _StartInstanceDisks(self, inst, None)
6243
    try:
6244
      feedback_fn("Running the instance OS create scripts...")
6245
      # FIXME: pass debug option from opcode to backend
6246
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6247
                                             self.op.debug_level,
6248
                                             osparams=self.os_inst)
6249
      result.Raise("Could not install OS for instance %s on node %s" %
6250
                   (inst.name, inst.primary_node))
6251
    finally:
6252
      _ShutdownInstanceDisks(self, inst)
6253

    
6254

    
6255
class LUInstanceRecreateDisks(LogicalUnit):
6256
  """Recreate an instance's missing disks.
6257

6258
  """
6259
  HPATH = "instance-recreate-disks"
6260
  HTYPE = constants.HTYPE_INSTANCE
6261
  REQ_BGL = False
6262

    
6263
  def CheckArguments(self):
6264
    # normalise the disk list
6265
    self.op.disks = sorted(frozenset(self.op.disks))
6266

    
6267
  def ExpandNames(self):
6268
    self._ExpandAndLockInstance()
6269
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6270
    if self.op.nodes:
6271
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6272
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6273
    else:
6274
      self.needed_locks[locking.LEVEL_NODE] = []
6275

    
6276
  def DeclareLocks(self, level):
6277
    if level == locking.LEVEL_NODE:
6278
      # if we replace the nodes, we only need to lock the old primary,
6279
      # otherwise we need to lock all nodes for disk re-creation
6280
      primary_only = bool(self.op.nodes)
6281
      self._LockInstancesNodes(primary_only=primary_only)
6282

    
6283
  def BuildHooksEnv(self):
6284
    """Build hooks env.
6285

6286
    This runs on master, primary and secondary nodes of the instance.
6287

6288
    """
6289
    return _BuildInstanceHookEnvByObject(self, self.instance)
6290

    
6291
  def BuildHooksNodes(self):
6292
    """Build hooks nodes.
6293

6294
    """
6295
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6296
    return (nl, nl)
6297

    
6298
  def CheckPrereq(self):
6299
    """Check prerequisites.
6300

6301
    This checks that the instance is in the cluster and is not running.
6302

6303
    """
6304
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6305
    assert instance is not None, \
6306
      "Cannot retrieve locked instance %s" % self.op.instance_name
6307
    if self.op.nodes:
6308
      if len(self.op.nodes) != len(instance.all_nodes):
6309
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6310
                                   " %d replacement nodes were specified" %
6311
                                   (instance.name, len(instance.all_nodes),
6312
                                    len(self.op.nodes)),
6313
                                   errors.ECODE_INVAL)
6314
      assert instance.disk_template != constants.DT_DRBD8 or \
6315
          len(self.op.nodes) == 2
6316
      assert instance.disk_template != constants.DT_PLAIN or \
6317
          len(self.op.nodes) == 1
6318
      primary_node = self.op.nodes[0]
6319
    else:
6320
      primary_node = instance.primary_node
6321
    _CheckNodeOnline(self, primary_node)
6322

    
6323
    if instance.disk_template == constants.DT_DISKLESS:
6324
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6325
                                 self.op.instance_name, errors.ECODE_INVAL)
6326
    # if we replace nodes *and* the old primary is offline, we don't
6327
    # check
6328
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6329
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6330
    if not (self.op.nodes and old_pnode.offline):
6331
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6332

    
6333
    if not self.op.disks:
6334
      self.op.disks = range(len(instance.disks))
6335
    else:
6336
      for idx in self.op.disks:
6337
        if idx >= len(instance.disks):
6338
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6339
                                     errors.ECODE_INVAL)
6340
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6341
      raise errors.OpPrereqError("Can't recreate disks partially and"
6342
                                 " change the nodes at the same time",
6343
                                 errors.ECODE_INVAL)
6344
    self.instance = instance
6345

    
6346
  def Exec(self, feedback_fn):
6347
    """Recreate the disks.
6348

6349
    """
6350
    instance = self.instance
6351

    
6352
    to_skip = []
6353
    mods = [] # keeps track of needed logical_id changes
6354

    
6355
    for idx, disk in enumerate(instance.disks):
6356
      if idx not in self.op.disks: # disk idx has not been passed in
6357
        to_skip.append(idx)
6358
        continue
6359
      # update secondaries for disks, if needed
6360
      if self.op.nodes:
6361
        if disk.dev_type == constants.LD_DRBD8:
6362
          # need to update the nodes and minors
6363
          assert len(self.op.nodes) == 2
6364
          assert len(disk.logical_id) == 6 # otherwise disk internals
6365
                                           # have changed
6366
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6367
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6368
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6369
                    new_minors[0], new_minors[1], old_secret)
6370
          assert len(disk.logical_id) == len(new_id)
6371
          mods.append((idx, new_id))
6372

    
6373
    # now that we have passed all asserts above, we can apply the mods
6374
    # in a single run (to avoid partial changes)
6375
    for idx, new_id in mods:
6376
      instance.disks[idx].logical_id = new_id
6377

    
6378
    # change primary node, if needed
6379
    if self.op.nodes:
6380
      instance.primary_node = self.op.nodes[0]
6381
      self.LogWarning("Changing the instance's nodes, you will have to"
6382
                      " remove any disks left on the older nodes manually")
6383

    
6384
    if self.op.nodes:
6385
      self.cfg.Update(instance, feedback_fn)
6386

    
6387
    _CreateDisks(self, instance, to_skip=to_skip)
6388

    
6389

    
6390
class LUInstanceRename(LogicalUnit):
6391
  """Rename an instance.
6392

6393
  """
6394
  HPATH = "instance-rename"
6395
  HTYPE = constants.HTYPE_INSTANCE
6396

    
6397
  def CheckArguments(self):
6398
    """Check arguments.
6399

6400
    """
6401
    if self.op.ip_check and not self.op.name_check:
6402
      # TODO: make the ip check more flexible and not depend on the name check
6403
      raise errors.OpPrereqError("IP address check requires a name check",
6404
                                 errors.ECODE_INVAL)
6405

    
6406
  def BuildHooksEnv(self):
6407
    """Build hooks env.
6408

6409
    This runs on master, primary and secondary nodes of the instance.
6410

6411
    """
6412
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6413
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6414
    return env
6415

    
6416
  def BuildHooksNodes(self):
6417
    """Build hooks nodes.
6418

6419
    """
6420
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6421
    return (nl, nl)
6422

    
6423
  def CheckPrereq(self):
6424
    """Check prerequisites.
6425

6426
    This checks that the instance is in the cluster and is not running.
6427

6428
    """
6429
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6430
                                                self.op.instance_name)
6431
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6432
    assert instance is not None
6433
    _CheckNodeOnline(self, instance.primary_node)
6434
    _CheckInstanceDown(self, instance, "cannot rename")
6435
    self.instance = instance
6436

    
6437
    new_name = self.op.new_name
6438
    if self.op.name_check:
6439
      hostname = netutils.GetHostname(name=new_name)
6440
      if hostname.name != new_name:
6441
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6442
                     hostname.name)
6443
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6444
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6445
                                    " same as given hostname '%s'") %
6446
                                    (hostname.name, self.op.new_name),
6447
                                    errors.ECODE_INVAL)
6448
      new_name = self.op.new_name = hostname.name
6449
      if (self.op.ip_check and
6450
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6451
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6452
                                   (hostname.ip, new_name),
6453
                                   errors.ECODE_NOTUNIQUE)
6454

    
6455
    instance_list = self.cfg.GetInstanceList()
6456
    if new_name in instance_list and new_name != instance.name:
6457
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6458
                                 new_name, errors.ECODE_EXISTS)
6459

    
6460
  def Exec(self, feedback_fn):
6461
    """Rename the instance.
6462

6463
    """
6464
    inst = self.instance
6465
    old_name = inst.name
6466

    
6467
    rename_file_storage = False
6468
    if (inst.disk_template in constants.DTS_FILEBASED and
6469
        self.op.new_name != inst.name):
6470
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6471
      rename_file_storage = True
6472

    
6473
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6474
    # Change the instance lock. This is definitely safe while we hold the BGL.
6475
    # Otherwise the new lock would have to be added in acquired mode.
6476
    assert self.REQ_BGL
6477
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6478
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6479

    
6480
    # re-read the instance from the configuration after rename
6481
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6482

    
6483
    if rename_file_storage:
6484
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6485
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6486
                                                     old_file_storage_dir,
6487
                                                     new_file_storage_dir)
6488
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6489
                   " (but the instance has been renamed in Ganeti)" %
6490
                   (inst.primary_node, old_file_storage_dir,
6491
                    new_file_storage_dir))
6492

    
6493
    _StartInstanceDisks(self, inst, None)
6494
    try:
6495
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6496
                                                 old_name, self.op.debug_level)
6497
      msg = result.fail_msg
6498
      if msg:
6499
        msg = ("Could not run OS rename script for instance %s on node %s"
6500
               " (but the instance has been renamed in Ganeti): %s" %
6501
               (inst.name, inst.primary_node, msg))
6502
        self.proc.LogWarning(msg)
6503
    finally:
6504
      _ShutdownInstanceDisks(self, inst)
6505

    
6506
    return inst.name
6507

    
6508

    
6509
class LUInstanceRemove(LogicalUnit):
6510
  """Remove an instance.
6511

6512
  """
6513
  HPATH = "instance-remove"
6514
  HTYPE = constants.HTYPE_INSTANCE
6515
  REQ_BGL = False
6516

    
6517
  def ExpandNames(self):
6518
    self._ExpandAndLockInstance()
6519
    self.needed_locks[locking.LEVEL_NODE] = []
6520
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6521

    
6522
  def DeclareLocks(self, level):
6523
    if level == locking.LEVEL_NODE:
6524
      self._LockInstancesNodes()
6525

    
6526
  def BuildHooksEnv(self):
6527
    """Build hooks env.
6528

6529
    This runs on master, primary and secondary nodes of the instance.
6530

6531
    """
6532
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6533
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6534
    return env
6535

    
6536
  def BuildHooksNodes(self):
6537
    """Build hooks nodes.
6538

6539
    """
6540
    nl = [self.cfg.GetMasterNode()]
6541
    nl_post = list(self.instance.all_nodes) + nl
6542
    return (nl, nl_post)
6543

    
6544
  def CheckPrereq(self):
6545
    """Check prerequisites.
6546

6547
    This checks that the instance is in the cluster.
6548

6549
    """
6550
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6551
    assert self.instance is not None, \
6552
      "Cannot retrieve locked instance %s" % self.op.instance_name
6553

    
6554
  def Exec(self, feedback_fn):
6555
    """Remove the instance.
6556

6557
    """
6558
    instance = self.instance
6559
    logging.info("Shutting down instance %s on node %s",
6560
                 instance.name, instance.primary_node)
6561

    
6562
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6563
                                             self.op.shutdown_timeout)
6564
    msg = result.fail_msg
6565
    if msg:
6566
      if self.op.ignore_failures:
6567
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6568
      else:
6569
        raise errors.OpExecError("Could not shutdown instance %s on"
6570
                                 " node %s: %s" %
6571
                                 (instance.name, instance.primary_node, msg))
6572

    
6573
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6574

    
6575

    
6576
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6577
  """Utility function to remove an instance.
6578

6579
  """
6580
  logging.info("Removing block devices for instance %s", instance.name)
6581

    
6582
  if not _RemoveDisks(lu, instance):
6583
    if not ignore_failures:
6584
      raise errors.OpExecError("Can't remove instance's disks")
6585
    feedback_fn("Warning: can't remove instance's disks")
6586

    
6587
  logging.info("Removing instance %s out of cluster config", instance.name)
6588

    
6589
  lu.cfg.RemoveInstance(instance.name)
6590

    
6591
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6592
    "Instance lock removal conflict"
6593

    
6594
  # Remove lock for the instance
6595
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6596

    
6597

    
6598
class LUInstanceQuery(NoHooksLU):
6599
  """Logical unit for querying instances.
6600

6601
  """
6602
  # pylint: disable=W0142
6603
  REQ_BGL = False
6604

    
6605
  def CheckArguments(self):
6606
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6607
                             self.op.output_fields, self.op.use_locking)
6608

    
6609
  def ExpandNames(self):
6610
    self.iq.ExpandNames(self)
6611

    
6612
  def DeclareLocks(self, level):
6613
    self.iq.DeclareLocks(self, level)
6614

    
6615
  def Exec(self, feedback_fn):
6616
    return self.iq.OldStyleQuery(self)
6617

    
6618

    
6619
class LUInstanceFailover(LogicalUnit):
6620
  """Failover an instance.
6621

6622
  """
6623
  HPATH = "instance-failover"
6624
  HTYPE = constants.HTYPE_INSTANCE
6625
  REQ_BGL = False
6626

    
6627
  def CheckArguments(self):
6628
    """Check the arguments.
6629

6630
    """
6631
    self.iallocator = getattr(self.op, "iallocator", None)
6632
    self.target_node = getattr(self.op, "target_node", None)
6633

    
6634
  def ExpandNames(self):
6635
    self._ExpandAndLockInstance()
6636

    
6637
    if self.op.target_node is not None:
6638
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6639

    
6640
    self.needed_locks[locking.LEVEL_NODE] = []
6641
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6642

    
6643
    ignore_consistency = self.op.ignore_consistency
6644
    shutdown_timeout = self.op.shutdown_timeout
6645
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6646
                                       cleanup=False,
6647
                                       failover=True,
6648
                                       ignore_consistency=ignore_consistency,
6649
                                       shutdown_timeout=shutdown_timeout)
6650
    self.tasklets = [self._migrater]
6651

    
6652
  def DeclareLocks(self, level):
6653
    if level == locking.LEVEL_NODE:
6654
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6655
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6656
        if self.op.target_node is None:
6657
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6658
        else:
6659
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6660
                                                   self.op.target_node]
6661
        del self.recalculate_locks[locking.LEVEL_NODE]
6662
      else:
6663
        self._LockInstancesNodes()
6664

    
6665
  def BuildHooksEnv(self):
6666
    """Build hooks env.
6667

6668
    This runs on master, primary and secondary nodes of the instance.
6669

6670
    """
6671
    instance = self._migrater.instance
6672
    source_node = instance.primary_node
6673
    target_node = self.op.target_node
6674
    env = {
6675
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6676
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677
      "OLD_PRIMARY": source_node,
6678
      "NEW_PRIMARY": target_node,
6679
      }
6680

    
6681
    if instance.disk_template in constants.DTS_INT_MIRROR:
6682
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6683
      env["NEW_SECONDARY"] = source_node
6684
    else:
6685
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6686

    
6687
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6688

    
6689
    return env
6690

    
6691
  def BuildHooksNodes(self):
6692
    """Build hooks nodes.
6693

6694
    """
6695
    instance = self._migrater.instance
6696
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6697
    return (nl, nl + [instance.primary_node])
6698

    
6699

    
6700
class LUInstanceMigrate(LogicalUnit):
6701
  """Migrate an instance.
6702

6703
  This is migration without shutting down, compared to the failover,
6704
  which is done with shutdown.
6705

6706
  """
6707
  HPATH = "instance-migrate"
6708
  HTYPE = constants.HTYPE_INSTANCE
6709
  REQ_BGL = False
6710

    
6711
  def ExpandNames(self):
6712
    self._ExpandAndLockInstance()
6713

    
6714
    if self.op.target_node is not None:
6715
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6716

    
6717
    self.needed_locks[locking.LEVEL_NODE] = []
6718
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6719

    
6720
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6721
                                       cleanup=self.op.cleanup,
6722
                                       failover=False,
6723
                                       fallback=self.op.allow_failover)
6724
    self.tasklets = [self._migrater]
6725

    
6726
  def DeclareLocks(self, level):
6727
    if level == locking.LEVEL_NODE:
6728
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6729
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6730
        if self.op.target_node is None:
6731
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6732
        else:
6733
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6734
                                                   self.op.target_node]
6735
        del self.recalculate_locks[locking.LEVEL_NODE]
6736
      else:
6737
        self._LockInstancesNodes()
6738

    
6739
  def BuildHooksEnv(self):
6740
    """Build hooks env.
6741

6742
    This runs on master, primary and secondary nodes of the instance.
6743

6744
    """
6745
    instance = self._migrater.instance
6746
    source_node = instance.primary_node
6747
    target_node = self.op.target_node
6748
    env = _BuildInstanceHookEnvByObject(self, instance)
6749
    env.update({
6750
      "MIGRATE_LIVE": self._migrater.live,
6751
      "MIGRATE_CLEANUP": self.op.cleanup,
6752
      "OLD_PRIMARY": source_node,
6753
      "NEW_PRIMARY": target_node,
6754
      })
6755

    
6756
    if instance.disk_template in constants.DTS_INT_MIRROR:
6757
      env["OLD_SECONDARY"] = target_node
6758
      env["NEW_SECONDARY"] = source_node
6759
    else:
6760
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6761

    
6762
    return env
6763

    
6764
  def BuildHooksNodes(self):
6765
    """Build hooks nodes.
6766

6767
    """
6768
    instance = self._migrater.instance
6769
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6770
    return (nl, nl + [instance.primary_node])
6771

    
6772

    
6773
class LUInstanceMove(LogicalUnit):
6774
  """Move an instance by data-copying.
6775

6776
  """
6777
  HPATH = "instance-move"
6778
  HTYPE = constants.HTYPE_INSTANCE
6779
  REQ_BGL = False
6780

    
6781
  def ExpandNames(self):
6782
    self._ExpandAndLockInstance()
6783
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6784
    self.op.target_node = target_node
6785
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6786
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6787

    
6788
  def DeclareLocks(self, level):
6789
    if level == locking.LEVEL_NODE:
6790
      self._LockInstancesNodes(primary_only=True)
6791

    
6792
  def BuildHooksEnv(self):
6793
    """Build hooks env.
6794

6795
    This runs on master, primary and secondary nodes of the instance.
6796

6797
    """
6798
    env = {
6799
      "TARGET_NODE": self.op.target_node,
6800
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6801
      }
6802
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6803
    return env
6804

    
6805
  def BuildHooksNodes(self):
6806
    """Build hooks nodes.
6807

6808
    """
6809
    nl = [
6810
      self.cfg.GetMasterNode(),
6811
      self.instance.primary_node,
6812
      self.op.target_node,
6813
      ]
6814
    return (nl, nl)
6815

    
6816
  def CheckPrereq(self):
6817
    """Check prerequisites.
6818

6819
    This checks that the instance is in the cluster.
6820

6821
    """
6822
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6823
    assert self.instance is not None, \
6824
      "Cannot retrieve locked instance %s" % self.op.instance_name
6825

    
6826
    node = self.cfg.GetNodeInfo(self.op.target_node)
6827
    assert node is not None, \
6828
      "Cannot retrieve locked node %s" % self.op.target_node
6829

    
6830
    self.target_node = target_node = node.name
6831

    
6832
    if target_node == instance.primary_node:
6833
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6834
                                 (instance.name, target_node),
6835
                                 errors.ECODE_STATE)
6836

    
6837
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6838

    
6839
    for idx, dsk in enumerate(instance.disks):
6840
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6841
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6842
                                   " cannot copy" % idx, errors.ECODE_STATE)
6843

    
6844
    _CheckNodeOnline(self, target_node)
6845
    _CheckNodeNotDrained(self, target_node)
6846
    _CheckNodeVmCapable(self, target_node)
6847

    
6848
    if instance.admin_up:
6849
      # check memory requirements on the secondary node
6850
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6851
                           instance.name, bep[constants.BE_MEMORY],
6852
                           instance.hypervisor)
6853
    else:
6854
      self.LogInfo("Not checking memory on the secondary node as"
6855
                   " instance will not be started")
6856

    
6857
    # check bridge existance
6858
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6859

    
6860
  def Exec(self, feedback_fn):
6861
    """Move an instance.
6862

6863
    The move is done by shutting it down on its present node, copying
6864
    the data over (slow) and starting it on the new node.
6865

6866
    """
6867
    instance = self.instance
6868

    
6869
    source_node = instance.primary_node
6870
    target_node = self.target_node
6871

    
6872
    self.LogInfo("Shutting down instance %s on source node %s",
6873
                 instance.name, source_node)
6874

    
6875
    result = self.rpc.call_instance_shutdown(source_node, instance,
6876
                                             self.op.shutdown_timeout)
6877
    msg = result.fail_msg
6878
    if msg:
6879
      if self.op.ignore_consistency:
6880
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6881
                             " Proceeding anyway. Please make sure node"
6882
                             " %s is down. Error details: %s",
6883
                             instance.name, source_node, source_node, msg)
6884
      else:
6885
        raise errors.OpExecError("Could not shutdown instance %s on"
6886
                                 " node %s: %s" %
6887
                                 (instance.name, source_node, msg))
6888

    
6889
    # create the target disks
6890
    try:
6891
      _CreateDisks(self, instance, target_node=target_node)
6892
    except errors.OpExecError:
6893
      self.LogWarning("Device creation failed, reverting...")
6894
      try:
6895
        _RemoveDisks(self, instance, target_node=target_node)
6896
      finally:
6897
        self.cfg.ReleaseDRBDMinors(instance.name)
6898
        raise
6899

    
6900
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6901

    
6902
    errs = []
6903
    # activate, get path, copy the data over
6904
    for idx, disk in enumerate(instance.disks):
6905
      self.LogInfo("Copying data for disk %d", idx)
6906
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6907
                                               instance.name, True, idx)
6908
      if result.fail_msg:
6909
        self.LogWarning("Can't assemble newly created disk %d: %s",
6910
                        idx, result.fail_msg)
6911
        errs.append(result.fail_msg)
6912
        break
6913
      dev_path = result.payload
6914
      result = self.rpc.call_blockdev_export(source_node, disk,
6915
                                             target_node, dev_path,
6916
                                             cluster_name)
6917
      if result.fail_msg:
6918
        self.LogWarning("Can't copy data over for disk %d: %s",
6919
                        idx, result.fail_msg)
6920
        errs.append(result.fail_msg)
6921
        break
6922

    
6923
    if errs:
6924
      self.LogWarning("Some disks failed to copy, aborting")
6925
      try:
6926
        _RemoveDisks(self, instance, target_node=target_node)
6927
      finally:
6928
        self.cfg.ReleaseDRBDMinors(instance.name)
6929
        raise errors.OpExecError("Errors during disk copy: %s" %
6930
                                 (",".join(errs),))
6931

    
6932
    instance.primary_node = target_node
6933
    self.cfg.Update(instance, feedback_fn)
6934

    
6935
    self.LogInfo("Removing the disks on the original node")
6936
    _RemoveDisks(self, instance, target_node=source_node)
6937

    
6938
    # Only start the instance if it's marked as up
6939
    if instance.admin_up:
6940
      self.LogInfo("Starting instance %s on node %s",
6941
                   instance.name, target_node)
6942

    
6943
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6944
                                           ignore_secondaries=True)
6945
      if not disks_ok:
6946
        _ShutdownInstanceDisks(self, instance)
6947
        raise errors.OpExecError("Can't activate the instance's disks")
6948

    
6949
      result = self.rpc.call_instance_start(target_node, instance,
6950
                                            None, None, False)
6951
      msg = result.fail_msg
6952
      if msg:
6953
        _ShutdownInstanceDisks(self, instance)
6954
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6955
                                 (instance.name, target_node, msg))
6956

    
6957

    
6958
class LUNodeMigrate(LogicalUnit):
6959
  """Migrate all instances from a node.
6960

6961
  """
6962
  HPATH = "node-migrate"
6963
  HTYPE = constants.HTYPE_NODE
6964
  REQ_BGL = False
6965

    
6966
  def CheckArguments(self):
6967
    pass
6968

    
6969
  def ExpandNames(self):
6970
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6971

    
6972
    self.share_locks = _ShareAll()
6973
    self.needed_locks = {
6974
      locking.LEVEL_NODE: [self.op.node_name],
6975
      }
6976

    
6977
  def BuildHooksEnv(self):
6978
    """Build hooks env.
6979

6980
    This runs on the master, the primary and all the secondaries.
6981

6982
    """
6983
    return {
6984
      "NODE_NAME": self.op.node_name,
6985
      }
6986

    
6987
  def BuildHooksNodes(self):
6988
    """Build hooks nodes.
6989

6990
    """
6991
    nl = [self.cfg.GetMasterNode()]
6992
    return (nl, nl)
6993

    
6994
  def CheckPrereq(self):
6995
    pass
6996

    
6997
  def Exec(self, feedback_fn):
6998
    # Prepare jobs for migration instances
6999
    jobs = [
7000
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7001
                                 mode=self.op.mode,
7002
                                 live=self.op.live,
7003
                                 iallocator=self.op.iallocator,
7004
                                 target_node=self.op.target_node)]
7005
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7006
      ]
7007

    
7008
    # TODO: Run iallocator in this opcode and pass correct placement options to
7009
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7010
    # running the iallocator and the actual migration, a good consistency model
7011
    # will have to be found.
7012

    
7013
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7014
            frozenset([self.op.node_name]))
7015

    
7016
    return ResultWithJobs(jobs)
7017

    
7018

    
7019
class TLMigrateInstance(Tasklet):
7020
  """Tasklet class for instance migration.
7021

7022
  @type live: boolean
7023
  @ivar live: whether the migration will be done live or non-live;
7024
      this variable is initalized only after CheckPrereq has run
7025
  @type cleanup: boolean
7026
  @ivar cleanup: Wheater we cleanup from a failed migration
7027
  @type iallocator: string
7028
  @ivar iallocator: The iallocator used to determine target_node
7029
  @type target_node: string
7030
  @ivar target_node: If given, the target_node to reallocate the instance to
7031
  @type failover: boolean
7032
  @ivar failover: Whether operation results in failover or migration
7033
  @type fallback: boolean
7034
  @ivar fallback: Whether fallback to failover is allowed if migration not
7035
                  possible
7036
  @type ignore_consistency: boolean
7037
  @ivar ignore_consistency: Whether we should ignore consistency between source
7038
                            and target node
7039
  @type shutdown_timeout: int
7040
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7041

7042
  """
7043
  def __init__(self, lu, instance_name, cleanup=False,
7044
               failover=False, fallback=False,
7045
               ignore_consistency=False,
7046
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7047
    """Initializes this class.
7048

7049
    """
7050
    Tasklet.__init__(self, lu)
7051

    
7052
    # Parameters
7053
    self.instance_name = instance_name
7054
    self.cleanup = cleanup
7055
    self.live = False # will be overridden later
7056
    self.failover = failover
7057
    self.fallback = fallback
7058
    self.ignore_consistency = ignore_consistency
7059
    self.shutdown_timeout = shutdown_timeout
7060

    
7061
  def CheckPrereq(self):
7062
    """Check prerequisites.
7063

7064
    This checks that the instance is in the cluster.
7065

7066
    """
7067
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7068
    instance = self.cfg.GetInstanceInfo(instance_name)
7069
    assert instance is not None
7070
    self.instance = instance
7071

    
7072
    if (not self.cleanup and not instance.admin_up and not self.failover and
7073
        self.fallback):
7074
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7075
                      " to failover")
7076
      self.failover = True
7077

    
7078
    if instance.disk_template not in constants.DTS_MIRRORED:
7079
      if self.failover:
7080
        text = "failovers"
7081
      else:
7082
        text = "migrations"
7083
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7084
                                 " %s" % (instance.disk_template, text),
7085
                                 errors.ECODE_STATE)
7086

    
7087
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7088
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7089

    
7090
      if self.lu.op.iallocator:
7091
        self._RunAllocator()
7092
      else:
7093
        # We set self.target_node as it is required by
7094
        # BuildHooksEnv
7095
        self.target_node = self.lu.op.target_node
7096

    
7097
      # self.target_node is already populated, either directly or by the
7098
      # iallocator run
7099
      target_node = self.target_node
7100
      if self.target_node == instance.primary_node:
7101
        raise errors.OpPrereqError("Cannot migrate instance %s"
7102
                                   " to its primary (%s)" %
7103
                                   (instance.name, instance.primary_node))
7104

    
7105
      if len(self.lu.tasklets) == 1:
7106
        # It is safe to release locks only when we're the only tasklet
7107
        # in the LU
7108
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7109
                      keep=[instance.primary_node, self.target_node])
7110

    
7111
    else:
7112
      secondary_nodes = instance.secondary_nodes
7113
      if not secondary_nodes:
7114
        raise errors.ConfigurationError("No secondary node but using"
7115
                                        " %s disk template" %
7116
                                        instance.disk_template)
7117
      target_node = secondary_nodes[0]
7118
      if self.lu.op.iallocator or (self.lu.op.target_node and
7119
                                   self.lu.op.target_node != target_node):
7120
        if self.failover:
7121
          text = "failed over"
7122
        else:
7123
          text = "migrated"
7124
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7125
                                   " be %s to arbitrary nodes"
7126
                                   " (neither an iallocator nor a target"
7127
                                   " node can be passed)" %
7128
                                   (instance.disk_template, text),
7129
                                   errors.ECODE_INVAL)
7130

    
7131
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7132

    
7133
    # check memory requirements on the secondary node
7134
    if not self.failover or instance.admin_up:
7135
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7136
                           instance.name, i_be[constants.BE_MEMORY],
7137
                           instance.hypervisor)
7138
    else:
7139
      self.lu.LogInfo("Not checking memory on the secondary node as"
7140
                      " instance will not be started")
7141

    
7142
    # check bridge existence
7143
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7144

    
7145
    if not self.cleanup:
7146
      _CheckNodeNotDrained(self.lu, target_node)
7147
      if not self.failover:
7148
        result = self.rpc.call_instance_migratable(instance.primary_node,
7149
                                                   instance)
7150
        if result.fail_msg and self.fallback:
7151
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7152
                          " failover")
7153
          self.failover = True
7154
        else:
7155
          result.Raise("Can't migrate, please use failover",
7156
                       prereq=True, ecode=errors.ECODE_STATE)
7157

    
7158
    assert not (self.failover and self.cleanup)
7159

    
7160
    if not self.failover:
7161
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7162
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7163
                                   " parameters are accepted",
7164
                                   errors.ECODE_INVAL)
7165
      if self.lu.op.live is not None:
7166
        if self.lu.op.live:
7167
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7168
        else:
7169
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7170
        # reset the 'live' parameter to None so that repeated
7171
        # invocations of CheckPrereq do not raise an exception
7172
        self.lu.op.live = None
7173
      elif self.lu.op.mode is None:
7174
        # read the default value from the hypervisor
7175
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7176
                                                skip_globals=False)
7177
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7178

    
7179
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7180
    else:
7181
      # Failover is never live
7182
      self.live = False
7183

    
7184
  def _RunAllocator(self):
7185
    """Run the allocator based on input opcode.
7186

7187
    """
7188
    ial = IAllocator(self.cfg, self.rpc,
7189
                     mode=constants.IALLOCATOR_MODE_RELOC,
7190
                     name=self.instance_name,
7191
                     # TODO See why hail breaks with a single node below
7192
                     relocate_from=[self.instance.primary_node,
7193
                                    self.instance.primary_node],
7194
                     )
7195

    
7196
    ial.Run(self.lu.op.iallocator)
7197

    
7198
    if not ial.success:
7199
      raise errors.OpPrereqError("Can't compute nodes using"
7200
                                 " iallocator '%s': %s" %
7201
                                 (self.lu.op.iallocator, ial.info),
7202
                                 errors.ECODE_NORES)
7203
    if len(ial.result) != ial.required_nodes:
7204
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7205
                                 " of nodes (%s), required %s" %
7206
                                 (self.lu.op.iallocator, len(ial.result),
7207
                                  ial.required_nodes), errors.ECODE_FAULT)
7208
    self.target_node = ial.result[0]
7209
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7210
                    self.instance_name, self.lu.op.iallocator,
7211
                    utils.CommaJoin(ial.result))
7212

    
7213
  def _WaitUntilSync(self):
7214
    """Poll with custom rpc for disk sync.
7215

7216
    This uses our own step-based rpc call.
7217

7218
    """
7219
    self.feedback_fn("* wait until resync is done")
7220
    all_done = False
7221
    while not all_done:
7222
      all_done = True
7223
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7224
                                            self.nodes_ip,
7225
                                            self.instance.disks)
7226
      min_percent = 100
7227
      for node, nres in result.items():
7228
        nres.Raise("Cannot resync disks on node %s" % node)
7229
        node_done, node_percent = nres.payload
7230
        all_done = all_done and node_done
7231
        if node_percent is not None:
7232
          min_percent = min(min_percent, node_percent)
7233
      if not all_done:
7234
        if min_percent < 100:
7235
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7236
        time.sleep(2)
7237

    
7238
  def _EnsureSecondary(self, node):
7239
    """Demote a node to secondary.
7240

7241
    """
7242
    self.feedback_fn("* switching node %s to secondary mode" % node)
7243

    
7244
    for dev in self.instance.disks:
7245
      self.cfg.SetDiskID(dev, node)
7246

    
7247
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7248
                                          self.instance.disks)
7249
    result.Raise("Cannot change disk to secondary on node %s" % node)
7250

    
7251
  def _GoStandalone(self):
7252
    """Disconnect from the network.
7253

7254
    """
7255
    self.feedback_fn("* changing into standalone mode")
7256
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7257
                                               self.instance.disks)
7258
    for node, nres in result.items():
7259
      nres.Raise("Cannot disconnect disks node %s" % node)
7260

    
7261
  def _GoReconnect(self, multimaster):
7262
    """Reconnect to the network.
7263

7264
    """
7265
    if multimaster:
7266
      msg = "dual-master"
7267
    else:
7268
      msg = "single-master"
7269
    self.feedback_fn("* changing disks into %s mode" % msg)
7270
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7271
                                           self.instance.disks,
7272
                                           self.instance.name, multimaster)
7273
    for node, nres in result.items():
7274
      nres.Raise("Cannot change disks config on node %s" % node)
7275

    
7276
  def _ExecCleanup(self):
7277
    """Try to cleanup after a failed migration.
7278

7279
    The cleanup is done by:
7280
      - check that the instance is running only on one node
7281
        (and update the config if needed)
7282
      - change disks on its secondary node to secondary
7283
      - wait until disks are fully synchronized
7284
      - disconnect from the network
7285
      - change disks into single-master mode
7286
      - wait again until disks are fully synchronized
7287

7288
    """
7289
    instance = self.instance
7290
    target_node = self.target_node
7291
    source_node = self.source_node
7292

    
7293
    # check running on only one node
7294
    self.feedback_fn("* checking where the instance actually runs"
7295
                     " (if this hangs, the hypervisor might be in"
7296
                     " a bad state)")
7297
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7298
    for node, result in ins_l.items():
7299
      result.Raise("Can't contact node %s" % node)
7300

    
7301
    runningon_source = instance.name in ins_l[source_node].payload
7302
    runningon_target = instance.name in ins_l[target_node].payload
7303

    
7304
    if runningon_source and runningon_target:
7305
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7306
                               " or the hypervisor is confused; you will have"
7307
                               " to ensure manually that it runs only on one"
7308
                               " and restart this operation")
7309

    
7310
    if not (runningon_source or runningon_target):
7311
      raise errors.OpExecError("Instance does not seem to be running at all;"
7312
                               " in this case it's safer to repair by"
7313
                               " running 'gnt-instance stop' to ensure disk"
7314
                               " shutdown, and then restarting it")
7315

    
7316
    if runningon_target:
7317
      # the migration has actually succeeded, we need to update the config
7318
      self.feedback_fn("* instance running on secondary node (%s),"
7319
                       " updating config" % target_node)
7320
      instance.primary_node = target_node
7321
      self.cfg.Update(instance, self.feedback_fn)
7322
      demoted_node = source_node
7323
    else:
7324
      self.feedback_fn("* instance confirmed to be running on its"
7325
                       " primary node (%s)" % source_node)
7326
      demoted_node = target_node
7327

    
7328
    if instance.disk_template in constants.DTS_INT_MIRROR:
7329
      self._EnsureSecondary(demoted_node)
7330
      try:
7331
        self._WaitUntilSync()
7332
      except errors.OpExecError:
7333
        # we ignore here errors, since if the device is standalone, it
7334
        # won't be able to sync
7335
        pass
7336
      self._GoStandalone()
7337
      self._GoReconnect(False)
7338
      self._WaitUntilSync()
7339

    
7340
    self.feedback_fn("* done")
7341

    
7342
  def _RevertDiskStatus(self):
7343
    """Try to revert the disk status after a failed migration.
7344

7345
    """
7346
    target_node = self.target_node
7347
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7348
      return
7349

    
7350
    try:
7351
      self._EnsureSecondary(target_node)
7352
      self._GoStandalone()
7353
      self._GoReconnect(False)
7354
      self._WaitUntilSync()
7355
    except errors.OpExecError, err:
7356
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7357
                         " please try to recover the instance manually;"
7358
                         " error '%s'" % str(err))
7359

    
7360
  def _AbortMigration(self):
7361
    """Call the hypervisor code to abort a started migration.
7362

7363
    """
7364
    instance = self.instance
7365
    target_node = self.target_node
7366
    migration_info = self.migration_info
7367

    
7368
    abort_result = self.rpc.call_finalize_migration(target_node,
7369
                                                    instance,
7370
                                                    migration_info,
7371
                                                    False)
7372
    abort_msg = abort_result.fail_msg
7373
    if abort_msg:
7374
      logging.error("Aborting migration failed on target node %s: %s",
7375
                    target_node, abort_msg)
7376
      # Don't raise an exception here, as we still have to try to revert the
7377
      # disk status, even if this step failed.
7378

    
7379
  def _ExecMigration(self):
7380
    """Migrate an instance.
7381

7382
    The migrate is done by:
7383
      - change the disks into dual-master mode
7384
      - wait until disks are fully synchronized again
7385
      - migrate the instance
7386
      - change disks on the new secondary node (the old primary) to secondary
7387
      - wait until disks are fully synchronized
7388
      - change disks into single-master mode
7389

7390
    """
7391
    instance = self.instance
7392
    target_node = self.target_node
7393
    source_node = self.source_node
7394

    
7395
    self.feedback_fn("* checking disk consistency between source and target")
7396
    for dev in instance.disks:
7397
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7398
        raise errors.OpExecError("Disk %s is degraded or not fully"
7399
                                 " synchronized on target node,"
7400
                                 " aborting migration" % dev.iv_name)
7401

    
7402
    # First get the migration information from the remote node
7403
    result = self.rpc.call_migration_info(source_node, instance)
7404
    msg = result.fail_msg
7405
    if msg:
7406
      log_err = ("Failed fetching source migration information from %s: %s" %
7407
                 (source_node, msg))
7408
      logging.error(log_err)
7409
      raise errors.OpExecError(log_err)
7410

    
7411
    self.migration_info = migration_info = result.payload
7412

    
7413
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7414
      # Then switch the disks to master/master mode
7415
      self._EnsureSecondary(target_node)
7416
      self._GoStandalone()
7417
      self._GoReconnect(True)
7418
      self._WaitUntilSync()
7419

    
7420
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7421
    result = self.rpc.call_accept_instance(target_node,
7422
                                           instance,
7423
                                           migration_info,
7424
                                           self.nodes_ip[target_node])
7425

    
7426
    msg = result.fail_msg
7427
    if msg:
7428
      logging.error("Instance pre-migration failed, trying to revert"
7429
                    " disk status: %s", msg)
7430
      self.feedback_fn("Pre-migration failed, aborting")
7431
      self._AbortMigration()
7432
      self._RevertDiskStatus()
7433
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7434
                               (instance.name, msg))
7435

    
7436
    self.feedback_fn("* migrating instance to %s" % target_node)
7437
    result = self.rpc.call_instance_migrate(source_node, instance,
7438
                                            self.nodes_ip[target_node],
7439
                                            self.live)
7440
    msg = result.fail_msg
7441
    if msg:
7442
      logging.error("Instance migration failed, trying to revert"
7443
                    " disk status: %s", msg)
7444
      self.feedback_fn("Migration failed, aborting")
7445
      self._AbortMigration()
7446
      self._RevertDiskStatus()
7447
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7448
                               (instance.name, msg))
7449

    
7450
    instance.primary_node = target_node
7451
    # distribute new instance config to the other nodes
7452
    self.cfg.Update(instance, self.feedback_fn)
7453

    
7454
    result = self.rpc.call_finalize_migration(target_node,
7455
                                              instance,
7456
                                              migration_info,
7457
                                              True)
7458
    msg = result.fail_msg
7459
    if msg:
7460
      logging.error("Instance migration succeeded, but finalization failed:"
7461
                    " %s", msg)
7462
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7463
                               msg)
7464

    
7465
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7466
      self._EnsureSecondary(source_node)
7467
      self._WaitUntilSync()
7468
      self._GoStandalone()
7469
      self._GoReconnect(False)
7470
      self._WaitUntilSync()
7471

    
7472
    self.feedback_fn("* done")
7473

    
7474
  def _ExecFailover(self):
7475
    """Failover an instance.
7476

7477
    The failover is done by shutting it down on its present node and
7478
    starting it on the secondary.
7479

7480
    """
7481
    instance = self.instance
7482
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7483

    
7484
    source_node = instance.primary_node
7485
    target_node = self.target_node
7486

    
7487
    if instance.admin_up:
7488
      self.feedback_fn("* checking disk consistency between source and target")
7489
      for dev in instance.disks:
7490
        # for drbd, these are drbd over lvm
7491
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7492
          if primary_node.offline:
7493
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7494
                             " target node %s" %
7495
                             (primary_node.name, dev.iv_name, target_node))
7496
          elif not self.ignore_consistency:
7497
            raise errors.OpExecError("Disk %s is degraded on target node,"
7498
                                     " aborting failover" % dev.iv_name)
7499
    else:
7500
      self.feedback_fn("* not checking disk consistency as instance is not"
7501
                       " running")
7502

    
7503
    self.feedback_fn("* shutting down instance on source node")
7504
    logging.info("Shutting down instance %s on node %s",
7505
                 instance.name, source_node)
7506

    
7507
    result = self.rpc.call_instance_shutdown(source_node, instance,
7508
                                             self.shutdown_timeout)
7509
    msg = result.fail_msg
7510
    if msg:
7511
      if self.ignore_consistency or primary_node.offline:
7512
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7513
                           " proceeding anyway; please make sure node"
7514
                           " %s is down; error details: %s",
7515
                           instance.name, source_node, source_node, msg)
7516
      else:
7517
        raise errors.OpExecError("Could not shutdown instance %s on"
7518
                                 " node %s: %s" %
7519
                                 (instance.name, source_node, msg))
7520

    
7521
    self.feedback_fn("* deactivating the instance's disks on source node")
7522
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7523
      raise errors.OpExecError("Can't shut down the instance's disks")
7524

    
7525
    instance.primary_node = target_node
7526
    # distribute new instance config to the other nodes
7527
    self.cfg.Update(instance, self.feedback_fn)
7528

    
7529
    # Only start the instance if it's marked as up
7530
    if instance.admin_up:
7531
      self.feedback_fn("* activating the instance's disks on target node %s" %
7532
                       target_node)
7533
      logging.info("Starting instance %s on node %s",
7534
                   instance.name, target_node)
7535

    
7536
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7537
                                           ignore_secondaries=True)
7538
      if not disks_ok:
7539
        _ShutdownInstanceDisks(self.lu, instance)
7540
        raise errors.OpExecError("Can't activate the instance's disks")
7541

    
7542
      self.feedback_fn("* starting the instance on the target node %s" %
7543
                       target_node)
7544
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7545
                                            False)
7546
      msg = result.fail_msg
7547
      if msg:
7548
        _ShutdownInstanceDisks(self.lu, instance)
7549
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7550
                                 (instance.name, target_node, msg))
7551

    
7552
  def Exec(self, feedback_fn):
7553
    """Perform the migration.
7554

7555
    """
7556
    self.feedback_fn = feedback_fn
7557
    self.source_node = self.instance.primary_node
7558

    
7559
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7560
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7561
      self.target_node = self.instance.secondary_nodes[0]
7562
      # Otherwise self.target_node has been populated either
7563
      # directly, or through an iallocator.
7564

    
7565
    self.all_nodes = [self.source_node, self.target_node]
7566
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7567
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7568

    
7569
    if self.failover:
7570
      feedback_fn("Failover instance %s" % self.instance.name)
7571
      self._ExecFailover()
7572
    else:
7573
      feedback_fn("Migrating instance %s" % self.instance.name)
7574

    
7575
      if self.cleanup:
7576
        return self._ExecCleanup()
7577
      else:
7578
        return self._ExecMigration()
7579

    
7580
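# Illustrative sketch (assumed wiring, shown only as an example): a logical
# unit typically sets this tasklet up in its ExpandNames, roughly like the
# instance migrate/failover LUs do, e.g.
#
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      cleanup=self.op.cleanup,
#                                      failover=False,
#                                      fallback=self.op.allow_failover)
#   self.tasklets = [self._migrater]
#
# The tasklet's CheckPrereq and Exec are then driven by the LU framework;
# the exact self.op attribute names above are assumptions.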

    
7581
def _CreateBlockDev(lu, node, instance, device, force_create,
7582
                    info, force_open):
7583
  """Create a tree of block devices on a given node.
7584

7585
  If this device type has to be created on secondaries, create it and
7586
  all its children.
7587

7588
  If not, just recurse to children keeping the same 'force' value.
7589

7590
  @param lu: the lu on whose behalf we execute
7591
  @param node: the node on which to create the device
7592
  @type instance: L{objects.Instance}
7593
  @param instance: the instance which owns the device
7594
  @type device: L{objects.Disk}
7595
  @param device: the device to create
7596
  @type force_create: boolean
7597
  @param force_create: whether to force creation of this device; this
7598
      will be changed to True whenever we find a device which has
7599
      CreateOnSecondary() attribute
7600
  @param info: the extra 'metadata' we should attach to the device
7601
      (this will be represented as a LVM tag)
7602
  @type force_open: boolean
7603
  @param force_open: this parameter will be passed to the
7604
      L{backend.BlockdevCreate} function where it specifies
7605
      whether we run on primary or not, and it affects both
7606
      the child assembly and the device's own Open() execution
7607

7608
  """
7609
  if device.CreateOnSecondary():
7610
    force_create = True
7611

    
7612
  if device.children:
7613
    for child in device.children:
7614
      _CreateBlockDev(lu, node, instance, child, force_create,
7615
                      info, force_open)
7616

    
7617
  if not force_create:
7618
    return
7619

    
7620
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7621

    
7622
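# Illustrative sketch: for a mirrored disk tree (e.g. DRBD8 over two LVs),
# the helper above is normally invoked once per node by _CreateDisks further
# below, along the lines of
#
#   for node in all_nodes:
#     f_create = node == pnode
#     _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
#
# Children are created before their parent, and force_create is switched on
# as soon as a device in the tree reports CreateOnSecondary(), so the whole
# tree also ends up on the secondary node.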

    
7623
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7624
  """Create a single block device on a given node.
7625

7626
  This will not recurse over children of the device, so they must be
7627
  created in advance.
7628

7629
  @param lu: the lu on whose behalf we execute
7630
  @param node: the node on which to create the device
7631
  @type instance: L{objects.Instance}
7632
  @param instance: the instance which owns the device
7633
  @type device: L{objects.Disk}
7634
  @param device: the device to create
7635
  @param info: the extra 'metadata' we should attach to the device
7636
      (this will be represented as a LVM tag)
7637
  @type force_open: boolean
7638
  @param force_open: this parameter will be passed to the
7639
      L{backend.BlockdevCreate} function where it specifies
7640
      whether we run on primary or not, and it affects both
7641
      the child assembly and the device's own Open() execution
7642

7643
  """
7644
  lu.cfg.SetDiskID(device, node)
7645
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7646
                                       instance.name, force_open, info)
7647
  result.Raise("Can't create block device %s on"
7648
               " node %s for instance %s" % (device, node, instance.name))
7649
  if device.physical_id is None:
7650
    device.physical_id = result.payload
7651

    
7652

    
7653
def _GenerateUniqueNames(lu, exts):
7654
  """Generate a suitable LV name.
7655

7656
  This will generate a unique logical volume name for each given extension.
7657

7658
  """
7659
  results = []
7660
  for val in exts:
7661
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7662
    results.append("%s%s" % (new_id, val))
7663
  return results
7664

    
7665
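# Illustrative example (hypothetical IDs): each requested extension is
# appended to a freshly generated unique ID, so
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#
# returns something like ["<uuid-a>.disk0", "<uuid-b>.disk1"], where the IDs
# are obtained from lu.cfg.GenerateUniqueID for the current execution
# context.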

    
7666
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7667
                         iv_name, p_minor, s_minor):
7668
  """Generate a drbd8 device complete with its children.
7669

7670
  """
7671
  assert len(vgnames) == len(names) == 2
7672
  port = lu.cfg.AllocatePort()
7673
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7674
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7675
                          logical_id=(vgnames[0], names[0]))
7676
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7677
                          logical_id=(vgnames[1], names[1]))
7678
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7679
                          logical_id=(primary, secondary, port,
7680
                                      p_minor, s_minor,
7681
                                      shared_secret),
7682
                          children=[dev_data, dev_meta],
7683
                          iv_name=iv_name)
7684
  return drbd_dev
7685

    
7686
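# Illustrative sketch (made-up port/minor numbers): the returned object is a
# DRBD8 disk whose children are the data and metadata LVs, e.g. for a
# 1024 MB "disk/0":
#
#   objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
#                logical_id=("node1", "node2", 11000, 0, 1, "<secret>"),
#                children=[<1024 MB data LV>, <128 MB meta LV>],
#                iv_name="disk/0")
#
# The port, minors and shared secret are allocated from the cluster
# configuration; the concrete values above are for illustration only.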

    
7687
def _GenerateDiskTemplate(lu, template_name,
7688
                          instance_name, primary_node,
7689
                          secondary_nodes, disk_info,
7690
                          file_storage_dir, file_driver,
7691
                          base_index, feedback_fn):
7692
  """Generate the entire disk layout for a given template type.
7693

7694
  """
7695
  #TODO: compute space requirements
7696

    
7697
  vgname = lu.cfg.GetVGName()
7698
  disk_count = len(disk_info)
7699
  disks = []
7700
  if template_name == constants.DT_DISKLESS:
7701
    pass
7702
  elif template_name == constants.DT_PLAIN:
7703
    if len(secondary_nodes) != 0:
7704
      raise errors.ProgrammerError("Wrong template configuration")
7705

    
7706
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7707
                                      for i in range(disk_count)])
7708
    for idx, disk in enumerate(disk_info):
7709
      disk_index = idx + base_index
7710
      vg = disk.get(constants.IDISK_VG, vgname)
7711
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7712
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7713
                              size=disk[constants.IDISK_SIZE],
7714
                              logical_id=(vg, names[idx]),
7715
                              iv_name="disk/%d" % disk_index,
7716
                              mode=disk[constants.IDISK_MODE])
7717
      disks.append(disk_dev)
7718
  elif template_name == constants.DT_DRBD8:
7719
    if len(secondary_nodes) != 1:
7720
      raise errors.ProgrammerError("Wrong template configuration")
7721
    remote_node = secondary_nodes[0]
7722
    minors = lu.cfg.AllocateDRBDMinor(
7723
      [primary_node, remote_node] * len(disk_info), instance_name)
7724

    
7725
    names = []
7726
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7727
                                               for i in range(disk_count)]):
7728
      names.append(lv_prefix + "_data")
7729
      names.append(lv_prefix + "_meta")
7730
    for idx, disk in enumerate(disk_info):
7731
      disk_index = idx + base_index
7732
      data_vg = disk.get(constants.IDISK_VG, vgname)
7733
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7734
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7735
                                      disk[constants.IDISK_SIZE],
7736
                                      [data_vg, meta_vg],
7737
                                      names[idx * 2:idx * 2 + 2],
7738
                                      "disk/%d" % disk_index,
7739
                                      minors[idx * 2], minors[idx * 2 + 1])
7740
      disk_dev.mode = disk[constants.IDISK_MODE]
7741
      disks.append(disk_dev)
7742
  elif template_name == constants.DT_FILE:
7743
    if len(secondary_nodes) != 0:
7744
      raise errors.ProgrammerError("Wrong template configuration")
7745

    
7746
    opcodes.RequireFileStorage()
7747

    
7748
    for idx, disk in enumerate(disk_info):
7749
      disk_index = idx + base_index
7750
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7751
                              size=disk[constants.IDISK_SIZE],
7752
                              iv_name="disk/%d" % disk_index,
7753
                              logical_id=(file_driver,
7754
                                          "%s/disk%d" % (file_storage_dir,
7755
                                                         disk_index)),
7756
                              mode=disk[constants.IDISK_MODE])
7757
      disks.append(disk_dev)
7758
  elif template_name == constants.DT_SHARED_FILE:
7759
    if len(secondary_nodes) != 0:
7760
      raise errors.ProgrammerError("Wrong template configuration")
7761

    
7762
    opcodes.RequireSharedFileStorage()
7763

    
7764
    for idx, disk in enumerate(disk_info):
7765
      disk_index = idx + base_index
7766
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7767
                              size=disk[constants.IDISK_SIZE],
7768
                              iv_name="disk/%d" % disk_index,
7769
                              logical_id=(file_driver,
7770
                                          "%s/disk%d" % (file_storage_dir,
7771
                                                         disk_index)),
7772
                              mode=disk[constants.IDISK_MODE])
7773
      disks.append(disk_dev)
7774
  elif template_name == constants.DT_BLOCK:
7775
    if len(secondary_nodes) != 0:
7776
      raise errors.ProgrammerError("Wrong template configuration")
7777

    
7778
    for idx, disk in enumerate(disk_info):
7779
      disk_index = idx + base_index
7780
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7781
                              size=disk[constants.IDISK_SIZE],
7782
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7783
                                          disk[constants.IDISK_ADOPT]),
7784
                              iv_name="disk/%d" % disk_index,
7785
                              mode=disk[constants.IDISK_MODE])
7786
      disks.append(disk_dev)
7787

    
7788
  else:
7789
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7790
  return disks
7791

    
7792
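# Illustrative example (hypothetical sizes): for the plain template with two
# disks of 1024 MB and 2048 MB, disk_info would look like
#
#   [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR},
#    {constants.IDISK_SIZE: 2048, constants.IDISK_MODE: constants.DISK_RDWR}]
#
# and the function returns two LD_LV disks with iv_name "disk/0" and
# "disk/1" and logical_id (vgname, "<uuid>.disk0") / (vgname, "<uuid>.disk1");
# secondary nodes are not allowed for this template.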

    
7793
def _GetInstanceInfoText(instance):
7794
  """Compute that text that should be added to the disk's metadata.
7795

7796
  """
7797
  return "originstname+%s" % instance.name
7798

    
7799

    
7800
def _CalcEta(time_taken, written, total_size):
7801
  """Calculates the ETA based on size written and total size.
7802

7803
  @param time_taken: The time taken so far
7804
  @param written: amount written so far
7805
  @param total_size: The total size of data to be written
7806
  @return: The remaining time in seconds
7807

7808
  """
7809
  avg_time = time_taken / float(written)
7810
  return (total_size - written) * avg_time
7811

    
7812
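# Worked example: if 512 MB out of 2048 MB have been written in 30 seconds,
# _CalcEta(30.0, 512, 2048) computes avg_time = 30.0 / 512 and returns
# (2048 - 512) * (30.0 / 512) = 90.0, i.e. roughly 90 seconds remaining.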

    
7813
def _WipeDisks(lu, instance):
7814
  """Wipes instance disks.
7815

7816
  @type lu: L{LogicalUnit}
7817
  @param lu: the logical unit on whose behalf we execute
7818
  @type instance: L{objects.Instance}
7819
  @param instance: the instance whose disks we should wipe
7820
  @return: the success of the wipe
7821

7822
  """
7823
  node = instance.primary_node
7824

    
7825
  for device in instance.disks:
7826
    lu.cfg.SetDiskID(device, node)
7827

    
7828
  logging.info("Pause sync of instance %s disks", instance.name)
7829
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7830

    
7831
  for idx, success in enumerate(result.payload):
7832
    if not success:
7833
      logging.warn("pause-sync of instance %s for disks %d failed",
7834
                   instance.name, idx)
7835

    
7836
  try:
7837
    for idx, device in enumerate(instance.disks):
7838
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7839
      # MAX_WIPE_CHUNK at max
7840
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7841
                            constants.MIN_WIPE_CHUNK_PERCENT)
7842
      # we _must_ make this an int, otherwise rounding errors will
7843
      # occur
7844
      wipe_chunk_size = int(wipe_chunk_size)
7845

    
7846
      lu.LogInfo("* Wiping disk %d", idx)
7847
      logging.info("Wiping disk %d for instance %s, node %s using"
7848
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7849

    
7850
      offset = 0
7851
      size = device.size
7852
      last_output = 0
7853
      start_time = time.time()
7854

    
7855
      while offset < size:
7856
        wipe_size = min(wipe_chunk_size, size - offset)
7857
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7858
                      idx, offset, wipe_size)
7859
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7860
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7861
                     (idx, offset, wipe_size))
7862
        now = time.time()
7863
        offset += wipe_size
7864
        if now - last_output >= 60:
7865
          eta = _CalcEta(now - start_time, offset, size)
7866
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7867
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7868
          last_output = now
7869
  finally:
7870
    logging.info("Resume sync of instance %s disks", instance.name)
7871

    
7872
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7873

    
7874
    for idx, success in enumerate(result.payload):
7875
      if not success:
7876
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7877
                      " look at the status and troubleshoot the issue", idx)
7878
        logging.warn("resume-sync of instance %s for disks %d failed",
7879
                     instance.name, idx)
7880

    
7881
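# Worked example (assuming constants.MAX_WIPE_CHUNK is 1024 MB and
# constants.MIN_WIPE_CHUNK_PERCENT is 10): for a 102400 MB disk the chunk
# size computed above is
#
#   min(1024, 102400 / 100.0 * 10) = min(1024, 10240) = 1024 MB
#
# while a 5000 MB disk is wiped in min(1024, 500) = 500 MB chunks.  The
# authoritative values live in constants.py; the numbers here only
# illustrate the computation.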

    
7882
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7883
  """Create all disks for an instance.
7884

7885
  This abstracts away some work from AddInstance.
7886

7887
  @type lu: L{LogicalUnit}
7888
  @param lu: the logical unit on whose behalf we execute
7889
  @type instance: L{objects.Instance}
7890
  @param instance: the instance whose disks we should create
7891
  @type to_skip: list
7892
  @param to_skip: list of indices to skip
7893
  @type target_node: string
7894
  @param target_node: if passed, overrides the target node for creation
7895
  @rtype: boolean
7896
  @return: the success of the creation
7897

7898
  """
7899
  info = _GetInstanceInfoText(instance)
7900
  if target_node is None:
7901
    pnode = instance.primary_node
7902
    all_nodes = instance.all_nodes
7903
  else:
7904
    pnode = target_node
7905
    all_nodes = [pnode]
7906

    
7907
  if instance.disk_template in constants.DTS_FILEBASED:
7908
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7909
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7910

    
7911
    result.Raise("Failed to create directory '%s' on"
7912
                 " node %s" % (file_storage_dir, pnode))
7913

    
7914
  # Note: this needs to be kept in sync with adding of disks in
7915
  # LUInstanceSetParams
7916
  for idx, device in enumerate(instance.disks):
7917
    if to_skip and idx in to_skip:
7918
      continue
7919
    logging.info("Creating volume %s for instance %s",
7920
                 device.iv_name, instance.name)
7921
    #HARDCODE
7922
    for node in all_nodes:
7923
      f_create = node == pnode
7924
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7925

    
7926

    
7927
def _RemoveDisks(lu, instance, target_node=None):
7928
  """Remove all disks for an instance.
7929

7930
  This abstracts away some work from `AddInstance()` and
7931
  `RemoveInstance()`. Note that in case some of the devices couldn't
7932
  be removed, the removal will continue with the other ones (compare
7933
  with `_CreateDisks()`).
7934

7935
  @type lu: L{LogicalUnit}
7936
  @param lu: the logical unit on whose behalf we execute
7937
  @type instance: L{objects.Instance}
7938
  @param instance: the instance whose disks we should remove
7939
  @type target_node: string
7940
  @param target_node: used to override the node on which to remove the disks
7941
  @rtype: boolean
7942
  @return: the success of the removal
7943

7944
  """
7945
  logging.info("Removing block devices for instance %s", instance.name)
7946

    
7947
  all_result = True
7948
  for device in instance.disks:
7949
    if target_node:
7950
      edata = [(target_node, device)]
7951
    else:
7952
      edata = device.ComputeNodeTree(instance.primary_node)
7953
    for node, disk in edata:
7954
      lu.cfg.SetDiskID(disk, node)
7955
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7956
      if msg:
7957
        lu.LogWarning("Could not remove block device %s on node %s,"
7958
                      " continuing anyway: %s", device.iv_name, node, msg)
7959
        all_result = False
7960

    
7961
  if instance.disk_template == constants.DT_FILE:
7962
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7963
    if target_node:
7964
      tgt = target_node
7965
    else:
7966
      tgt = instance.primary_node
7967
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7968
    if result.fail_msg:
7969
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7970
                    file_storage_dir, instance.primary_node, result.fail_msg)
7971
      all_result = False
7972

    
7973
  return all_result
7974

    
7975

    
7976
def _ComputeDiskSizePerVG(disk_template, disks):
7977
  """Compute disk size requirements in the volume group
7978

7979
  """
7980
  def _compute(disks, payload):
7981
    """Universal algorithm.
7982

7983
    """
7984
    vgs = {}
7985
    for disk in disks:
7986
      vg = disk[constants.IDISK_VG]
7987
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload
7988

    
7989
    return vgs
7990

    
7991
  # Required free disk space as a function of disk and swap space
7992
  req_size_dict = {
7993
    constants.DT_DISKLESS: {},
7994
    constants.DT_PLAIN: _compute(disks, 0),
7995
    # 128 MB are added for drbd metadata for each disk
7996
    constants.DT_DRBD8: _compute(disks, 128),
7997
    constants.DT_FILE: {},
7998
    constants.DT_SHARED_FILE: {},
7999
  }
8000

    
8001
  if disk_template not in req_size_dict:
8002
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8003
                                 " is unknown" % disk_template)
8004

    
8005
  return req_size_dict[disk_template]
8006

    
8007
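# Worked example (hypothetical VG names): for the DRBD8 template with a
# 1024 MB disk on "xenvg" and a 2048 MB disk on "fastvg",
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "fastvg",
#                           constants.IDISK_SIZE: 2048}])
#
# returns {"xenvg": 1152, "fastvg": 2176}, i.e. each disk plus 128 MB of
# DRBD metadata, accumulated per volume group.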

    
8008
def _ComputeDiskSize(disk_template, disks):
8009
  """Compute disk size requirements in the volume group
8010

8011
  """
8012
  # Required free disk space as a function of disk and swap space
8013
  req_size_dict = {
8014
    constants.DT_DISKLESS: None,
8015
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8016
    # 128 MB are added for drbd metadata for each disk
8017
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8018
    constants.DT_FILE: None,
8019
    constants.DT_SHARED_FILE: 0,
8020
    constants.DT_BLOCK: 0,
8021
  }
8022

    
8023
  if disk_template not in req_size_dict:
8024
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8025
                                 " is unknown" % disk_template)
8026

    
8027
  return req_size_dict[disk_template]
8028

    
8029
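# Worked example: for two disks of 1024 MB and 2048 MB the totals are
#
#   plain: 1024 + 2048                 = 3072 MB
#   drbd8: (1024 + 128) + (2048 + 128) = 3328 MB
#
# while the diskless and file templates return None, and the shared-file and
# block templates report 0 since their space does not come from a local
# volume group.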

    
8030
def _FilterVmNodes(lu, nodenames):
8031
  """Filters out non-vm_capable nodes from a list.
8032

8033
  @type lu: L{LogicalUnit}
8034
  @param lu: the logical unit for which we check
8035
  @type nodenames: list
8036
  @param nodenames: the list of nodes on which we should check
8037
  @rtype: list
8038
  @return: the list of vm-capable nodes
8039

8040
  """
8041
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8042
  return [name for name in nodenames if name not in non_vm_nodes]
8043

    
8044
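# Illustrative example (hypothetical node names): if "node3" is the only
# node marked as not vm_capable in the configuration, then
#
#   _FilterVmNodes(lu, ["node1", "node2", "node3"])
#   => ["node1", "node2"]
#
# i.e. only nodes that can actually run instances are kept for the
# hypervisor/OS parameter checks below.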

    
8045
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8046
  """Hypervisor parameter validation.
8047

8048
  This function abstracts the hypervisor parameter validation to be
8049
  used in both instance create and instance modify.
8050

8051
  @type lu: L{LogicalUnit}
8052
  @param lu: the logical unit for which we check
8053
  @type nodenames: list
8054
  @param nodenames: the list of nodes on which we should check
8055
  @type hvname: string
8056
  @param hvname: the name of the hypervisor we should use
8057
  @type hvparams: dict
8058
  @param hvparams: the parameters which we need to check
8059
  @raise errors.OpPrereqError: if the parameters are not valid
8060

8061
  """
8062
  nodenames = _FilterVmNodes(lu, nodenames)
8063
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8064
                                                  hvname,
8065
                                                  hvparams)
8066
  for node in nodenames:
8067
    info = hvinfo[node]
8068
    if info.offline:
8069
      continue
8070
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8071

    
8072

    
8073
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8074
  """OS parameters validation.
8075

8076
  @type lu: L{LogicalUnit}
8077
  @param lu: the logical unit for which we check
8078
  @type required: boolean
8079
  @param required: whether the validation should fail if the OS is not
8080
      found
8081
  @type nodenames: list
8082
  @param nodenames: the list of nodes on which we should check
8083
  @type osname: string
8084
  @param osname: the name of the OS we should use
8085
  @type osparams: dict
8086
  @param osparams: the parameters which we need to check
8087
  @raise errors.OpPrereqError: if the parameters are not valid
8088

8089
  """
8090
  nodenames = _FilterVmNodes(lu, nodenames)
8091
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8092
                                   [constants.OS_VALIDATE_PARAMETERS],
8093
                                   osparams)
8094
  for node, nres in result.items():
8095
    # we don't check for offline cases since this should be run only
8096
    # against the master node and/or an instance's nodes
8097
    nres.Raise("OS Parameters validation failed on node %s" % node)
8098
    if not nres.payload:
8099
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8100
                 osname, node)
8101

    
8102

    
8103
class LUInstanceCreate(LogicalUnit):
8104
  """Create an instance.
8105

8106
  """
8107
  HPATH = "instance-add"
8108
  HTYPE = constants.HTYPE_INSTANCE
8109
  REQ_BGL = False
8110

    
8111
  def CheckArguments(self):
8112
    """Check arguments.
8113

8114
    """
8115
    # do not require name_check to ease forward/backward compatibility
8116
    # for tools
8117
    if self.op.no_install and self.op.start:
8118
      self.LogInfo("No-installation mode selected, disabling startup")
8119
      self.op.start = False
8120
    # validate/normalize the instance name
8121
    self.op.instance_name = \
8122
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8123

    
8124
    if self.op.ip_check and not self.op.name_check:
8125
      # TODO: make the ip check more flexible and not depend on the name check
8126
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8127
                                 " check", errors.ECODE_INVAL)
8128

    
8129
    # check nics' parameter names
8130
    for nic in self.op.nics:
8131
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8132

    
8133
    # check disks: parameter names and consistent adopt/no-adopt strategy
8134
    has_adopt = has_no_adopt = False
8135
    for disk in self.op.disks:
8136
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8137
      if constants.IDISK_ADOPT in disk:
8138
        has_adopt = True
8139
      else:
8140
        has_no_adopt = True
8141
    if has_adopt and has_no_adopt:
8142
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8143
                                 errors.ECODE_INVAL)
8144
    if has_adopt:
8145
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8146
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8147
                                   " '%s' disk template" %
8148
                                   self.op.disk_template,
8149
                                   errors.ECODE_INVAL)
8150
      if self.op.iallocator is not None:
8151
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8152
                                   " iallocator script", errors.ECODE_INVAL)
8153
      if self.op.mode == constants.INSTANCE_IMPORT:
8154
        raise errors.OpPrereqError("Disk adoption not allowed for"
8155
                                   " instance import", errors.ECODE_INVAL)
8156
    else:
8157
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8158
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8159
                                   " but no 'adopt' parameter given" %
8160
                                   self.op.disk_template,
8161
                                   errors.ECODE_INVAL)
8162

    
8163
    self.adopt_disks = has_adopt
8164

    
8165
    # instance name verification
8166
    if self.op.name_check:
8167
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8168
      self.op.instance_name = self.hostname1.name
8169
      # used in CheckPrereq for ip ping check
8170
      self.check_ip = self.hostname1.ip
8171
    else:
8172
      self.check_ip = None
8173

    
8174
    # file storage checks
8175
    if (self.op.file_driver and
8176
        not self.op.file_driver in constants.FILE_DRIVER):
8177
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8178
                                 self.op.file_driver, errors.ECODE_INVAL)
8179

    
8180
    if self.op.disk_template == constants.DT_FILE:
8181
      opcodes.RequireFileStorage()
8182
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8183
      opcodes.RequireSharedFileStorage()
8184

    
8185
    ### Node/iallocator related checks
8186
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8187

    
8188
    if self.op.pnode is not None:
8189
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8190
        if self.op.snode is None:
8191
          raise errors.OpPrereqError("The networked disk templates need"
8192
                                     " a mirror node", errors.ECODE_INVAL)
8193
      elif self.op.snode:
8194
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8195
                        " template")
8196
        self.op.snode = None
8197

    
8198
    self._cds = _GetClusterDomainSecret()
8199

    
8200
    if self.op.mode == constants.INSTANCE_IMPORT:
8201
      # On import force_variant must be True, because if we forced it at
8202
      # initial install, our only chance when importing it back is that it
8203
      # works again!
8204
      self.op.force_variant = True
8205

    
8206
      if self.op.no_install:
8207
        self.LogInfo("No-installation mode has no effect during import")
8208

    
8209
    elif self.op.mode == constants.INSTANCE_CREATE:
8210
      if self.op.os_type is None:
8211
        raise errors.OpPrereqError("No guest OS specified",
8212
                                   errors.ECODE_INVAL)
8213
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8214
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8215
                                   " installation" % self.op.os_type,
8216
                                   errors.ECODE_STATE)
8217
      if self.op.disk_template is None:
8218
        raise errors.OpPrereqError("No disk template specified",
8219
                                   errors.ECODE_INVAL)
8220

    
8221
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8222
      # Check handshake to ensure both clusters have the same domain secret
8223
      src_handshake = self.op.source_handshake
8224
      if not src_handshake:
8225
        raise errors.OpPrereqError("Missing source handshake",
8226
                                   errors.ECODE_INVAL)
8227

    
8228
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8229
                                                           src_handshake)
8230
      if errmsg:
8231
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8232
                                   errors.ECODE_INVAL)
8233

    
8234
      # Load and check source CA
8235
      self.source_x509_ca_pem = self.op.source_x509_ca
8236
      if not self.source_x509_ca_pem:
8237
        raise errors.OpPrereqError("Missing source X509 CA",
8238
                                   errors.ECODE_INVAL)
8239

    
8240
      try:
8241
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8242
                                                    self._cds)
8243
      except OpenSSL.crypto.Error, err:
8244
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8245
                                   (err, ), errors.ECODE_INVAL)
8246

    
8247
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8248
      if errcode is not None:
8249
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8250
                                   errors.ECODE_INVAL)
8251

    
8252
      self.source_x509_ca = cert
8253

    
8254
      src_instance_name = self.op.source_instance_name
8255
      if not src_instance_name:
8256
        raise errors.OpPrereqError("Missing source instance name",
8257
                                   errors.ECODE_INVAL)
8258

    
8259
      self.source_instance_name = \
8260
          netutils.GetHostname(name=src_instance_name).name
8261

    
8262
    else:
8263
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8264
                                 self.op.mode, errors.ECODE_INVAL)
8265

    
8266
  def ExpandNames(self):
8267
    """ExpandNames for CreateInstance.
8268

8269
    Figure out the right locks for instance creation.
8270

8271
    """
8272
    self.needed_locks = {}
8273

    
8274
    instance_name = self.op.instance_name
8275
    # this is just a preventive check, but someone might still add this
8276
    # instance in the meantime, and creation will fail at lock-add time
8277
    if instance_name in self.cfg.GetInstanceList():
8278
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8279
                                 instance_name, errors.ECODE_EXISTS)
8280

    
8281
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8282

    
8283
    if self.op.iallocator:
8284
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8285
    else:
8286
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8287
      nodelist = [self.op.pnode]
8288
      if self.op.snode is not None:
8289
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8290
        nodelist.append(self.op.snode)
8291
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8292

    
8293
    # in case of import lock the source node too
8294
    if self.op.mode == constants.INSTANCE_IMPORT:
8295
      src_node = self.op.src_node
8296
      src_path = self.op.src_path
8297

    
8298
      if src_path is None:
8299
        self.op.src_path = src_path = self.op.instance_name
8300

    
8301
      if src_node is None:
8302
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8303
        self.op.src_node = None
8304
        if os.path.isabs(src_path):
8305
          raise errors.OpPrereqError("Importing an instance from a path"
8306
                                     " requires a source node option",
8307
                                     errors.ECODE_INVAL)
8308
      else:
8309
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8310
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8311
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8312
        if not os.path.isabs(src_path):
8313
          self.op.src_path = src_path = \
8314
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8315

    
8316
  def _RunAllocator(self):
8317
    """Run the allocator based on input opcode.
8318

8319
    """
8320
    nics = [n.ToDict() for n in self.nics]
8321
    ial = IAllocator(self.cfg, self.rpc,
8322
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8323
                     name=self.op.instance_name,
8324
                     disk_template=self.op.disk_template,
8325
                     tags=self.op.tags,
8326
                     os=self.op.os_type,
8327
                     vcpus=self.be_full[constants.BE_VCPUS],
8328
                     memory=self.be_full[constants.BE_MEMORY],
8329
                     disks=self.disks,
8330
                     nics=nics,
8331
                     hypervisor=self.op.hypervisor,
8332
                     )
8333

    
8334
    ial.Run(self.op.iallocator)
8335

    
8336
    if not ial.success:
8337
      raise errors.OpPrereqError("Can't compute nodes using"
8338
                                 " iallocator '%s': %s" %
8339
                                 (self.op.iallocator, ial.info),
8340
                                 errors.ECODE_NORES)
8341
    if len(ial.result) != ial.required_nodes:
8342
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8343
                                 " of nodes (%s), required %s" %
8344
                                 (self.op.iallocator, len(ial.result),
8345
                                  ial.required_nodes), errors.ECODE_FAULT)
8346
    self.op.pnode = ial.result[0]
8347
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8348
                 self.op.instance_name, self.op.iallocator,
8349
                 utils.CommaJoin(ial.result))
8350
    if ial.required_nodes == 2:
8351
      self.op.snode = ial.result[1]
8352

    
8353
  def BuildHooksEnv(self):
8354
    """Build hooks env.
8355

8356
    This runs on master, primary and secondary nodes of the instance.
8357

8358
    """
8359
    env = {
8360
      "ADD_MODE": self.op.mode,
8361
      }
8362
    if self.op.mode == constants.INSTANCE_IMPORT:
8363
      env["SRC_NODE"] = self.op.src_node
8364
      env["SRC_PATH"] = self.op.src_path
8365
      env["SRC_IMAGES"] = self.src_images
8366

    
8367
    env.update(_BuildInstanceHookEnv(
8368
      name=self.op.instance_name,
8369
      primary_node=self.op.pnode,
8370
      secondary_nodes=self.secondaries,
8371
      status=self.op.start,
8372
      os_type=self.op.os_type,
8373
      memory=self.be_full[constants.BE_MEMORY],
8374
      vcpus=self.be_full[constants.BE_VCPUS],
8375
      nics=_NICListToTuple(self, self.nics),
8376
      disk_template=self.op.disk_template,
8377
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8378
             for d in self.disks],
8379
      bep=self.be_full,
8380
      hvp=self.hv_full,
8381
      hypervisor_name=self.op.hypervisor,
8382
      tags=self.op.tags,
8383
    ))
8384

    
8385
    return env
8386

    
8387
  def BuildHooksNodes(self):
8388
    """Build hooks nodes.
8389

8390
    """
8391
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8392
    return nl, nl
8393

    
8394
  def _ReadExportInfo(self):
8395
    """Reads the export information from disk.
8396

8397
    It will override the opcode source node and path with the actual
8398
    information, if these two were not specified before.
8399

8400
    @return: the export information
8401

8402
    """
8403
    assert self.op.mode == constants.INSTANCE_IMPORT
8404

    
8405
    src_node = self.op.src_node
8406
    src_path = self.op.src_path
8407

    
8408
    if src_node is None:
8409
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8410
      exp_list = self.rpc.call_export_list(locked_nodes)
8411
      found = False
8412
      for node in exp_list:
8413
        if exp_list[node].fail_msg:
8414
          continue
8415
        if src_path in exp_list[node].payload:
8416
          found = True
8417
          self.op.src_node = src_node = node
8418
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8419
                                                       src_path)
8420
          break
8421
      if not found:
8422
        raise errors.OpPrereqError("No export found for relative path %s" %
8423
                                    src_path, errors.ECODE_INVAL)
8424

    
8425
    _CheckNodeOnline(self, src_node)
8426
    result = self.rpc.call_export_info(src_node, src_path)
8427
    result.Raise("No export or invalid export found in dir %s" % src_path)
8428

    
8429
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8430
    if not export_info.has_section(constants.INISECT_EXP):
8431
      raise errors.ProgrammerError("Corrupted export config",
8432
                                   errors.ECODE_ENVIRON)
8433

    
8434
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8435
    if (int(ei_version) != constants.EXPORT_VERSION):
8436
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8437
                                 (ei_version, constants.EXPORT_VERSION),
8438
                                 errors.ECODE_ENVIRON)
8439
    return export_info
8440

    
8441
  def _ReadExportParams(self, einfo):
8442
    """Use export parameters as defaults.
8443

8444
    In case the opcode doesn't specify (as in override) some instance
8445
    parameters, then try to use them from the export information, if
8446
    that declares them.
8447

8448
    """
8449
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8450

    
8451
    if self.op.disk_template is None:
8452
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8453
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8454
                                          "disk_template")
8455
      else:
8456
        raise errors.OpPrereqError("No disk template specified and the export"
8457
                                   " is missing the disk_template information",
8458
                                   errors.ECODE_INVAL)
8459

    
8460
    if not self.op.disks:
8461
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8462
        disks = []
8463
        # TODO: import the disk iv_name too
8464
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8465
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8466
          disks.append({constants.IDISK_SIZE: disk_sz})
8467
        self.op.disks = disks
8468
      else:
8469
        raise errors.OpPrereqError("No disk info specified and the export"
8470
                                   " is missing the disk information",
8471
                                   errors.ECODE_INVAL)
8472

    
8473
    if (not self.op.nics and
8474
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8475
      nics = []
8476
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8477
        ndict = {}
8478
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8479
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8480
          ndict[name] = v
8481
        nics.append(ndict)
8482
      self.op.nics = nics
8483

    
8484
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8485
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8486

    
8487
    if (self.op.hypervisor is None and
8488
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8489
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8490

    
8491
    if einfo.has_section(constants.INISECT_HYP):
8492
      # use the export parameters but do not override the ones
8493
      # specified by the user
8494
      for name, value in einfo.items(constants.INISECT_HYP):
8495
        if name not in self.op.hvparams:
8496
          self.op.hvparams[name] = value
8497

    
8498
    if einfo.has_section(constants.INISECT_BEP):
8499
      # use the parameters, without overriding
8500
      for name, value in einfo.items(constants.INISECT_BEP):
8501
        if name not in self.op.beparams:
8502
          self.op.beparams[name] = value
8503
    else:
8504
      # try to read the parameters old style, from the main section
8505
      for name in constants.BES_PARAMETERS:
8506
        if (name not in self.op.beparams and
8507
            einfo.has_option(constants.INISECT_INS, name)):
8508
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8509

    
8510
    if einfo.has_section(constants.INISECT_OSP):
8511
      # use the parameters, without overriding
8512
      for name, value in einfo.items(constants.INISECT_OSP):
8513
        if name not in self.op.osparams:
8514
          self.op.osparams[name] = value
8515

    
8516
  def _RevertToDefaults(self, cluster):
8517
    """Revert the instance parameters to the default values.
8518

8519
    """
8520
    # hvparams
8521
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8522
    for name in self.op.hvparams.keys():
8523
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8524
        del self.op.hvparams[name]
8525
    # beparams
8526
    be_defs = cluster.SimpleFillBE({})
8527
    for name in self.op.beparams.keys():
8528
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8529
        del self.op.beparams[name]
8530
    # nic params
8531
    nic_defs = cluster.SimpleFillNIC({})
8532
    for nic in self.op.nics:
8533
      for name in constants.NICS_PARAMETERS:
8534
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8535
          del nic[name]
8536
    # osparams
8537
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8538
    for name in self.op.osparams.keys():
8539
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8540
        del self.op.osparams[name]
8541

    
8542
  def _CalculateFileStorageDir(self):
8543
    """Calculate final instance file storage dir.
8544

8545
    """
8546
    # file storage dir calculation/check
8547
    self.instance_file_storage_dir = None
8548
    if self.op.disk_template in constants.DTS_FILEBASED:
8549
      # build the full file storage dir path
8550
      joinargs = []
8551

    
8552
      if self.op.disk_template == constants.DT_SHARED_FILE:
8553
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8554
      else:
8555
        get_fsd_fn = self.cfg.GetFileStorageDir
8556

    
8557
      cfg_storagedir = get_fsd_fn()
8558
      if not cfg_storagedir:
8559
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8560
      joinargs.append(cfg_storagedir)
8561

    
8562
      if self.op.file_storage_dir is not None:
8563
        joinargs.append(self.op.file_storage_dir)
8564

    
8565
      joinargs.append(self.op.instance_name)
8566

    
8567
      # pylint: disable=W0142
8568
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8569

    
8570
  def CheckPrereq(self):
8571
    """Check prerequisites.
8572

8573
    """
8574
    self._CalculateFileStorageDir()
8575

    
8576
    if self.op.mode == constants.INSTANCE_IMPORT:
8577
      export_info = self._ReadExportInfo()
8578
      self._ReadExportParams(export_info)
8579

    
8580
    if (not self.cfg.GetVGName() and
8581
        self.op.disk_template not in constants.DTS_NOT_LVM):
8582
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8583
                                 " instances", errors.ECODE_STATE)
8584

    
8585
    if self.op.hypervisor is None:
8586
      self.op.hypervisor = self.cfg.GetHypervisorType()
8587

    
8588
    cluster = self.cfg.GetClusterInfo()
8589
    enabled_hvs = cluster.enabled_hypervisors
8590
    if self.op.hypervisor not in enabled_hvs:
8591
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8592
                                 " cluster (%s)" % (self.op.hypervisor,
8593
                                  ",".join(enabled_hvs)),
8594
                                 errors.ECODE_STATE)
8595

    
8596
    # Check tag validity
8597
    for tag in self.op.tags:
8598
      objects.TaggableObject.ValidateTag(tag)
8599

    
8600
    # check hypervisor parameter syntax (locally)
8601
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8602
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8603
                                      self.op.hvparams)
8604
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8605
    hv_type.CheckParameterSyntax(filled_hvp)
8606
    self.hv_full = filled_hvp
8607
    # check that we don't specify global parameters on an instance
8608
    _CheckGlobalHvParams(self.op.hvparams)
8609

    
8610
    # fill and remember the beparams dict
8611
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8612
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8613

    
8614
    # build os parameters
8615
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8616

    
8617
    # now that hvp/bep are in final format, let's reset to defaults,
8618
    # if told to do so
8619
    if self.op.identify_defaults:
8620
      self._RevertToDefaults(cluster)
8621

    
8622
    # NIC buildup
8623
    self.nics = []
8624
    for idx, nic in enumerate(self.op.nics):
8625
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8626
      nic_mode = nic_mode_req
8627
      if nic_mode is None:
8628
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8629

    
8630
      # in routed mode, for the first nic, the default ip is 'auto'
8631
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8632
        default_ip_mode = constants.VALUE_AUTO
8633
      else:
8634
        default_ip_mode = constants.VALUE_NONE
8635

    
8636
      # ip validity checks
8637
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8638
      if ip is None or ip.lower() == constants.VALUE_NONE:
8639
        nic_ip = None
8640
      elif ip.lower() == constants.VALUE_AUTO:
8641
        if not self.op.name_check:
8642
          raise errors.OpPrereqError("IP address set to auto but name checks"
8643
                                     " have been skipped",
8644
                                     errors.ECODE_INVAL)
8645
        nic_ip = self.hostname1.ip
8646
      else:
8647
        if not netutils.IPAddress.IsValid(ip):
8648
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8649
                                     errors.ECODE_INVAL)
8650
        nic_ip = ip
8651

    
8652
      # TODO: check the ip address for uniqueness
8653
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8654
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8655
                                   errors.ECODE_INVAL)
8656

    
8657
      # MAC address verification
8658
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8659
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8660
        mac = utils.NormalizeAndValidateMac(mac)
8661

    
8662
        try:
8663
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8664
        except errors.ReservationError:
8665
          raise errors.OpPrereqError("MAC address %s already in use"
8666
                                     " in cluster" % mac,
8667
                                     errors.ECODE_NOTUNIQUE)
8668

    
8669
      #  Build nic parameters
8670
      link = nic.get(constants.INIC_LINK, None)
8671
      nicparams = {}
8672
      if nic_mode_req:
8673
        nicparams[constants.NIC_MODE] = nic_mode_req
8674
      if link:
8675
        nicparams[constants.NIC_LINK] = link
8676

    
8677
      check_params = cluster.SimpleFillNIC(nicparams)
8678
      objects.NIC.CheckParameterSyntax(check_params)
8679
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8680

    
8681
    # disk checks/pre-build
8682
    default_vg = self.cfg.GetVGName()
8683
    self.disks = []
8684
    for disk in self.op.disks:
8685
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8686
      if mode not in constants.DISK_ACCESS_SET:
8687
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8688
                                   mode, errors.ECODE_INVAL)
8689
      size = disk.get(constants.IDISK_SIZE, None)
8690
      if size is None:
8691
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8692
      try:
8693
        size = int(size)
8694
      except (TypeError, ValueError):
8695
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8696
                                   errors.ECODE_INVAL)
8697

    
8698
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8699
      new_disk = {
8700
        constants.IDISK_SIZE: size,
8701
        constants.IDISK_MODE: mode,
8702
        constants.IDISK_VG: data_vg,
8703
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8704
        }
8705
      if constants.IDISK_ADOPT in disk:
8706
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8707
      self.disks.append(new_disk)
8708

    
8709
    if self.op.mode == constants.INSTANCE_IMPORT:
8710

    
8711
      # Check that the new instance doesn't have less disks than the export
8712
      instance_disks = len(self.disks)
8713
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8714
      if instance_disks < export_disks:
8715
        raise errors.OpPrereqError("Not enough disks to import."
8716
                                   " (instance: %d, export: %d)" %
8717
                                   (instance_disks, export_disks),
8718
                                   errors.ECODE_INVAL)
8719

    
8720
      disk_images = []
8721
      for idx in range(export_disks):
8722
        option = "disk%d_dump" % idx
8723
        if export_info.has_option(constants.INISECT_INS, option):
8724
          # FIXME: are the old os-es, disk sizes, etc. useful?
8725
          export_name = export_info.get(constants.INISECT_INS, option)
8726
          image = utils.PathJoin(self.op.src_path, export_name)
8727
          disk_images.append(image)
8728
        else:
8729
          disk_images.append(False)
8730

    
8731
      self.src_images = disk_images
8732

    
8733
      old_name = export_info.get(constants.INISECT_INS, "name")
8734
      try:
8735
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8736
      except (TypeError, ValueError), err:
8737
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8738
                                   " an integer: %s" % str(err),
8739
                                   errors.ECODE_STATE)
8740
      if self.op.instance_name == old_name:
8741
        for idx, nic in enumerate(self.nics):
8742
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8743
            nic_mac_ini = "nic%d_mac" % idx
8744
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8745

    
8746
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8747

    
8748
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8749
    if self.op.ip_check:
8750
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8751
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8752
                                   (self.check_ip, self.op.instance_name),
8753
                                   errors.ECODE_NOTUNIQUE)
8754

    
8755
    #### mac address generation
8756
    # By generating here the mac address both the allocator and the hooks get
8757
    # the real final mac address rather than the 'auto' or 'generate' value.
8758
    # There is a race condition between the generation and the instance object
8759
    # creation, which means that we know the mac is valid now, but we're not
8760
    # sure it will be when we actually add the instance. If things go bad
8761
    # adding the instance will abort because of a duplicate mac, and the
8762
    # creation job will fail.
8763
    for nic in self.nics:
8764
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8765
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8766

    
8767
    #### allocator run
8768

    
8769
    if self.op.iallocator is not None:
8770
      self._RunAllocator()
8771

    
8772
    #### node related checks
8773

    
8774
    # check primary node
8775
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8776
    assert self.pnode is not None, \
8777
      "Cannot retrieve locked node %s" % self.op.pnode
8778
    if pnode.offline:
8779
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8780
                                 pnode.name, errors.ECODE_STATE)
8781
    if pnode.drained:
8782
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8783
                                 pnode.name, errors.ECODE_STATE)
8784
    if not pnode.vm_capable:
8785
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8786
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8787

    
8788
    self.secondaries = []
8789

    
8790
    # mirror node verification
8791
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8792
      if self.op.snode == pnode.name:
8793
        raise errors.OpPrereqError("The secondary node cannot be the"
8794
                                   " primary node", errors.ECODE_INVAL)
8795
      _CheckNodeOnline(self, self.op.snode)
8796
      _CheckNodeNotDrained(self, self.op.snode)
8797
      _CheckNodeVmCapable(self, self.op.snode)
8798
      self.secondaries.append(self.op.snode)
8799

    
8800
    nodenames = [pnode.name] + self.secondaries
8801

    
8802
    if not self.adopt_disks:
8803
      # Check lv size requirements, if not adopting
8804
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8805
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8806

    
8807
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8808
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8809
                                disk[constants.IDISK_ADOPT])
8810
                     for disk in self.disks])
8811
      if len(all_lvs) != len(self.disks):
8812
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8813
                                   errors.ECODE_INVAL)
8814
      for lv_name in all_lvs:
8815
        try:
8816
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8817
          # to ReserveLV uses the same syntax
8818
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8819
        except errors.ReservationError:
8820
          raise errors.OpPrereqError("LV named %s used by another instance" %
8821
                                     lv_name, errors.ECODE_NOTUNIQUE)
8822

    
8823
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8824
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8825

    
8826
      node_lvs = self.rpc.call_lv_list([pnode.name],
8827
                                       vg_names.payload.keys())[pnode.name]
8828
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8829
      node_lvs = node_lvs.payload
8830

    
8831
      delta = all_lvs.difference(node_lvs.keys())
8832
      if delta:
8833
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8834
                                   utils.CommaJoin(delta),
8835
                                   errors.ECODE_INVAL)
8836
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8837
      if online_lvs:
8838
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8839
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8840
                                   errors.ECODE_STATE)
8841
      # update the size of disk based on what is found
8842
      for dsk in self.disks:
8843
        dsk[constants.IDISK_SIZE] = \
8844
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8845
                                        dsk[constants.IDISK_ADOPT])][0]))
8846

    
8847
    elif self.op.disk_template == constants.DT_BLOCK:
8848
      # Normalize and de-duplicate device paths
8849
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8850
                       for disk in self.disks])
8851
      if len(all_disks) != len(self.disks):
8852
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8853
                                   errors.ECODE_INVAL)
8854
      baddisks = [d for d in all_disks
8855
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8856
      if baddisks:
8857
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8858
                                   " cannot be adopted" %
8859
                                   (", ".join(baddisks),
8860
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8861
                                   errors.ECODE_INVAL)
8862

    
8863
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8864
                                            list(all_disks))[pnode.name]
8865
      node_disks.Raise("Cannot get block device information from node %s" %
8866
                       pnode.name)
8867
      node_disks = node_disks.payload
8868
      delta = all_disks.difference(node_disks.keys())
8869
      if delta:
8870
        raise errors.OpPrereqError("Missing block device(s): %s" %
8871
                                   utils.CommaJoin(delta),
8872
                                   errors.ECODE_INVAL)
8873
      for dsk in self.disks:
8874
        dsk[constants.IDISK_SIZE] = \
8875
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8876

    
8877
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8878

    
8879
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8880
    # check OS parameters (remotely)
8881
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8882

    
8883
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8884

    
8885
    # memory check on primary node
8886
    if self.op.start:
8887
      _CheckNodeFreeMemory(self, self.pnode.name,
8888
                           "creating instance %s" % self.op.instance_name,
8889
                           self.be_full[constants.BE_MEMORY],
8890
                           self.op.hypervisor)
8891

    
8892
    self.dry_run_result = list(nodenames)
8893

    
8894
  def Exec(self, feedback_fn):
8895
    """Create and add the instance to the cluster.
8896

8897
    """
8898
    instance = self.op.instance_name
8899
    pnode_name = self.pnode.name
8900

    
8901
    ht_kind = self.op.hypervisor
8902
    if ht_kind in constants.HTS_REQ_PORT:
8903
      network_port = self.cfg.AllocatePort()
8904
    else:
8905
      network_port = None
8906

    
8907
    disks = _GenerateDiskTemplate(self,
8908
                                  self.op.disk_template,
8909
                                  instance, pnode_name,
8910
                                  self.secondaries,
8911
                                  self.disks,
8912
                                  self.instance_file_storage_dir,
8913
                                  self.op.file_driver,
8914
                                  0,
8915
                                  feedback_fn)
8916

    
8917
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8918
                            primary_node=pnode_name,
8919
                            nics=self.nics, disks=disks,
8920
                            disk_template=self.op.disk_template,
8921
                            admin_up=False,
8922
                            network_port=network_port,
8923
                            beparams=self.op.beparams,
8924
                            hvparams=self.op.hvparams,
8925
                            hypervisor=self.op.hypervisor,
8926
                            osparams=self.op.osparams,
8927
                            )
8928

    
8929
    if self.op.tags:
8930
      for tag in self.op.tags:
8931
        iobj.AddTag(tag)
8932

    
8933
    if self.adopt_disks:
8934
      if self.op.disk_template == constants.DT_PLAIN:
8935
        # rename LVs to the newly-generated names; we need to construct
8936
        # 'fake' LV disks with the old data, plus the new unique_id
8937
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8938
        rename_to = []
8939
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8940
          rename_to.append(t_dsk.logical_id)
8941
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8942
          self.cfg.SetDiskID(t_dsk, pnode_name)
8943
        result = self.rpc.call_blockdev_rename(pnode_name,
8944
                                               zip(tmp_disks, rename_to))
8945
        result.Raise("Failed to rename adoped LVs")
8946
    else:
8947
      feedback_fn("* creating instance disks...")
8948
      try:
8949
        _CreateDisks(self, iobj)
8950
      except errors.OpExecError:
8951
        self.LogWarning("Device creation failed, reverting...")
8952
        try:
8953
          _RemoveDisks(self, iobj)
8954
        finally:
8955
          self.cfg.ReleaseDRBDMinors(instance)
8956
          raise
8957

    
8958
    feedback_fn("adding instance %s to cluster config" % instance)
8959

    
8960
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8961

    
8962
    # Declare that we don't want to remove the instance lock anymore, as we've
8963
    # added the instance to the config
8964
    del self.remove_locks[locking.LEVEL_INSTANCE]
8965

    
8966
    if self.op.mode == constants.INSTANCE_IMPORT:
8967
      # Release unused nodes
8968
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8969
    else:
8970
      # Release all nodes
8971
      _ReleaseLocks(self, locking.LEVEL_NODE)
8972

    
8973
    disk_abort = False
8974
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8975
      feedback_fn("* wiping instance disks...")
8976
      try:
8977
        _WipeDisks(self, iobj)
8978
      except errors.OpExecError, err:
8979
        logging.exception("Wiping disks failed")
8980
        self.LogWarning("Wiping instance disks failed (%s)", err)
8981
        disk_abort = True
8982

    
8983
    if disk_abort:
8984
      # Something is already wrong with the disks, don't do anything else
8985
      pass
8986
    elif self.op.wait_for_sync:
8987
      disk_abort = not _WaitForSync(self, iobj)
8988
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8989
      # make sure the disks are not degraded (still sync-ing is ok)
8990
      feedback_fn("* checking mirrors status")
8991
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8992
    else:
8993
      disk_abort = False
8994

    
8995
    if disk_abort:
8996
      _RemoveDisks(self, iobj)
8997
      self.cfg.RemoveInstance(iobj.name)
8998
      # Make sure the instance lock gets removed
8999
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9000
      raise errors.OpExecError("There are some degraded disks for"
9001
                               " this instance")
9002

    
9003
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9004
      if self.op.mode == constants.INSTANCE_CREATE:
9005
        if not self.op.no_install:
9006
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9007
                        not self.op.wait_for_sync)
9008
          if pause_sync:
9009
            feedback_fn("* pausing disk sync to install instance OS")
9010
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9011
                                                              iobj.disks, True)
9012
            for idx, success in enumerate(result.payload):
9013
              if not success:
9014
                logging.warn("pause-sync of instance %s for disk %d failed",
9015
                             instance, idx)
9016

    
9017
          feedback_fn("* running the instance OS create scripts...")
9018
          # FIXME: pass debug option from opcode to backend
9019
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9020
                                                 self.op.debug_level)
9021
          if pause_sync:
9022
            feedback_fn("* resuming disk sync")
9023
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9024
                                                              iobj.disks, False)
9025
            for idx, success in enumerate(result.payload):
9026
              if not success:
9027
                logging.warn("resume-sync of instance %s for disk %d failed",
9028
                             instance, idx)
9029

    
9030
          result.Raise("Could not add os for instance %s"
9031
                       " on node %s" % (instance, pnode_name))
9032

    
9033
      elif self.op.mode == constants.INSTANCE_IMPORT:
9034
        feedback_fn("* running the instance OS import scripts...")
9035

    
9036
        transfers = []
9037

    
9038
        for idx, image in enumerate(self.src_images):
9039
          if not image:
9040
            continue
9041

    
9042
          # FIXME: pass debug option from opcode to backend
9043
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9044
                                             constants.IEIO_FILE, (image, ),
9045
                                             constants.IEIO_SCRIPT,
9046
                                             (iobj.disks[idx], idx),
9047
                                             None)
9048
          transfers.append(dt)
9049

    
9050
        import_result = \
9051
          masterd.instance.TransferInstanceData(self, feedback_fn,
9052
                                                self.op.src_node, pnode_name,
9053
                                                self.pnode.secondary_ip,
9054
                                                iobj, transfers)
9055
        if not compat.all(import_result):
9056
          self.LogWarning("Some disks for instance %s on node %s were not"
9057
                          " imported successfully" % (instance, pnode_name))
9058

    
9059
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9060
        feedback_fn("* preparing remote import...")
9061
        # The source cluster will stop the instance before attempting to make a
9062
        # connection. In some cases stopping an instance can take a long time,
9063
        # hence the shutdown timeout is added to the connection timeout.
9064
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9065
                           self.op.source_shutdown_timeout)
9066
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9067

    
9068
        assert iobj.primary_node == self.pnode.name
9069
        disk_results = \
9070
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9071
                                        self.source_x509_ca,
9072
                                        self._cds, timeouts)
9073
        if not compat.all(disk_results):
9074
          # TODO: Should the instance still be started, even if some disks
9075
          # failed to import (valid for local imports, too)?
9076
          self.LogWarning("Some disks for instance %s on node %s were not"
9077
                          " imported successfully" % (instance, pnode_name))
9078

    
9079
        # Run rename script on newly imported instance
9080
        assert iobj.name == instance
9081
        feedback_fn("Running rename script for %s" % instance)
9082
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9083
                                                   self.source_instance_name,
9084
                                                   self.op.debug_level)
9085
        if result.fail_msg:
9086
          self.LogWarning("Failed to run rename script for %s on node"
9087
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9088

    
9089
      else:
9090
        # also checked in the prereq part
9091
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9092
                                     % self.op.mode)
9093

    
9094
    if self.op.start:
9095
      iobj.admin_up = True
9096
      self.cfg.Update(iobj, feedback_fn)
9097
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9098
      feedback_fn("* starting instance...")
9099
      result = self.rpc.call_instance_start(pnode_name, iobj,
9100
                                            None, None, False)
9101
      result.Raise("Could not start instance")
9102

    
9103
    return list(iobj.all_nodes)
9104

    
9105

    
9106
class LUInstanceConsole(NoHooksLU):
9107
  """Connect to an instance's console.
9108

9109
  This is somewhat special in that it returns the command line that
9110
  you need to run on the master node in order to connect to the
9111
  console.
9112

9113
  """
9114
  REQ_BGL = False
9115

    
9116
  def ExpandNames(self):
9117
    self._ExpandAndLockInstance()
9118

    
9119
  def CheckPrereq(self):
9120
    """Check prerequisites.
9121

9122
    This checks that the instance is in the cluster.
9123

9124
    """
9125
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9126
    assert self.instance is not None, \
9127
      "Cannot retrieve locked instance %s" % self.op.instance_name
9128
    _CheckNodeOnline(self, self.instance.primary_node)
9129

    
9130
  def Exec(self, feedback_fn):
9131
    """Connect to the console of an instance
9132

9133
    """
9134
    instance = self.instance
9135
    node = instance.primary_node
9136

    
9137
    node_insts = self.rpc.call_instance_list([node],
9138
                                             [instance.hypervisor])[node]
9139
    node_insts.Raise("Can't get node information from %s" % node)
9140

    
9141
    if instance.name not in node_insts.payload:
9142
      if instance.admin_up:
9143
        state = constants.INSTST_ERRORDOWN
9144
      else:
9145
        state = constants.INSTST_ADMINDOWN
9146
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9147
                               (instance.name, state))
9148

    
9149
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9150

    
9151
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9152

    
9153

    
9154
def _GetInstanceConsole(cluster, instance):
9155
  """Returns console information for an instance.
9156

9157
  @type cluster: L{objects.Cluster}
9158
  @type instance: L{objects.Instance}
9159
  @rtype: dict
9160

9161
  """
9162
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9163
  # beparams and hvparams are passed separately, to avoid editing the
9164
  # instance and then saving the defaults in the instance itself.
9165
  hvparams = cluster.FillHV(instance)
9166
  beparams = cluster.FillBE(instance)
9167
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9168

    
9169
  assert console.instance == instance.name
9170
  assert console.Validate()
9171

    
9172
  return console.ToDict()
9173

    
9174

    
9175
class LUInstanceReplaceDisks(LogicalUnit):
9176
  """Replace the disks of an instance.
9177

9178
  """
9179
  HPATH = "mirrors-replace"
9180
  HTYPE = constants.HTYPE_INSTANCE
9181
  REQ_BGL = False
9182

    
9183
  def CheckArguments(self):
9184
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9185
                                  self.op.iallocator)
9186

    
9187
  def ExpandNames(self):
9188
    self._ExpandAndLockInstance()
9189

    
9190
    assert locking.LEVEL_NODE not in self.needed_locks
9191
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9192

    
9193
    assert self.op.iallocator is None or self.op.remote_node is None, \
9194
      "Conflicting options"
9195

    
9196
    if self.op.remote_node is not None:
9197
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9198

    
9199
      # Warning: do not remove the locking of the new secondary here
9200
      # unless DRBD8.AddChildren is changed to work in parallel;
9201
      # currently it doesn't since parallel invocations of
9202
      # FindUnusedMinor will conflict
9203
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9204
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9205
    else:
9206
      self.needed_locks[locking.LEVEL_NODE] = []
9207
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9208

    
9209
      if self.op.iallocator is not None:
9210
        # iallocator will select a new node in the same group
9211
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9212

    
9213
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9214
                                   self.op.iallocator, self.op.remote_node,
9215
                                   self.op.disks, False, self.op.early_release)
9216

    
9217
    self.tasklets = [self.replacer]
9218

    
9219
  def DeclareLocks(self, level):
9220
    if level == locking.LEVEL_NODEGROUP:
9221
      assert self.op.remote_node is None
9222
      assert self.op.iallocator is not None
9223
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9224

    
9225
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9226
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9227
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9228

    
9229
    elif level == locking.LEVEL_NODE:
9230
      if self.op.iallocator is not None:
9231
        assert self.op.remote_node is None
9232
        assert not self.needed_locks[locking.LEVEL_NODE]
9233

    
9234
        # Lock member nodes of all locked groups
9235
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9236
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9237
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9238
      else:
9239
        self._LockInstancesNodes()
9240

    
9241
  def BuildHooksEnv(self):
9242
    """Build hooks env.
9243

9244
    This runs on the master, the primary and all the secondaries.
9245

9246
    """
9247
    instance = self.replacer.instance
9248
    env = {
9249
      "MODE": self.op.mode,
9250
      "NEW_SECONDARY": self.op.remote_node,
9251
      "OLD_SECONDARY": instance.secondary_nodes[0],
9252
      }
9253
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9254
    return env
9255

    
9256
  def BuildHooksNodes(self):
9257
    """Build hooks nodes.
9258

9259
    """
9260
    instance = self.replacer.instance
9261
    nl = [
9262
      self.cfg.GetMasterNode(),
9263
      instance.primary_node,
9264
      ]
9265
    if self.op.remote_node is not None:
9266
      nl.append(self.op.remote_node)
9267
    return nl, nl
9268

    
9269
  def CheckPrereq(self):
9270
    """Check prerequisites.
9271

9272
    """
9273
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9274
            self.op.iallocator is None)
9275

    
9276
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9277
    if owned_groups:
9278
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9279

    
9280
    return LogicalUnit.CheckPrereq(self)
9281

    
9282

    
9283
class TLReplaceDisks(Tasklet):
9284
  """Replaces disks for an instance.
9285

9286
  Note: Locking is not within the scope of this class.
9287

9288
  """
9289
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9290
               disks, delay_iallocator, early_release):
9291
    """Initializes this class.
9292

9293
    """
9294
    Tasklet.__init__(self, lu)
9295

    
9296
    # Parameters
9297
    self.instance_name = instance_name
9298
    self.mode = mode
9299
    self.iallocator_name = iallocator_name
9300
    self.remote_node = remote_node
9301
    self.disks = disks
9302
    self.delay_iallocator = delay_iallocator
9303
    self.early_release = early_release
9304

    
9305
    # Runtime data
9306
    self.instance = None
9307
    self.new_node = None
9308
    self.target_node = None
9309
    self.other_node = None
9310
    self.remote_node_info = None
9311
    self.node_secondary_ip = None
9312

    
9313
  @staticmethod
9314
  def CheckArguments(mode, remote_node, iallocator):
9315
    """Helper function for users of this class.
9316

9317
    """
9318
    # check for valid parameter combination
9319
    if mode == constants.REPLACE_DISK_CHG:
9320
      if remote_node is None and iallocator is None:
9321
        raise errors.OpPrereqError("When changing the secondary either an"
9322
                                   " iallocator script must be used or the"
9323
                                   " new node given", errors.ECODE_INVAL)
9324

    
9325
      if remote_node is not None and iallocator is not None:
9326
        raise errors.OpPrereqError("Give either the iallocator or the new"
9327
                                   " secondary, not both", errors.ECODE_INVAL)
9328

    
9329
    elif remote_node is not None or iallocator is not None:
9330
      # Not replacing the secondary
9331
      raise errors.OpPrereqError("The iallocator and new node options can"
9332
                                 " only be used when changing the"
9333
                                 " secondary node", errors.ECODE_INVAL)
9334

    
9335
  @staticmethod
9336
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9337
    """Compute a new secondary node using an IAllocator.
9338

9339
    """
9340
    ial = IAllocator(lu.cfg, lu.rpc,
9341
                     mode=constants.IALLOCATOR_MODE_RELOC,
9342
                     name=instance_name,
9343
                     relocate_from=list(relocate_from))
9344

    
9345
    ial.Run(iallocator_name)
9346

    
9347
    if not ial.success:
9348
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9349
                                 " %s" % (iallocator_name, ial.info),
9350
                                 errors.ECODE_NORES)
9351

    
9352
    if len(ial.result) != ial.required_nodes:
9353
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9354
                                 " of nodes (%s), required %s" %
9355
                                 (iallocator_name,
9356
                                  len(ial.result), ial.required_nodes),
9357
                                 errors.ECODE_FAULT)
9358

    
9359
    remote_node_name = ial.result[0]
9360

    
9361
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9362
               instance_name, remote_node_name)
9363

    
9364
    return remote_node_name
9365

    
9366
  def _FindFaultyDisks(self, node_name):
9367
    """Wrapper for L{_FindFaultyInstanceDisks}.
9368

9369
    """
9370
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9371
                                    node_name, True)
9372

    
9373
  def _CheckDisksActivated(self, instance):
9374
    """Checks if the instance disks are activated.
9375

9376
    @param instance: The instance to check disks
9377
    @return: True if they are activated, False otherwise
9378

9379
    """
9380
    nodes = instance.all_nodes
9381

    
9382
    for idx, dev in enumerate(instance.disks):
9383
      for node in nodes:
9384
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9385
        self.cfg.SetDiskID(dev, node)
9386

    
9387
        result = self.rpc.call_blockdev_find(node, dev)
9388

    
9389
        if result.offline:
9390
          continue
9391
        elif result.fail_msg or not result.payload:
9392
          return False
9393

    
9394
    return True
9395

    
9396
  def CheckPrereq(self):
9397
    """Check prerequisites.
9398

9399
    This checks that the instance is in the cluster.
9400

9401
    """
9402
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9403
    assert instance is not None, \
9404
      "Cannot retrieve locked instance %s" % self.instance_name
9405

    
9406
    if instance.disk_template != constants.DT_DRBD8:
9407
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9408
                                 " instances", errors.ECODE_INVAL)
9409

    
9410
    if len(instance.secondary_nodes) != 1:
9411
      raise errors.OpPrereqError("The instance has a strange layout,"
9412
                                 " expected one secondary but found %d" %
9413
                                 len(instance.secondary_nodes),
9414
                                 errors.ECODE_FAULT)
9415

    
9416
    if not self.delay_iallocator:
9417
      self._CheckPrereq2()
9418

    
9419
  def _CheckPrereq2(self):
9420
    """Check prerequisites, second part.
9421

9422
    This function should always be part of CheckPrereq. It was separated and is
9423
    now called from Exec because during node evacuation iallocator was only
9424
    called with an unmodified cluster model, not taking planned changes into
9425
    account.
9426

9427
    """
9428
    instance = self.instance
9429
    secondary_node = instance.secondary_nodes[0]
9430

    
9431
    if self.iallocator_name is None:
9432
      remote_node = self.remote_node
9433
    else:
9434
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9435
                                       instance.name, instance.secondary_nodes)
9436

    
9437
    if remote_node is None:
9438
      self.remote_node_info = None
9439
    else:
9440
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9441
             "Remote node '%s' is not locked" % remote_node
9442

    
9443
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9444
      assert self.remote_node_info is not None, \
9445
        "Cannot retrieve locked node %s" % remote_node
9446

    
9447
    if remote_node == self.instance.primary_node:
9448
      raise errors.OpPrereqError("The specified node is the primary node of"
9449
                                 " the instance", errors.ECODE_INVAL)
9450

    
9451
    if remote_node == secondary_node:
9452
      raise errors.OpPrereqError("The specified node is already the"
9453
                                 " secondary node of the instance",
9454
                                 errors.ECODE_INVAL)
9455

    
9456
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9457
                                    constants.REPLACE_DISK_CHG):
9458
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9459
                                 errors.ECODE_INVAL)
9460

    
9461
    if self.mode == constants.REPLACE_DISK_AUTO:
9462
      if not self._CheckDisksActivated(instance):
9463
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9464
                                   " first" % self.instance_name,
9465
                                   errors.ECODE_STATE)
9466
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9467
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9468

    
9469
      if faulty_primary and faulty_secondary:
9470
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9471
                                   " one node and can not be repaired"
9472
                                   " automatically" % self.instance_name,
9473
                                   errors.ECODE_STATE)
9474

    
9475
      if faulty_primary:
9476
        self.disks = faulty_primary
9477
        self.target_node = instance.primary_node
9478
        self.other_node = secondary_node
9479
        check_nodes = [self.target_node, self.other_node]
9480
      elif faulty_secondary:
9481
        self.disks = faulty_secondary
9482
        self.target_node = secondary_node
9483
        self.other_node = instance.primary_node
9484
        check_nodes = [self.target_node, self.other_node]
9485
      else:
9486
        self.disks = []
9487
        check_nodes = []
9488

    
9489
    else:
9490
      # Non-automatic modes
9491
      if self.mode == constants.REPLACE_DISK_PRI:
9492
        self.target_node = instance.primary_node
9493
        self.other_node = secondary_node
9494
        check_nodes = [self.target_node, self.other_node]
9495

    
9496
      elif self.mode == constants.REPLACE_DISK_SEC:
9497
        self.target_node = secondary_node
9498
        self.other_node = instance.primary_node
9499
        check_nodes = [self.target_node, self.other_node]
9500

    
9501
      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
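      # The data LV mirrors the disk's size; the second, fixed-size (128) LV
      # is the DRBD metadata volume that accompanies it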
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

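    # Steps 5 and 6: remove the old storage and wait for the mirror to resync;
    # with early_release the removal happens first so the node locks can be
    # dropped before the (potentially long) sync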
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

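    # With the old secondary's DRBD devices down, disconnect the primary's
    # DRBDs from the network so they can later be re-attached pointing at the
    # new secondary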
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9959
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9960
                                               self.instance.disks)[pnode]
9961

    
9962
    msg = result.fail_msg
9963
    if msg:
9964
      # detaches didn't succeed (unlikely)
9965
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9966
      raise errors.OpExecError("Can't detach the disks from the network on"
9967
                               " old node: %s" % (msg,))
9968

    
9969
    # if we managed to detach at least one, we update all the disks of
9970
    # the instance to point to the new secondary
9971
    self.lu.LogInfo("Updating instance configuration")
9972
    for dev, _, new_logical_id in iv_names.itervalues():
9973
      dev.logical_id = new_logical_id
9974
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9975

    
9976
    self.cfg.Update(self.instance, feedback_fn)
9977

    
9978
    # and now perform the drbd attach
9979
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9980
                    " (standalone => connected)")
9981
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9982
                                            self.new_node],
9983
                                           self.node_secondary_ip,
9984
                                           self.instance.disks,
9985
                                           self.instance.name,
9986
                                           False)
9987
    for to_node, to_result in result.items():
9988
      msg = to_result.fail_msg
9989
      if msg:
9990
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9991
                           to_node, msg,
9992
                           hint=("please do a gnt-instance info to see the"
9993
                                 " status of disks"))
9994
    cstep = 5
9995
    if self.early_release:
9996
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9997
      cstep += 1
9998
      self._RemoveOldStorage(self.target_node, iv_names)
9999
      # WARNING: we release all node locks here, do not do other RPCs
10000
      # than WaitForSync to the primary node
10001
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10002
                    names=[self.instance.primary_node,
10003
                           self.target_node,
10004
                           self.new_node])
10005

    
10006
    # Wait for sync
10007
    # This can fail as the old devices are degraded and _WaitForSync
10008
    # does a combined result over all disks, so we don't check its return value
10009
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10010
    cstep += 1
10011
    _WaitForSync(self.lu, self.instance)
10012

    
10013
    # Check all devices manually
10014
    self._CheckDevices(self.instance.primary_node, iv_names)
10015

    
10016
    # Step: remove old storage
10017
    if not self.early_release:
10018
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10019
      self._RemoveOldStorage(self.target_node, iv_names)
10020

    
10021

    
10022
class LURepairNodeStorage(NoHooksLU):
10023
  """Repairs the volume group on a node.
10024

10025
  """
10026
  REQ_BGL = False
10027

    
10028
  def CheckArguments(self):
10029
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10030

    
10031
    storage_type = self.op.storage_type
10032

    
10033
    if (constants.SO_FIX_CONSISTENCY not in
10034
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10035
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10036
                                 " repaired" % storage_type,
10037
                                 errors.ECODE_INVAL)
10038

    
10039
  def ExpandNames(self):
10040
    self.needed_locks = {
10041
      locking.LEVEL_NODE: [self.op.node_name],
10042
      }
10043

    
10044
  def _CheckFaultyDisks(self, instance, node_name):
10045
    """Ensure faulty disks abort the opcode or at least warn."""
10046
    try:
10047
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10048
                                  node_name, True):
10049
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10050
                                   " node '%s'" % (instance.name, node_name),
10051
                                   errors.ECODE_STATE)
10052
    except errors.OpPrereqError, err:
10053
      if self.op.ignore_consistency:
10054
        self.proc.LogWarning(str(err.args[0]))
10055
      else:
10056
        raise
10057

    
10058
  def CheckPrereq(self):
10059
    """Check prerequisites.
10060

10061
    """
10062
    # Check whether any instance on this node has faulty disks
10063
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10064
      if not inst.admin_up:
10065
        continue
10066
      check_nodes = set(inst.all_nodes)
10067
      check_nodes.discard(self.op.node_name)
10068
      for inst_node_name in check_nodes:
10069
        self._CheckFaultyDisks(inst, inst_node_name)
10070

    
10071
  def Exec(self, feedback_fn):
10072
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10073
                (self.op.name, self.op.node_name))
10074

    
10075
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10076
    result = self.rpc.call_storage_execute(self.op.node_name,
10077
                                           self.op.storage_type, st_args,
10078
                                           self.op.name,
10079
                                           constants.SO_FIX_CONSISTENCY)
10080
    result.Raise("Failed to repair storage unit '%s' on %s" %
10081
                 (self.op.name, self.op.node_name))
10082

    
10083

    
10084
class LUNodeEvacuate(NoHooksLU):
10085
  """Evacuates instances off a list of nodes.
10086

10087
  """
10088
  REQ_BGL = False
10089

    
10090
  def CheckArguments(self):
10091
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10092

    
10093
  def ExpandNames(self):
10094
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10095

    
10096
    if self.op.remote_node is not None:
10097
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10098
      assert self.op.remote_node
10099

    
10100
      if self.op.remote_node == self.op.node_name:
10101
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10102
                                   " secondary node", errors.ECODE_INVAL)
10103

    
10104
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10105
        raise errors.OpPrereqError("Without the use of an iallocator only"
10106
                                   " secondary instances can be evacuated",
10107
                                   errors.ECODE_INVAL)
10108

    
10109
    # Declare locks
10110
    self.share_locks = _ShareAll()
10111
    self.needed_locks = {
10112
      locking.LEVEL_INSTANCE: [],
10113
      locking.LEVEL_NODEGROUP: [],
10114
      locking.LEVEL_NODE: [],
10115
      }
10116

    
10117
    if self.op.remote_node is None:
10118
      # Iallocator will choose any node(s) in the same group
10119
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10120
    else:
10121
      group_nodes = frozenset([self.op.remote_node])
10122

    
10123
    # Determine nodes to be locked
10124
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10125

    
10126
  def _DetermineInstances(self):
10127
    """Builds list of instances to operate on.
10128

10129
    """
10130
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10131

    
10132
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10133
      # Primary instances only
10134
      inst_fn = _GetNodePrimaryInstances
10135
      assert self.op.remote_node is None, \
10136
        "Evacuating primary instances requires iallocator"
10137
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10138
      # Secondary instances only
10139
      inst_fn = _GetNodeSecondaryInstances
10140
    else:
10141
      # All instances
10142
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10143
      inst_fn = _GetNodeInstances
10144

    
10145
    return inst_fn(self.cfg, self.op.node_name)
10146

    
10147
  def DeclareLocks(self, level):
10148
    if level == locking.LEVEL_INSTANCE:
10149
      # Lock instances optimistically, needs verification once node and group
10150
      # locks have been acquired
10151
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10152
        set(i.name for i in self._DetermineInstances())
10153

    
10154
    elif level == locking.LEVEL_NODEGROUP:
10155
      # Lock node groups optimistically, needs verification once nodes have
10156
      # been acquired
10157
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10158
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10159

    
10160
    elif level == locking.LEVEL_NODE:
10161
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10162

    
10163
  def CheckPrereq(self):
10164
    # Verify locks
10165
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10166
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10167
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10168

    
10169
    assert owned_nodes == self.lock_nodes
10170

    
10171
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10172
    if owned_groups != wanted_groups:
10173
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10174
                               " current groups are '%s', used to be '%s'" %
10175
                               (utils.CommaJoin(wanted_groups),
10176
                                utils.CommaJoin(owned_groups)))
10177

    
10178
    # Determine affected instances
10179
    self.instances = self._DetermineInstances()
10180
    self.instance_names = [i.name for i in self.instances]
10181

    
10182
    if set(self.instance_names) != owned_instances:
10183
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10184
                               " were acquired, current instances are '%s',"
10185
                               " used to be '%s'" %
10186
                               (self.op.node_name,
10187
                                utils.CommaJoin(self.instance_names),
10188
                                utils.CommaJoin(owned_instances)))
10189

    
10190
    if self.instance_names:
10191
      self.LogInfo("Evacuating instances from node '%s': %s",
10192
                   self.op.node_name,
10193
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10194
    else:
10195
      self.LogInfo("No instances to evacuate from node '%s'",
10196
                   self.op.node_name)
10197

    
10198
    if self.op.remote_node is not None:
10199
      for i in self.instances:
10200
        if i.primary_node == self.op.remote_node:
10201
          raise errors.OpPrereqError("Node %s is the primary node of"
10202
                                     " instance %s, cannot use it as"
10203
                                     " secondary" %
10204
                                     (self.op.remote_node, i.name),
10205
                                     errors.ECODE_INVAL)
10206

    
10207
  def Exec(self, feedback_fn):
10208
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10209

    
10210
    if not self.instance_names:
10211
      # No instances to evacuate
10212
      jobs = []
10213

    
10214
    elif self.op.iallocator is not None:
10215
      # TODO: Implement relocation to other group
10216
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10217
                       evac_mode=self.op.mode,
10218
                       instances=list(self.instance_names))
10219

    
10220
      ial.Run(self.op.iallocator)
10221

    
10222
      if not ial.success:
10223
        raise errors.OpPrereqError("Can't compute node evacuation using"
10224
                                   " iallocator '%s': %s" %
10225
                                   (self.op.iallocator, ial.info),
10226
                                   errors.ECODE_NORES)
10227

    
10228
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10229

    
10230
    elif self.op.remote_node is not None:
10231
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10232
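      # One single-opcode job per instance, replacing its secondary with the
      # given remote node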
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

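  # Each entry in "jobs" is a list of serialized opcodes; turn them back into
  # OpCode objects and propagate the early_release flag to the ones that
  # support it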
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

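    # Every node reported success, so record the new size in the configuration
    # before (optionally) waiting for the resync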
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
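        # Query the hypervisor on the primary node; a payload containing a
        # "state" entry means the instance is running there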
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
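      # Also reflect a NIC added or removed by this very operation in the
      # hook data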
      if constants.DDM_ADD in nic_override:
10756
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10757
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10758
        nicparams = self.nic_pnew[constants.DDM_ADD]
10759
        mode = nicparams[constants.NIC_MODE]
10760
        link = nicparams[constants.NIC_LINK]
10761
        args["nics"].append((ip, mac, mode, link))
10762
      elif constants.DDM_REMOVE in nic_override:
10763
        del args["nics"][-1]
10764

    
10765
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10766
    if self.op.disk_template:
10767
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10768

    
10769
    return env
10770

    
10771
  def BuildHooksNodes(self):
10772
    """Build hooks nodes.
10773

10774
    """
10775
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10776
    return (nl, nl)
10777

    
10778
  def CheckPrereq(self):
10779
    """Check prerequisites.
10780

10781
    This only checks the instance list against the existing names.
10782

10783
    """
10784
    # checking the new params on the primary/secondary nodes
10785

    
10786
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10787
    cluster = self.cluster = self.cfg.GetClusterInfo()
10788
    assert self.instance is not None, \
10789
      "Cannot retrieve locked instance %s" % self.op.instance_name
10790
    pnode = instance.primary_node
10791
    nodelist = list(instance.all_nodes)
10792

    
10793
    # OS change
10794
    if self.op.os_name and not self.op.force:
10795
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10796
                      self.op.force_variant)
10797
      instance_os = self.op.os_name
10798
    else:
10799
      instance_os = instance.os
10800

    
10801
    if self.op.disk_template:
10802
      if instance.disk_template == self.op.disk_template:
10803
        raise errors.OpPrereqError("Instance already has disk template %s" %
10804
                                   instance.disk_template, errors.ECODE_INVAL)
10805

    
10806
      if (instance.disk_template,
10807
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10808
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10809
                                   " %s to %s" % (instance.disk_template,
10810
                                                  self.op.disk_template),
10811
                                   errors.ECODE_INVAL)
10812
      _CheckInstanceDown(self, instance, "cannot change disk template")
10813
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10814
        if self.op.remote_node == pnode:
10815
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10816
                                     " as the primary node of the instance" %
10817
                                     self.op.remote_node, errors.ECODE_STATE)
10818
        _CheckNodeOnline(self, self.op.remote_node)
10819
        _CheckNodeNotDrained(self, self.op.remote_node)
10820
        # FIXME: here we assume that the old instance type is DT_PLAIN
10821
        assert instance.disk_template == constants.DT_PLAIN
10822
        disks = [{constants.IDISK_SIZE: d.size,
10823
                  constants.IDISK_VG: d.logical_id[0]}
10824
                 for d in instance.disks]
10825
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10826
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10827

    
10828
    # hvparams processing
10829
    if self.op.hvparams:
10830
      hv_type = instance.hypervisor
10831
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10832
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10833
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10834

    
10835
      # local check
10836
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10837
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10838
      self.hv_new = hv_new # the new actual values
10839
      self.hv_inst = i_hvdict # the new dict (without defaults)
10840
    else:
10841
      self.hv_new = self.hv_inst = {}
10842

    
10843
    # beparams processing
10844
    if self.op.beparams:
10845
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10846
                                   use_none=True)
10847
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10848
      be_new = cluster.SimpleFillBE(i_bedict)
10849
      self.be_new = be_new # the new actual values
10850
      self.be_inst = i_bedict # the new dict (without defaults)
10851
    else:
10852
      self.be_new = self.be_inst = {}
10853
    be_old = cluster.FillBE(instance)
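    # be_old holds the currently effective backend parameters (with cluster
    # defaults applied); it is only used below to detect whether the
    # requested memory size is actually an increase.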
10854

    
10855
    # osparams processing
10856
    if self.op.osparams:
10857
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10858
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10859
      self.os_inst = i_osdict # the new dict (without defaults)
10860
    else:
10861
      self.os_inst = {}
10862

    
10863
    self.warn = []
10864

    
10865
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10866
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10867
      mem_check_list = [pnode]
10868
      if be_new[constants.BE_AUTO_BALANCE]:
10869
        # either we changed auto_balance to yes or it was from before
10870
        mem_check_list.extend(instance.secondary_nodes)
10871
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10872
                                                  instance.hypervisor)
10873
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10874
                                         instance.hypervisor)
10875
      pninfo = nodeinfo[pnode]
10876
      msg = pninfo.fail_msg
10877
      if msg:
10878
        # Assume the primary node is unreachable and go ahead
10879
        self.warn.append("Can't get info from primary node %s: %s" %
10880
                         (pnode, msg))
10881
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10882
        self.warn.append("Node data from primary node %s doesn't contain"
10883
                         " free memory information" % pnode)
10884
      elif instance_info.fail_msg:
10885
        self.warn.append("Can't get instance runtime information: %s" %
10886
                        instance_info.fail_msg)
10887
      else:
10888
        if instance_info.payload:
10889
          current_mem = int(instance_info.payload["memory"])
10890
        else:
10891
          # Assume instance not running
10892
          # (there is a slight race condition here, but it's not very probable,
10893
          # and we have no other way to check)
10894
          current_mem = 0
10895
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10896
                    pninfo.payload["memory_free"])
10897
        if miss_mem > 0:
10898
          raise errors.OpPrereqError("This change will prevent the instance"
10899
                                     " from starting, due to %d MB of memory"
10900
                                     " missing on its primary node" % miss_mem,
10901
                                     errors.ECODE_NORES)
10902

    
10903
      if be_new[constants.BE_AUTO_BALANCE]:
10904
        for node, nres in nodeinfo.items():
10905
          if node not in instance.secondary_nodes:
10906
            continue
10907
          nres.Raise("Can't get info from secondary node %s" % node,
10908
                     prereq=True, ecode=errors.ECODE_STATE)
10909
          if not isinstance(nres.payload.get("memory_free", None), int):
10910
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10911
                                       " memory information" % node,
10912
                                       errors.ECODE_STATE)
10913
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10914
            raise errors.OpPrereqError("This change will prevent the instance"
10915
                                       " from failover to its secondary node"
10916
                                       " %s, due to not enough memory" % node,
10917
                                       errors.ECODE_STATE)
10918

    
10919
    # NIC processing
10920
    self.nic_pnew = {}
10921
    self.nic_pinst = {}
10922
    for nic_op, nic_dict in self.op.nics:
10923
      if nic_op == constants.DDM_REMOVE:
10924
        if not instance.nics:
10925
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10926
                                     errors.ECODE_INVAL)
10927
        continue
10928
      if nic_op != constants.DDM_ADD:
10929
        # an existing nic
10930
        if not instance.nics:
10931
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10932
                                     " no NICs" % nic_op,
10933
                                     errors.ECODE_INVAL)
10934
        if nic_op < 0 or nic_op >= len(instance.nics):
10935
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10936
                                     " are 0 to %d" %
10937
                                     (nic_op, len(instance.nics) - 1),
10938
                                     errors.ECODE_INVAL)
10939
        old_nic_params = instance.nics[nic_op].nicparams
10940
        old_nic_ip = instance.nics[nic_op].ip
10941
      else:
10942
        old_nic_params = {}
10943
        old_nic_ip = None
10944

    
10945
      update_params_dict = dict([(key, nic_dict[key])
10946
                                 for key in constants.NICS_PARAMETERS
10947
                                 if key in nic_dict])
10948

    
10949
      if "bridge" in nic_dict:
10950
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10951

    
10952
      new_nic_params = _GetUpdatedParams(old_nic_params,
10953
                                         update_params_dict)
10954
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10955
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10956
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10957
      self.nic_pinst[nic_op] = new_nic_params
10958
      self.nic_pnew[nic_op] = new_filled_nic_params
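      # As with hvparams/beparams above, nic_pinst[nic_op] stores only the
      # explicit per-NIC overrides (persisted by Exec()), while
      # nic_pnew[nic_op] is the same dict filled with cluster defaults and is
      # used below for validation and for reporting the effective mode/link.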
10959
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10960

    
10961
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10962
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10963
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10964
        if msg:
10965
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10966
          if self.op.force:
10967
            self.warn.append(msg)
10968
          else:
10969
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10970
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10971
        if constants.INIC_IP in nic_dict:
10972
          nic_ip = nic_dict[constants.INIC_IP]
10973
        else:
10974
          nic_ip = old_nic_ip
10975
        if nic_ip is None:
10976
          raise errors.OpPrereqError("Cannot set the nic ip to None"
10977
                                     " on a routed nic", errors.ECODE_INVAL)
10978
      if constants.INIC_MAC in nic_dict:
10979
        nic_mac = nic_dict[constants.INIC_MAC]
10980
        if nic_mac is None:
10981
          raise errors.OpPrereqError("Cannot set the nic mac to None",
10982
                                     errors.ECODE_INVAL)
10983
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10984
          # otherwise generate the mac
10985
          nic_dict[constants.INIC_MAC] = \
10986
            self.cfg.GenerateMAC(self.proc.GetECId())
10987
        else:
10988
          # or validate/reserve the current one
10989
          try:
10990
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10991
          except errors.ReservationError:
10992
            raise errors.OpPrereqError("MAC address %s already in use"
10993
                                       " in cluster" % nic_mac,
10994
                                       errors.ECODE_NOTUNIQUE)
10995

    
10996
    # DISK processing
10997
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10998
      raise errors.OpPrereqError("Disk operations not supported for"
10999
                                 " diskless instances",
11000
                                 errors.ECODE_INVAL)
11001
    for disk_op, _ in self.op.disks:
11002
      if disk_op == constants.DDM_REMOVE:
11003
        if len(instance.disks) == 1:
11004
          raise errors.OpPrereqError("Cannot remove the last disk of"
11005
                                     " an instance", errors.ECODE_INVAL)
11006
        _CheckInstanceDown(self, instance, "cannot remove disks")
11007

    
11008
      if (disk_op == constants.DDM_ADD and
11009
          len(instance.disks) >= constants.MAX_DISKS):
11010
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11011
                                   " add more" % constants.MAX_DISKS,
11012
                                   errors.ECODE_STATE)
11013
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11014
        # an existing disk
11015
        if disk_op < 0 or disk_op >= len(instance.disks):
11016
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11017
                                     " are 0 to %d" %
11018
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
11020

    
11021
    return
11022

    
11023
  def _ConvertPlainToDrbd(self, feedback_fn):
11024
    """Converts an instance from plain to drbd.
11025

11026
    """
11027
    feedback_fn("Converting template to drbd")
11028
    instance = self.instance
11029
    pnode = instance.primary_node
11030
    snode = self.op.remote_node
11031

    
11032
    # create a fake disk info for _GenerateDiskTemplate
11033
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11034
                  constants.IDISK_VG: d.logical_id[0]}
11035
                 for d in instance.disks]
11036
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11037
                                      instance.name, pnode, [snode],
11038
                                      disk_info, None, None, 0, feedback_fn)
11039
    info = _GetInstanceInfoText(instance)
11040
    feedback_fn("Creating aditional volumes...")
11041
    # first, create the missing data and meta devices
11042
    for disk in new_disks:
11043
      # unfortunately this is... not too nice
11044
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11045
                            info, True)
11046
      for child in disk.children:
11047
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11048
    # at this stage, all new LVs have been created, we can rename the
11049
    # old ones
11050
    feedback_fn("Renaming original volumes...")
11051
    rename_list = [(o, n.children[0].logical_id)
11052
                   for (o, n) in zip(instance.disks, new_disks)]
11053
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11054
    result.Raise("Failed to rename original LVs")
11055

    
11056
    feedback_fn("Initializing DRBD devices...")
11057
    # all child devices are in place, we can now create the DRBD devices
11058
    for disk in new_disks:
11059
      for node in [pnode, snode]:
11060
        f_create = node == pnode
11061
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11062

    
11063
    # at this point, the instance has been modified
11064
    instance.disk_template = constants.DT_DRBD8
11065
    instance.disks = new_disks
11066
    self.cfg.Update(instance, feedback_fn)
11067

    
11068
    # disks are created, waiting for sync
11069
    disk_abort = not _WaitForSync(self, instance,
11070
                                  oneshot=not self.op.wait_for_sync)
11071
    if disk_abort:
11072
      raise errors.OpExecError("There are some degraded disks for"
11073
                               " this instance, please cleanup manually")
11074

    
11075
  def _ConvertDrbdToPlain(self, feedback_fn):
11076
    """Converts an instance from drbd to plain.
11077

11078
    """
11079
    instance = self.instance
11080
    assert len(instance.secondary_nodes) == 1
11081
    pnode = instance.primary_node
11082
    snode = instance.secondary_nodes[0]
11083
    feedback_fn("Converting template to plain")
11084

    
11085
    old_disks = instance.disks
11086
    new_disks = [d.children[0] for d in old_disks]
11087

    
11088
    # copy over size and mode
11089
    for parent, child in zip(old_disks, new_disks):
11090
      child.size = parent.size
11091
      child.mode = parent.mode
11092

    
11093
    # update instance structure
11094
    instance.disks = new_disks
11095
    instance.disk_template = constants.DT_PLAIN
11096
    self.cfg.Update(instance, feedback_fn)
11097

    
11098
    feedback_fn("Removing volumes on the secondary node...")
11099
    for disk in old_disks:
11100
      self.cfg.SetDiskID(disk, snode)
11101
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11102
      if msg:
11103
        self.LogWarning("Could not remove block device %s on node %s,"
11104
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11105

    
11106
    feedback_fn("Removing unneeded volumes on the primary node...")
11107
    for idx, disk in enumerate(old_disks):
11108
      meta = disk.children[1]
11109
      self.cfg.SetDiskID(meta, pnode)
11110
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11111
      if msg:
11112
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11113
                        " continuing anyway: %s", idx, pnode, msg)
11114

    
11115
  def Exec(self, feedback_fn):
11116
    """Modifies an instance.
11117

11118
    All parameters take effect only at the next restart of the instance.
11119

11120
    """
11121
    # Process here the warnings from CheckPrereq, as we don't have a
11122
    # feedback_fn there.
11123
    for warn in self.warn:
11124
      feedback_fn("WARNING: %s" % warn)
11125

    
11126
    result = []
11127
    instance = self.instance
11128
    # disk changes
11129
    for disk_op, disk_dict in self.op.disks:
11130
      if disk_op == constants.DDM_REMOVE:
11131
        # remove the last disk
11132
        device = instance.disks.pop()
11133
        device_idx = len(instance.disks)
11134
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11135
          self.cfg.SetDiskID(disk, node)
11136
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11137
          if msg:
11138
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11139
                            " continuing anyway", device_idx, node, msg)
11140
        result.append(("disk/%d" % device_idx, "remove"))
11141
      elif disk_op == constants.DDM_ADD:
11142
        # add a new disk
11143
        if instance.disk_template in (constants.DT_FILE,
11144
                                      constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
11146
          file_path = os.path.dirname(file_path)
11147
        else:
11148
          file_driver = file_path = None
11149
        disk_idx_base = len(instance.disks)
11150
        new_disk = _GenerateDiskTemplate(self,
11151
                                         instance.disk_template,
11152
                                         instance.name, instance.primary_node,
11153
                                         instance.secondary_nodes,
11154
                                         [disk_dict],
11155
                                         file_path,
11156
                                         file_driver,
11157
                                         disk_idx_base, feedback_fn)[0]
11158
        instance.disks.append(new_disk)
11159
        info = _GetInstanceInfoText(instance)
11160

    
11161
        logging.info("Creating volume %s for instance %s",
11162
                     new_disk.iv_name, instance.name)
11163
        # Note: this needs to be kept in sync with _CreateDisks
11164
        #HARDCODE
11165
        for node in instance.all_nodes:
11166
          f_create = node == instance.primary_node
11167
          try:
11168
            _CreateBlockDev(self, node, instance, new_disk,
11169
                            f_create, info, f_create)
11170
          except errors.OpExecError, err:
11171
            self.LogWarning("Failed to create volume %s (%s) on"
11172
                            " node %s: %s",
11173
                            new_disk.iv_name, new_disk, node, err)
11174
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11175
                       (new_disk.size, new_disk.mode)))
11176
      else:
11177
        # change a given disk
11178
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11179
        result.append(("disk.mode/%d" % disk_op,
11180
                       disk_dict[constants.IDISK_MODE]))
11181

    
11182
    if self.op.disk_template:
11183
      r_shut = _ShutdownInstanceDisks(self, instance)
11184
      if not r_shut:
11185
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11186
                                 " proceed with disk template conversion")
11187
      mode = (instance.disk_template, self.op.disk_template)
11188
      try:
11189
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11190
      except:
11191
        self.cfg.ReleaseDRBDMinors(instance.name)
11192
        raise
11193
      result.append(("disk_template", self.op.disk_template))
11194

    
11195
    # NIC changes
11196
    for nic_op, nic_dict in self.op.nics:
11197
      if nic_op == constants.DDM_REMOVE:
11198
        # remove the last nic
11199
        del instance.nics[-1]
11200
        result.append(("nic.%d" % len(instance.nics), "remove"))
11201
      elif nic_op == constants.DDM_ADD:
11202
        # mac and bridge should be set, by now
11203
        mac = nic_dict[constants.INIC_MAC]
11204
        ip = nic_dict.get(constants.INIC_IP, None)
11205
        nicparams = self.nic_pinst[constants.DDM_ADD]
11206
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11207
        instance.nics.append(new_nic)
11208
        result.append(("nic.%d" % (len(instance.nics) - 1),
11209
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11210
                       (new_nic.mac, new_nic.ip,
11211
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11212
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11213
                       )))
11214
      else:
11215
        for key in (constants.INIC_MAC, constants.INIC_IP):
11216
          if key in nic_dict:
11217
            setattr(instance.nics[nic_op], key, nic_dict[key])
11218
        if nic_op in self.nic_pinst:
11219
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11220
        for key, val in nic_dict.iteritems():
11221
          result.append(("nic.%s/%d" % (key, nic_op), val))
11222

    
11223
    # hvparams changes
11224
    if self.op.hvparams:
11225
      instance.hvparams = self.hv_inst
11226
      for key, val in self.op.hvparams.iteritems():
11227
        result.append(("hv/%s" % key, val))
11228

    
11229
    # beparams changes
11230
    if self.op.beparams:
11231
      instance.beparams = self.be_inst
11232
      for key, val in self.op.beparams.iteritems():
11233
        result.append(("be/%s" % key, val))
11234

    
11235
    # OS change
11236
    if self.op.os_name:
11237
      instance.os = self.op.os_name
11238

    
11239
    # osparams changes
11240
    if self.op.osparams:
11241
      instance.osparams = self.os_inst
11242
      for key, val in self.op.osparams.iteritems():
11243
        result.append(("os/%s" % key, val))
11244

    
11245
    self.cfg.Update(instance, feedback_fn)
11246

    
11247
    return result
11248

    
11249
  _DISK_CONVERSIONS = {
11250
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11251
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11252
    }
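  # Added commentary: Exec() looks up the conversion as
  #   mode = (instance.disk_template, self.op.disk_template)
  # and calls self._DISK_CONVERSIONS[mode](self, feedback_fn), so only
  # plain<->drbd conversions are supported; anything else is rejected
  # earlier in CheckPrereq ("Unsupported disk template conversion").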
11253

    
11254

    
11255
class LUInstanceChangeGroup(LogicalUnit):
11256
  HPATH = "instance-change-group"
11257
  HTYPE = constants.HTYPE_INSTANCE
11258
  REQ_BGL = False
11259

    
11260
  def ExpandNames(self):
11261
    self.share_locks = _ShareAll()
11262
    self.needed_locks = {
11263
      locking.LEVEL_NODEGROUP: [],
11264
      locking.LEVEL_NODE: [],
11265
      }
11266

    
11267
    self._ExpandAndLockInstance()
11268

    
11269
    if self.op.target_groups:
11270
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11271
                                  self.op.target_groups)
11272
    else:
11273
      self.req_target_uuids = None
11274

    
11275
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11276

    
11277
  def DeclareLocks(self, level):
11278
    if level == locking.LEVEL_NODEGROUP:
11279
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11280

    
11281
      if self.req_target_uuids:
11282
        lock_groups = set(self.req_target_uuids)
11283

    
11284
        # Lock all groups used by instance optimistically; this requires going
11285
        # via the node before it's locked, requiring verification later on
11286
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11287
        lock_groups.update(instance_groups)
11288
      else:
11289
        # No target groups, need to lock all of them
11290
        lock_groups = locking.ALL_SET
11291

    
11292
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11293

    
11294
    elif level == locking.LEVEL_NODE:
11295
      if self.req_target_uuids:
11296
        # Lock all nodes used by instances
11297
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11298
        self._LockInstancesNodes()
11299

    
11300
        # Lock all nodes in all potential target groups
11301
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11302
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11303
        member_nodes = [node_name
11304
                        for group in lock_groups
11305
                        for node_name in self.cfg.GetNodeGroup(group).members]
11306
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11307
      else:
11308
        # Lock all nodes as all groups are potential targets
11309
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
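    # Added commentary: the group and node locks here are chosen
    # optimistically (groups are looked up before the node locks are held);
    # CheckPrereq re-checks that the owned locks still cover the instance's
    # nodes and node groups and complains if anything changed in between.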
11310

    
11311
  def CheckPrereq(self):
11312
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11313
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11314
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11315

    
11316
    assert (self.req_target_uuids is None or
11317
            owned_groups.issuperset(self.req_target_uuids))
11318
    assert owned_instances == set([self.op.instance_name])
11319

    
11320
    # Get instance information
11321
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11322

    
11323
    # Check if node groups for locked instance are still correct
11324
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11325
      ("Instance %s's nodes changed while we kept the lock" %
11326
       self.op.instance_name)
11327

    
11328
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11329
                                           owned_groups)
11330

    
11331
    if self.req_target_uuids:
11332
      # User requested specific target groups
11333
      self.target_uuids = self.req_target_uuids
11334
    else:
11335
      # All groups except those used by the instance are potential targets
11336
      self.target_uuids = owned_groups - inst_groups
11337

    
11338
    conflicting_groups = self.target_uuids & inst_groups
11339
    if conflicting_groups:
11340
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11341
                                 " used by the instance '%s'" %
11342
                                 (utils.CommaJoin(conflicting_groups),
11343
                                  self.op.instance_name),
11344
                                 errors.ECODE_INVAL)
11345

    
11346
    if not self.target_uuids:
11347
      raise errors.OpPrereqError("There are no possible target groups",
11348
                                 errors.ECODE_INVAL)
11349

    
11350
  def BuildHooksEnv(self):
11351
    """Build hooks env.
11352

11353
    """
11354
    assert self.target_uuids
11355

    
11356
    env = {
11357
      "TARGET_GROUPS": " ".join(self.target_uuids),
11358
      }
11359

    
11360
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11361

    
11362
    return env
11363

    
11364
  def BuildHooksNodes(self):
11365
    """Build hooks nodes.
11366

11367
    """
11368
    mn = self.cfg.GetMasterNode()
11369
    return ([mn], [mn])
11370

    
11371
  def Exec(self, feedback_fn):
11372
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11373

    
11374
    assert instances == [self.op.instance_name], "Instance not locked"
11375

    
11376
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11377
                     instances=instances, target_groups=list(self.target_uuids))
11378

    
11379
    ial.Run(self.op.iallocator)
11380

    
11381
    if not ial.success:
11382
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11383
                                 " instance '%s' using iallocator '%s': %s" %
11384
                                 (self.op.instance_name, self.op.iallocator,
11385
                                  ial.info),
11386
                                 errors.ECODE_NORES)
11387

    
11388
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11389

    
11390
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11391
                 " instance '%s'", len(jobs), self.op.instance_name)
11392

    
11393
    return ResultWithJobs(jobs)
11394

    
11395

    
11396
class LUBackupQuery(NoHooksLU):
11397
  """Query the exports list
11398

11399
  """
11400
  REQ_BGL = False
11401

    
11402
  def ExpandNames(self):
11403
    self.needed_locks = {}
11404
    self.share_locks[locking.LEVEL_NODE] = 1
11405
    if not self.op.nodes:
11406
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11407
    else:
11408
      self.needed_locks[locking.LEVEL_NODE] = \
11409
        _GetWantedNodes(self, self.op.nodes)
11410

    
11411
  def Exec(self, feedback_fn):
11412
    """Compute the list of all the exported system images.
11413

11414
    @rtype: dict
11415
    @return: a dictionary with the structure node->(export-list)
11416
        where export-list is a list of the instances exported on
11417
        that node.
11418

11419
    """
11420
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11421
    rpcresult = self.rpc.call_export_list(self.nodes)
11422
    result = {}
11423
    for node in rpcresult:
11424
      if rpcresult[node].fail_msg:
11425
        result[node] = False
11426
      else:
11427
        result[node] = rpcresult[node].payload
11428

    
11429
    return result
11430

    
11431

    
11432
class LUBackupPrepare(NoHooksLU):
11433
  """Prepares an instance for an export and returns useful information.
11434

11435
  """
11436
  REQ_BGL = False
11437

    
11438
  def ExpandNames(self):
11439
    self._ExpandAndLockInstance()
11440

    
11441
  def CheckPrereq(self):
11442
    """Check prerequisites.
11443

11444
    """
11445
    instance_name = self.op.instance_name
11446

    
11447
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11448
    assert self.instance is not None, \
11449
          "Cannot retrieve locked instance %s" % self.op.instance_name
11450
    _CheckNodeOnline(self, self.instance.primary_node)
11451

    
11452
    self._cds = _GetClusterDomainSecret()
11453

    
11454
  def Exec(self, feedback_fn):
11455
    """Prepares an instance for an export.
11456

11457
    """
11458
    instance = self.instance
11459

    
11460
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11461
      salt = utils.GenerateSecret(8)
11462

    
11463
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11464
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11465
                                              constants.RIE_CERT_VALIDITY)
11466
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11467

    
11468
      (name, cert_pem) = result.payload
11469

    
11470
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11471
                                             cert_pem)
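      # Added commentary: the dict returned below is meant for the remote
      # export workflow -- LUBackupExport.CheckPrereq later re-verifies the
      # "x509_key_name" HMAC and the signed "x509_ca" against the same
      # cluster domain secret before any disk data is transferred.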
11472

    
11473
      return {
11474
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11475
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11476
                          salt),
11477
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11478
        }
11479

    
11480
    return None
11481

    
11482

    
11483
class LUBackupExport(LogicalUnit):
11484
  """Export an instance to an image in the cluster.
11485

11486
  """
11487
  HPATH = "instance-export"
11488
  HTYPE = constants.HTYPE_INSTANCE
11489
  REQ_BGL = False
11490

    
11491
  def CheckArguments(self):
11492
    """Check the arguments.
11493

11494
    """
11495
    self.x509_key_name = self.op.x509_key_name
11496
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11497

    
11498
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11499
      if not self.x509_key_name:
11500
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11501
                                   errors.ECODE_INVAL)
11502

    
11503
      if not self.dest_x509_ca_pem:
11504
        raise errors.OpPrereqError("Missing destination X509 CA",
11505
                                   errors.ECODE_INVAL)
11506

    
11507
  def ExpandNames(self):
11508
    self._ExpandAndLockInstance()
11509

    
11510
    # Lock all nodes for local exports
11511
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11512
      # FIXME: lock only instance primary and destination node
11513
      #
11514
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
11516
      # remove it from its current node. In the future we could fix this by:
11517
      #  - making a tasklet to search (share-lock all), then create the
11518
      #    new one, then one to remove, after
11519
      #  - removing the removal operation altogether
11520
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11521

    
11522
  def DeclareLocks(self, level):
11523
    """Last minute lock declaration."""
11524
    # All nodes are locked anyway, so nothing to do here.
11525

    
11526
  def BuildHooksEnv(self):
11527
    """Build hooks env.
11528

11529
    This will run on the master, primary node and target node.
11530

11531
    """
11532
    env = {
11533
      "EXPORT_MODE": self.op.mode,
11534
      "EXPORT_NODE": self.op.target_node,
11535
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11536
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11537
      # TODO: Generic function for boolean env variables
11538
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11539
      }
11540

    
11541
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11542

    
11543
    return env
11544

    
11545
  def BuildHooksNodes(self):
11546
    """Build hooks nodes.
11547

11548
    """
11549
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11550

    
11551
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11552
      nl.append(self.op.target_node)
11553

    
11554
    return (nl, nl)
11555

    
11556
  def CheckPrereq(self):
11557
    """Check prerequisites.
11558

11559
    This checks that the instance and node names are valid.
11560

11561
    """
11562
    instance_name = self.op.instance_name
11563

    
11564
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11565
    assert self.instance is not None, \
11566
          "Cannot retrieve locked instance %s" % self.op.instance_name
11567
    _CheckNodeOnline(self, self.instance.primary_node)
11568

    
11569
    if (self.op.remove_instance and self.instance.admin_up and
11570
        not self.op.shutdown):
11571
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11572
                                 " down before")
11573

    
11574
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11575
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11576
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11577
      assert self.dst_node is not None
11578

    
11579
      _CheckNodeOnline(self, self.dst_node.name)
11580
      _CheckNodeNotDrained(self, self.dst_node.name)
11581

    
11582
      self._cds = None
11583
      self.dest_disk_info = None
11584
      self.dest_x509_ca = None
11585

    
11586
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11587
      self.dst_node = None
11588

    
11589
      if len(self.op.target_node) != len(self.instance.disks):
11590
        raise errors.OpPrereqError(("Received destination information for %s"
11591
                                    " disks, but instance %s has %s disks") %
11592
                                   (len(self.op.target_node), instance_name,
11593
                                    len(self.instance.disks)),
11594
                                   errors.ECODE_INVAL)
11595

    
11596
      cds = _GetClusterDomainSecret()
11597

    
11598
      # Check X509 key name
11599
      try:
11600
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11601
      except (TypeError, ValueError), err:
11602
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11603

    
11604
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11605
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11606
                                   errors.ECODE_INVAL)
11607

    
11608
      # Load and verify CA
11609
      try:
11610
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11611
      except OpenSSL.crypto.Error, err:
11612
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11613
                                   (err, ), errors.ECODE_INVAL)
11614

    
11615
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11616
      if errcode is not None:
11617
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11618
                                   (msg, ), errors.ECODE_INVAL)
11619

    
11620
      self.dest_x509_ca = cert
11621

    
11622
      # Verify target information
11623
      disk_info = []
11624
      for idx, disk_data in enumerate(self.op.target_node):
11625
        try:
11626
          (host, port, magic) = \
11627
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11628
        except errors.GenericError, err:
11629
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11630
                                     (idx, err), errors.ECODE_INVAL)
11631

    
11632
        disk_info.append((host, port, magic))
11633

    
11634
      assert len(disk_info) == len(self.op.target_node)
11635
      self.dest_disk_info = disk_info
11636

    
11637
    else:
11638
      raise errors.ProgrammerError("Unhandled export mode %r" %
11639
                                   self.op.mode)
11640

    
11641
    # instance disk type verification
11642
    # TODO: Implement export support for file-based disks
11643
    for disk in self.instance.disks:
11644
      if disk.dev_type == constants.LD_FILE:
11645
        raise errors.OpPrereqError("Export not supported for instances with"
11646
                                   " file-based disks", errors.ECODE_INVAL)
11647

    
11648
  def _CleanupExports(self, feedback_fn):
11649
    """Removes exports of current instance from all other nodes.
11650

11651
    If an instance in a cluster with nodes A..D was exported to node C, its
11652
    exports will be removed from the nodes A, B and D.
11653

11654
    """
11655
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11656

    
11657
    nodelist = self.cfg.GetNodeList()
11658
    nodelist.remove(self.dst_node.name)
11659

    
11660
    # on one-node clusters nodelist will be empty after the removal
11661
    # if we proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
11663
    iname = self.instance.name
11664
    if nodelist:
11665
      feedback_fn("Removing old exports for instance %s" % iname)
11666
      exportlist = self.rpc.call_export_list(nodelist)
11667
      for node in exportlist:
11668
        if exportlist[node].fail_msg:
11669
          continue
11670
        if iname in exportlist[node].payload:
11671
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11672
          if msg:
11673
            self.LogWarning("Could not remove older export for instance %s"
11674
                            " on node %s: %s", iname, node, msg)
11675

    
11676
  def Exec(self, feedback_fn):
11677
    """Export an instance to an image in the cluster.
11678

11679
    """
11680
    assert self.op.mode in constants.EXPORT_MODES
11681

    
11682
    instance = self.instance
11683
    src_node = instance.primary_node
11684

    
11685
    if self.op.shutdown:
11686
      # shutdown the instance, but not the disks
11687
      feedback_fn("Shutting down instance %s" % instance.name)
11688
      result = self.rpc.call_instance_shutdown(src_node, instance,
11689
                                               self.op.shutdown_timeout)
11690
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11691
      result.Raise("Could not shutdown instance %s on"
11692
                   " node %s" % (instance.name, src_node))
11693

    
11694
    # set the disks ID correctly since call_instance_start needs the
11695
    # correct drbd minor to create the symlinks
11696
    for disk in instance.disks:
11697
      self.cfg.SetDiskID(disk, src_node)
11698

    
11699
    activate_disks = (not instance.admin_up)
11700

    
11701
    if activate_disks:
11702
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
11704
      _StartInstanceDisks(self, instance, None)
11705

    
11706
    try:
11707
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11708
                                                     instance)
11709

    
11710
      helper.CreateSnapshots()
11711
      try:
11712
        if (self.op.shutdown and instance.admin_up and
11713
            not self.op.remove_instance):
11714
          assert not activate_disks
11715
          feedback_fn("Starting instance %s" % instance.name)
11716
          result = self.rpc.call_instance_start(src_node, instance,
11717
                                                None, None, False)
11718
          msg = result.fail_msg
11719
          if msg:
11720
            feedback_fn("Failed to start instance: %s" % msg)
11721
            _ShutdownInstanceDisks(self, instance)
11722
            raise errors.OpExecError("Could not start instance: %s" % msg)
11723

    
11724
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11725
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11726
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11727
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11728
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11729

    
11730
          (key_name, _, _) = self.x509_key_name
11731

    
11732
          dest_ca_pem = \
11733
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11734
                                            self.dest_x509_ca)
11735

    
11736
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11737
                                                     key_name, dest_ca_pem,
11738
                                                     timeouts)
11739
      finally:
11740
        helper.Cleanup()
11741

    
11742
      # Check for backwards compatibility
11743
      assert len(dresults) == len(instance.disks)
11744
      assert compat.all(isinstance(i, bool) for i in dresults), \
11745
             "Not all results are boolean: %r" % dresults
11746

    
11747
    finally:
11748
      if activate_disks:
11749
        feedback_fn("Deactivating disks for %s" % instance.name)
11750
        _ShutdownInstanceDisks(self, instance)
11751

    
11752
    if not (compat.all(dresults) and fin_resu):
11753
      failures = []
11754
      if not fin_resu:
11755
        failures.append("export finalization")
11756
      if not compat.all(dresults):
11757
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11758
                               if not dsk)
11759
        failures.append("disk export: disk(s) %s" % fdsk)
11760

    
11761
      raise errors.OpExecError("Export failed, errors in %s" %
11762
                               utils.CommaJoin(failures))
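    # dresults carries one boolean per instance disk and fin_resu the
    # overall finalization status (cf. the backwards-compatibility
    # assertions above); both are also returned at the end of Exec.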
11763

    
11764
    # At this point, the export was successful, we can cleanup/finish
11765

    
11766
    # Remove instance if requested
11767
    if self.op.remove_instance:
11768
      feedback_fn("Removing instance %s" % instance.name)
11769
      _RemoveInstance(self, feedback_fn, instance,
11770
                      self.op.ignore_remove_failures)
11771

    
11772
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11773
      self._CleanupExports(feedback_fn)
11774

    
11775
    return fin_resu, dresults
11776

    
11777

    
11778
class LUBackupRemove(NoHooksLU):
11779
  """Remove exports related to the named instance.
11780

11781
  """
11782
  REQ_BGL = False
11783

    
11784
  def ExpandNames(self):
11785
    self.needed_locks = {}
11786
    # We need all nodes to be locked in order for RemoveExport to work, but we
11787
    # don't need to lock the instance itself, as nothing will happen to it (and
11788
    # we can remove exports also for a removed instance)
11789
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11790

    
11791
  def Exec(self, feedback_fn):
11792
    """Remove any export.
11793

11794
    """
11795
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11796
    # If the instance was not found we'll try with the name that was passed in.
11797
    # This will only work if it was an FQDN, though.
11798
    fqdn_warn = False
11799
    if not instance_name:
11800
      fqdn_warn = True
11801
      instance_name = self.op.instance_name
11802

    
11803
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11804
    exportlist = self.rpc.call_export_list(locked_nodes)
11805
    found = False
11806
    for node in exportlist:
11807
      msg = exportlist[node].fail_msg
11808
      if msg:
11809
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11810
        continue
11811
      if instance_name in exportlist[node].payload:
11812
        found = True
11813
        result = self.rpc.call_export_remove(node, instance_name)
11814
        msg = result.fail_msg
11815
        if msg:
11816
          logging.error("Could not remove export for instance %s"
11817
                        " on node %s: %s", instance_name, node, msg)
11818

    
11819
    if fqdn_warn and not found:
11820
      feedback_fn("Export not found. If trying to remove an export belonging"
11821
                  " to a deleted instance please use its Fully Qualified"
11822
                  " Domain Name.")
11823

    
11824

    
11825
class LUGroupAdd(LogicalUnit):
11826
  """Logical unit for creating node groups.
11827

11828
  """
11829
  HPATH = "group-add"
11830
  HTYPE = constants.HTYPE_GROUP
11831
  REQ_BGL = False
11832

    
11833
  def ExpandNames(self):
11834
    # We need the new group's UUID here so that we can create and acquire the
11835
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11836
    # that it should not check whether the UUID exists in the configuration.
11837
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11838
    self.needed_locks = {}
11839
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11840

    
11841
  def CheckPrereq(self):
11842
    """Check prerequisites.
11843

11844
    This checks that the given group name is not an existing node group
11845
    already.
11846

11847
    """
11848
    try:
11849
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11850
    except errors.OpPrereqError:
11851
      pass
11852
    else:
11853
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11854
                                 " node group (UUID: %s)" %
11855
                                 (self.op.group_name, existing_uuid),
11856
                                 errors.ECODE_EXISTS)
11857

    
11858
    if self.op.ndparams:
11859
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11860

    
11861
  def BuildHooksEnv(self):
11862
    """Build hooks env.
11863

11864
    """
11865
    return {
11866
      "GROUP_NAME": self.op.group_name,
11867
      }
11868

    
11869
  def BuildHooksNodes(self):
11870
    """Build hooks nodes.
11871

11872
    """
11873
    mn = self.cfg.GetMasterNode()
11874
    return ([mn], [mn])
11875

    
11876
  def Exec(self, feedback_fn):
11877
    """Add the node group to the cluster.
11878

11879
    """
11880
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11881
                                  uuid=self.group_uuid,
11882
                                  alloc_policy=self.op.alloc_policy,
11883
                                  ndparams=self.op.ndparams)
11884

    
11885
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11886
    del self.remove_locks[locking.LEVEL_NODEGROUP]
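    # Added commentary (interpretation): deleting the remove_locks entry
    # keeps the freshly added node-group lock registered once this LU
    # finishes, since the group now exists in the configuration and its
    # lock has to outlive this opcode.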
11887

    
11888

    
11889
class LUGroupAssignNodes(NoHooksLU):
11890
  """Logical unit for assigning nodes to groups.
11891

11892
  """
11893
  REQ_BGL = False
11894

    
11895
  def ExpandNames(self):
11896
    # These raise errors.OpPrereqError on their own:
11897
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11898
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11899

    
11900
    # We want to lock all the affected nodes and groups. We have readily
11901
    # available the list of nodes, and the *destination* group. To gather the
11902
    # list of "source" groups, we need to fetch node information later on.
11903
    self.needed_locks = {
11904
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11905
      locking.LEVEL_NODE: self.op.nodes,
11906
      }
11907

    
11908
  def DeclareLocks(self, level):
11909
    if level == locking.LEVEL_NODEGROUP:
11910
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11911

    
11912
      # Try to get all affected nodes' groups without having the group or node
11913
      # lock yet. Needs verification later in the code flow.
11914
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11915

    
11916
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11917

    
11918
  def CheckPrereq(self):
11919
    """Check prerequisites.
11920

11921
    """
11922
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11923
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11924
            frozenset(self.op.nodes))
11925

    
11926
    expected_locks = (set([self.group_uuid]) |
11927
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11928
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11929
    if actual_locks != expected_locks:
11930
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11931
                               " current groups are '%s', used to be '%s'" %
11932
                               (utils.CommaJoin(expected_locks),
11933
                                utils.CommaJoin(actual_locks)))
11934

    
11935
    self.node_data = self.cfg.GetAllNodesInfo()
11936
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11937
    instance_data = self.cfg.GetAllInstancesInfo()
11938

    
11939
    if self.group is None:
11940
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11941
                               (self.op.group_name, self.group_uuid))
11942

    
11943
    (new_splits, previous_splits) = \
11944
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11945
                                             for node in self.op.nodes],
11946
                                            self.node_data, instance_data)
11947

    
11948
    if new_splits:
11949
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11950

    
11951
      if not self.op.force:
11952
        raise errors.OpExecError("The following instances get split by this"
11953
                                 " change and --force was not given: %s" %
11954
                                 fmt_new_splits)
11955
      else:
11956
        self.LogWarning("This operation will split the following instances: %s",
11957
                        fmt_new_splits)
11958

    
11959
        if previous_splits:
11960
          self.LogWarning("In addition, these already-split instances continue"
11961
                          " to be split across groups: %s",
11962
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11963

    
11964
  def Exec(self, feedback_fn):
11965
    """Assign nodes to a new group.
11966

11967
    """
11968
    for node in self.op.nodes:
11969
      self.node_data[node].group = self.group_uuid
11970

    
11971
    # FIXME: Depends on side-effects of modifying the result of
11972
    # C{cfg.GetAllNodesInfo}
11973

    
11974
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11975

    
11976
  @staticmethod
11977
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11978
    """Check for split instances after a node assignment.
11979

11980
    This method considers a series of node assignments as an atomic operation,
11981
    and returns information about split instances after applying the set of
11982
    changes.
11983

11984
    In particular, it returns information about newly split instances, and
11985
    instances that were already split, and remain so after the change.
11986

11987
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11988
    considered.
11989

11990
    @type changes: list of (node_name, new_group_uuid) pairs.
11991
    @param changes: list of node assignments to consider.
11992
    @param node_data: a dict with data for all nodes
11993
    @param instance_data: a dict with all instances to consider
11994
    @rtype: a two-tuple
11995
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were already
      split and remain so after the change.

11999
    """
12000
    changed_nodes = dict((node, group) for node, group in changes
12001
                         if node_data[node].group != group)
12002

    
12003
    all_split_instances = set()
12004
    previously_split_instances = set()
12005

    
12006
    def InstanceNodes(instance):
12007
      return [instance.primary_node] + list(instance.secondary_nodes)
12008

    
12009
    for inst in instance_data.values():
12010
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12011
        continue
12012

    
12013
      instance_nodes = InstanceNodes(inst)
12014

    
12015
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12016
        previously_split_instances.add(inst.name)
12017

    
12018
      if len(set(changed_nodes.get(node, node_data[node].group)
12019
                 for node in instance_nodes)) > 1:
12020
        all_split_instances.add(inst.name)
12021

    
12022
    return (list(all_split_instances - previously_split_instances),
12023
            list(previously_split_instances & all_split_instances))
12024

    
12025

    
12026
class _GroupQuery(_QueryBase):
12027
  FIELDS = query.GROUP_FIELDS
12028

    
12029
  def ExpandNames(self, lu):
12030
    lu.needed_locks = {}
12031

    
12032
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12033
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12034

    
12035
    if not self.names:
12036
      self.wanted = [name_to_uuid[name]
12037
                     for name in utils.NiceSort(name_to_uuid.keys())]
12038
    else:
12039
      # Accept names to be either names or UUIDs.
12040
      missing = []
12041
      self.wanted = []
12042
      all_uuid = frozenset(self._all_groups.keys())
12043

    
12044
      for name in self.names:
12045
        if name in all_uuid:
12046
          self.wanted.append(name)
12047
        elif name in name_to_uuid:
12048
          self.wanted.append(name_to_uuid[name])
12049
        else:
12050
          missing.append(name)
12051

    
12052
      if missing:
12053
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12054
                                   utils.CommaJoin(missing),
12055
                                   errors.ECODE_NOENT)
12056

    
12057
  def DeclareLocks(self, lu, level):
12058
    pass
12059

    
12060
  def _GetQueryData(self, lu):
12061
    """Computes the list of node groups and their attributes.
12062

12063
    """
12064
    do_nodes = query.GQ_NODE in self.requested_data
12065
    do_instances = query.GQ_INST in self.requested_data
12066

    
12067
    group_to_nodes = None
12068
    group_to_instances = None
12069

    
12070
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12071
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12072
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12073
    # instance->node. Hence, we will need to process nodes even if we only need
12074
    # instance information.
12075
    if do_nodes or do_instances:
12076
      all_nodes = lu.cfg.GetAllNodesInfo()
12077
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12078
      node_to_group = {}
12079

    
12080
      for node in all_nodes.values():
12081
        if node.group in group_to_nodes:
12082
          group_to_nodes[node.group].append(node.name)
12083
          node_to_group[node.name] = node.group
12084

    
12085
      if do_instances:
12086
        all_instances = lu.cfg.GetAllInstancesInfo()
12087
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12088

    
12089
        for instance in all_instances.values():
12090
          node = instance.primary_node
12091
          if node in node_to_group:
12092
            group_to_instances[node_to_group[node]].append(instance.name)
12093

    
12094
        if not do_nodes:
12095
          # Do not pass on node information if it was not requested.
12096
          group_to_nodes = None
12097

    
12098
    return query.GroupQueryData([self._all_groups[uuid]
12099
                                 for uuid in self.wanted],
12100
                                group_to_nodes, group_to_instances)
12101

    
12102

    
12103
class LUGroupQuery(NoHooksLU):
12104
  """Logical unit for querying node groups.
12105

12106
  """
12107
  REQ_BGL = False
12108

    
12109
  def CheckArguments(self):
12110
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12111
                          self.op.output_fields, False)
12112

    
12113
  def ExpandNames(self):
12114
    self.gq.ExpandNames(self)
12115

    
12116
  def DeclareLocks(self, level):
12117
    self.gq.DeclareLocks(self, level)
12118

    
12119
  def Exec(self, feedback_fn):
12120
    return self.gq.OldStyleQuery(self)
12121

    
12122

    
12123
class LUGroupSetParams(LogicalUnit):
12124
  """Modifies the parameters of a node group.
12125

12126
  """
12127
  HPATH = "group-modify"
12128
  HTYPE = constants.HTYPE_GROUP
12129
  REQ_BGL = False
12130

    
12131
  def CheckArguments(self):
12132
    all_changes = [
12133
      self.op.ndparams,
12134
      self.op.alloc_policy,
12135
      ]
12136

    
12137
    if all_changes.count(None) == len(all_changes):
12138
      raise errors.OpPrereqError("Please pass at least one modification",
12139
                                 errors.ECODE_INVAL)
12140

    
12141
  def ExpandNames(self):
12142
    # This raises errors.OpPrereqError on its own:
12143
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12144

    
12145
    self.needed_locks = {
12146
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12147
      }
12148

    
12149
  def CheckPrereq(self):
12150
    """Check prerequisites.
12151

12152
    """
12153
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12154

    
12155
    if self.group is None:
12156
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12157
                               (self.op.group_name, self.group_uuid))
12158

    
12159
    if self.op.ndparams:
12160
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12161
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12162
      self.new_ndparams = new_ndparams
12163

    
12164
  def BuildHooksEnv(self):
12165
    """Build hooks env.
12166

12167
    """
12168
    return {
12169
      "GROUP_NAME": self.op.group_name,
12170
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12171
      }
12172

    
12173
  def BuildHooksNodes(self):
12174
    """Build hooks nodes.
12175

12176
    """
12177
    mn = self.cfg.GetMasterNode()
12178
    return ([mn], [mn])
12179

    
12180
  def Exec(self, feedback_fn):
12181
    """Modifies the node group.
12182

12183
    """
12184
    result = []
12185

    
12186
    if self.op.ndparams:
12187
      self.group.ndparams = self.new_ndparams
12188
      result.append(("ndparams", str(self.group.ndparams)))
12189

    
12190
    if self.op.alloc_policy:
12191
      self.group.alloc_policy = self.op.alloc_policy
12192

    
12193
    self.cfg.Update(self.group, feedback_fn)
12194
    return result
12195

    
12196

    
12197
class LUGroupRemove(LogicalUnit):
12198
  HPATH = "group-remove"
12199
  HTYPE = constants.HTYPE_GROUP
12200
  REQ_BGL = False
12201

    
12202
  def ExpandNames(self):
12203
    # This will raises errors.OpPrereqError on its own:
12204
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12205
    self.needed_locks = {
12206
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12207
      }
12208

    
12209
  def CheckPrereq(self):
12210
    """Check prerequisites.
12211

12212
    This checks that the given group name exists as a node group, that is
12213
    empty (i.e., contains no nodes), and that is not the last group of the
12214
    cluster.
12215

12216
    """
12217
    # Verify that the group is empty.
12218
    group_nodes = [node.name
12219
                   for node in self.cfg.GetAllNodesInfo().values()
12220
                   if node.group == self.group_uuid]
12221

    
12222
    if group_nodes:
12223
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12224
                                 " nodes: %s" %
12225
                                 (self.op.group_name,
12226
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12227
                                 errors.ECODE_STATE)
12228

    
12229
    # Verify the cluster would not be left group-less.
12230
    if len(self.cfg.GetNodeGroupList()) == 1:
12231
      raise errors.OpPrereqError("Group '%s' is the only group,"
12232
                                 " cannot be removed" %
12233
                                 self.op.group_name,
12234
                                 errors.ECODE_STATE)
12235

    
12236
  def BuildHooksEnv(self):
12237
    """Build hooks env.
12238

12239
    """
12240
    return {
12241
      "GROUP_NAME": self.op.group_name,
12242
      }
12243

    
12244
  def BuildHooksNodes(self):
12245
    """Build hooks nodes.
12246

12247
    """
12248
    mn = self.cfg.GetMasterNode()
12249
    return ([mn], [mn])
12250

    
12251
  def Exec(self, feedback_fn):
12252
    """Remove the node group.
12253

12254
    """
12255
    try:
12256
      self.cfg.RemoveNodeGroup(self.group_uuid)
12257
    except errors.ConfigurationError:
12258
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12259
                               (self.op.group_name, self.group_uuid))
12260

    
12261
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12262

    
12263

    
12264
class LUGroupRename(LogicalUnit):
12265
  HPATH = "group-rename"
12266
  HTYPE = constants.HTYPE_GROUP
12267
  REQ_BGL = False
12268

    
12269
  def ExpandNames(self):
12270
    # This raises errors.OpPrereqError on its own:
12271
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12272

    
12273
    self.needed_locks = {
12274
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12275
      }
12276

    
12277
  def CheckPrereq(self):
12278
    """Check prerequisites.
12279

12280
    Ensures requested new name is not yet used.
12281

12282
    """
12283
    try:
12284
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12285
    except errors.OpPrereqError:
12286
      pass
12287
    else:
12288
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12289
                                 " node group (UUID: %s)" %
12290
                                 (self.op.new_name, new_name_uuid),
12291
                                 errors.ECODE_EXISTS)
12292

    
12293
  def BuildHooksEnv(self):
12294
    """Build hooks env.
12295

12296
    """
12297
    return {
12298
      "OLD_NAME": self.op.group_name,
12299
      "NEW_NAME": self.op.new_name,
12300
      }
12301

    
12302
  def BuildHooksNodes(self):
12303
    """Build hooks nodes.
12304

12305
    """
12306
    mn = self.cfg.GetMasterNode()
12307

    
12308
    all_nodes = self.cfg.GetAllNodesInfo()
12309
    all_nodes.pop(mn, None)
12310

    
12311
    run_nodes = [mn]
12312
    run_nodes.extend(node.name for node in all_nodes.values()
12313
                     if node.group == self.group_uuid)
12314

    
12315
    return (run_nodes, run_nodes)
12316

    
12317
  def Exec(self, feedback_fn):
12318
    """Rename the node group.
12319

12320
    """
12321
    group = self.cfg.GetNodeGroup(self.group_uuid)
12322

    
12323
    if group is None:
12324
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12325
                               (self.op.group_name, self.group_uuid))
12326

    
12327
    group.name = self.op.new_name
12328
    self.cfg.Update(group, feedback_fn)
12329

    
12330
    return self.op.new_name
12331

    
12332

    
12333
class LUGroupEvacuate(LogicalUnit):
12334
  HPATH = "group-evacuate"
12335
  HTYPE = constants.HTYPE_GROUP
12336
  REQ_BGL = False
12337

    
12338
  def ExpandNames(self):
12339
    # This raises errors.OpPrereqError on its own:
12340
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12341

    
12342
    if self.op.target_groups:
12343
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12344
                                  self.op.target_groups)
12345
    else:
12346
      self.req_target_uuids = []
12347

    
12348
    if self.group_uuid in self.req_target_uuids:
12349
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12350
                                 " as a target group (targets are %s)" %
12351
                                 (self.group_uuid,
12352
                                  utils.CommaJoin(self.req_target_uuids)),
12353
                                 errors.ECODE_INVAL)
12354

    
12355
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12356

    
12357
    self.share_locks = _ShareAll()
12358
    self.needed_locks = {
12359
      locking.LEVEL_INSTANCE: [],
12360
      locking.LEVEL_NODEGROUP: [],
12361
      locking.LEVEL_NODE: [],
12362
      }
12363

    
12364
  def DeclareLocks(self, level):
12365
    if level == locking.LEVEL_INSTANCE:
12366
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12367

    
12368
      # Lock instances optimistically, needs verification once node and group
12369
      # locks have been acquired
12370
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12371
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12372

    
12373
    elif level == locking.LEVEL_NODEGROUP:
12374
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12375

    
12376
      if self.req_target_uuids:
12377
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12378

    
12379
        # Lock all groups used by instances optimistically; this requires going
12380
        # via the node before it's locked, requiring verification later on
12381
        lock_groups.update(group_uuid
12382
                           for instance_name in
12383
                             self.owned_locks(locking.LEVEL_INSTANCE)
12384
                           for group_uuid in
12385
                             self.cfg.GetInstanceNodeGroups(instance_name))
12386
      else:
12387
        # No target groups, need to lock all of them
12388
        lock_groups = locking.ALL_SET
12389

    
12390
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12391

    
12392
    elif level == locking.LEVEL_NODE:
12393
      # This will only lock the nodes in the group to be evacuated which
12394
      # contain actual instances
12395
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12396
      self._LockInstancesNodes()
12397

    
12398
      # Lock all nodes in group to be evacuated and target groups
12399
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12400
      assert self.group_uuid in owned_groups
12401
      member_nodes = [node_name
12402
                      for group in owned_groups
12403
                      for node_name in self.cfg.GetNodeGroup(group).members]
12404
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12405

    
12406
  def CheckPrereq(self):
12407
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12408
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12409
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12410

    
12411
    assert owned_groups.issuperset(self.req_target_uuids)
12412
    assert self.group_uuid in owned_groups
12413

    
12414
    # Check if locked instances are still correct
12415
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12416

    
12417
    # Get instance information
12418
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12419

    
12420
    # Check if node groups for locked instances are still correct
12421
    for instance_name in owned_instances:
12422
      inst = self.instances[instance_name]
12423
      assert owned_nodes.issuperset(inst.all_nodes), \
12424
        "Instance %s's nodes changed while we kept the lock" % instance_name
12425

    
12426
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12427
                                             owned_groups)
12428

    
12429
      assert self.group_uuid in inst_groups, \
12430
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12431

    
12432
    if self.req_target_uuids:
12433
      # User requested specific target groups
12434
      self.target_uuids = self.req_target_uuids
12435
    else:
12436
      # All groups except the one to be evacuated are potential targets
12437
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12438
                           if group_uuid != self.group_uuid]
12439

    
12440
      if not self.target_uuids:
12441
        raise errors.OpPrereqError("There are no possible target groups",
12442
                                   errors.ECODE_INVAL)
12443

    
12444
  def BuildHooksEnv(self):
12445
    """Build hooks env.
12446

12447
    """
12448
    return {
12449
      "GROUP_NAME": self.op.group_name,
12450
      "TARGET_GROUPS": " ".join(self.target_uuids),
12451
      }
12452

    
12453
  def BuildHooksNodes(self):
12454
    """Build hooks nodes.
12455

12456
    """
12457
    mn = self.cfg.GetMasterNode()
12458

    
12459
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12460

    
12461
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12462

    
12463
    return (run_nodes, run_nodes)
12464

    
12465
  def Exec(self, feedback_fn):
12466
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12467

    
12468
    assert self.group_uuid not in self.target_uuids
12469

    
12470
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12471
                     instances=instances, target_groups=self.target_uuids)
12472

    
12473
    ial.Run(self.op.iallocator)
12474

    
12475
    if not ial.success:
12476
      raise errors.OpPrereqError("Can't compute group evacuation using"
12477
                                 " iallocator '%s': %s" %
12478
                                 (self.op.iallocator, ial.info),
12479
                                 errors.ECODE_NORES)
12480

    
12481
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12482

    
12483
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12484
                 len(jobs), self.op.group_name)
12485

    
12486
    return ResultWithJobs(jobs)
12487

    
12488

    
12489
class TagsLU(NoHooksLU): # pylint: disable=W0223
12490
  """Generic tags LU.
12491

12492
  This is an abstract class which is the parent of all the other tags LUs.
12493

12494
  """
12495
  def ExpandNames(self):
12496
    self.group_uuid = None
12497
    self.needed_locks = {}
12498
    if self.op.kind == constants.TAG_NODE:
12499
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12500
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12501
    elif self.op.kind == constants.TAG_INSTANCE:
12502
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12503
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12504
    elif self.op.kind == constants.TAG_NODEGROUP:
12505
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12506

    
12507
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12508
    # not possible to acquire the BGL based on opcode parameters)
12509

    
12510
  def CheckPrereq(self):
12511
    """Check prerequisites.
12512

12513
    """
12514
    if self.op.kind == constants.TAG_CLUSTER:
12515
      self.target = self.cfg.GetClusterInfo()
12516
    elif self.op.kind == constants.TAG_NODE:
12517
      self.target = self.cfg.GetNodeInfo(self.op.name)
12518
    elif self.op.kind == constants.TAG_INSTANCE:
12519
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12520
    elif self.op.kind == constants.TAG_NODEGROUP:
12521
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12522
    else:
12523
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12524
                                 str(self.op.kind), errors.ECODE_INVAL)
12525

    
12526

    
12527
class LUTagsGet(TagsLU):
12528
  """Returns the tags of a given object.
12529

12530
  """
12531
  REQ_BGL = False
12532

    
12533
  def ExpandNames(self):
12534
    TagsLU.ExpandNames(self)
12535

    
12536
    # Share locks as this is only a read operation
12537
    self.share_locks = _ShareAll()
12538

    
12539
  def Exec(self, feedback_fn):
12540
    """Returns the tag list.
12541

12542
    """
12543
    return list(self.target.GetTags())
12544

    
12545

    
12546
class LUTagsSearch(NoHooksLU):
12547
  """Searches the tags for a given pattern.
12548

12549
  """
12550
  REQ_BGL = False
12551

    
12552
  def ExpandNames(self):
12553
    self.needed_locks = {}
12554

    
12555
  def CheckPrereq(self):
12556
    """Check prerequisites.
12557

12558
    This checks the pattern passed for validity by compiling it.
12559

12560
    """
12561
    try:
12562
      self.re = re.compile(self.op.pattern)
12563
    except re.error, err:
12564
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12565
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12566

    
12567
  def Exec(self, feedback_fn):
12568
    """Returns the tag list.
12569

12570
    """
12571
    cfg = self.cfg
12572
    tgts = [("/cluster", cfg.GetClusterInfo())]
12573
    ilist = cfg.GetAllInstancesInfo().values()
12574
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12575
    nlist = cfg.GetAllNodesInfo().values()
12576
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12577
    tgts.extend(("/nodegroup/%s" % n.name, n)
12578
                for n in cfg.GetAllNodeGroupsInfo().values())
12579
    results = []
12580
    for path, target in tgts:
12581
      for tag in target.GetTags():
12582
        if self.re.search(tag):
12583
          results.append((path, tag))
12584
    return results
12585

    
12586

    
12587
class LUTagsSet(TagsLU):
12588
  """Sets a tag on a given object.
12589

12590
  """
12591
  REQ_BGL = False
12592

    
12593
  def CheckPrereq(self):
12594
    """Check prerequisites.
12595

12596
    This checks the type and length of the tag name and value.
12597

12598
    """
12599
    TagsLU.CheckPrereq(self)
12600
    for tag in self.op.tags:
12601
      objects.TaggableObject.ValidateTag(tag)
12602

    
12603
  def Exec(self, feedback_fn):
12604
    """Sets the tag.
12605

12606
    """
12607
    try:
12608
      for tag in self.op.tags:
12609
        self.target.AddTag(tag)
12610
    except errors.TagError, err:
12611
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12612
    self.cfg.Update(self.target, feedback_fn)
12613

    
12614

    
12615
class LUTagsDel(TagsLU):
12616
  """Delete a list of tags from a given object.
12617

12618
  """
12619
  REQ_BGL = False
12620

    
12621
  def CheckPrereq(self):
12622
    """Check prerequisites.
12623

12624
    This checks that we have the given tag.
12625

12626
    """
12627
    TagsLU.CheckPrereq(self)
12628
    for tag in self.op.tags:
12629
      objects.TaggableObject.ValidateTag(tag)
12630
    del_tags = frozenset(self.op.tags)
12631
    cur_tags = self.target.GetTags()
12632

    
12633
    diff_tags = del_tags - cur_tags
12634
    if diff_tags:
12635
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12636
      raise errors.OpPrereqError("Tag(s) %s not found" %
12637
                                 (utils.CommaJoin(diff_names), ),
12638
                                 errors.ECODE_NOENT)
12639

    
12640
  def Exec(self, feedback_fn):
12641
    """Remove the tag from the object.
12642

12643
    """
12644
    for tag in self.op.tags:
12645
      self.target.RemoveTag(tag)
12646
    self.cfg.Update(self.target, feedback_fn)
12647

    
12648

    
12649
class LUTestDelay(NoHooksLU):
12650
  """Sleep for a specified amount of time.
12651

12652
  This LU sleeps on the master and/or nodes for a specified amount of
12653
  time.
12654

12655
  """
12656
  REQ_BGL = False
12657

    
12658
  def ExpandNames(self):
12659
    """Expand names and set required locks.
12660

12661
    This expands the node list, if any.
12662

12663
    """
12664
    self.needed_locks = {}
12665
    if self.op.on_nodes:
12666
      # _GetWantedNodes can be used here, but is not always appropriate to use
12667
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12668
      # more information.
12669
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12670
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12671

    
12672
  def _TestDelay(self):
12673
    """Do the actual sleep.
12674

12675
    """
12676
    if self.op.on_master:
12677
      if not utils.TestDelay(self.op.duration):
12678
        raise errors.OpExecError("Error during master delay test")
12679
    if self.op.on_nodes:
12680
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12681
      for node, node_result in result.items():
12682
        node_result.Raise("Failure during rpc call to node %s" % node)
12683

    
12684
  def Exec(self, feedback_fn):
12685
    """Execute the test delay opcode, with the wanted repetitions.
12686

12687
    """
12688
    if self.op.repeat == 0:
12689
      self._TestDelay()
12690
    else:
12691
      top_value = self.op.repeat - 1
12692
      for i in range(self.op.repeat):
12693
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12694
        self._TestDelay()
12695

    
12696

    
12697
class LUTestJqueue(NoHooksLU):
12698
  """Utility LU to test some aspects of the job queue.
12699

12700
  """
12701
  REQ_BGL = False
12702

    
12703
  # Must be lower than default timeout for WaitForJobChange to see whether it
12704
  # notices changed jobs
12705
  _CLIENT_CONNECT_TIMEOUT = 20.0
12706
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12707

    
12708
  @classmethod
12709
  def _NotifyUsingSocket(cls, cb, errcls):
12710
    """Opens a Unix socket and waits for another program to connect.
12711

12712
    @type cb: callable
12713
    @param cb: Callback to send socket name to client
12714
    @type errcls: class
12715
    @param errcls: Exception class to use for errors
12716

12717
    """
12718
    # Using a temporary directory as there's no easy way to create temporary
12719
    # sockets without writing a custom loop around tempfile.mktemp and
12720
    # socket.bind
12721
    tmpdir = tempfile.mkdtemp()
12722
    try:
12723
      tmpsock = utils.PathJoin(tmpdir, "sock")
12724

    
12725
      logging.debug("Creating temporary socket at %s", tmpsock)
12726
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12727
      try:
12728
        sock.bind(tmpsock)
12729
        sock.listen(1)
12730

    
12731
        # Send details to client
12732
        cb(tmpsock)
12733

    
12734
        # Wait for client to connect before continuing
12735
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12736
        try:
12737
          (conn, _) = sock.accept()
12738
        except socket.error, err:
12739
          raise errcls("Client didn't connect in time (%s)" % err)
12740
      finally:
12741
        sock.close()
12742
    finally:
12743
      # Remove as soon as client is connected
12744
      shutil.rmtree(tmpdir)
12745

    
12746
    # Wait for client to close
12747
    try:
12748
      try:
12749
        # pylint: disable=E1101
12750
        # Instance of '_socketobject' has no ... member
12751
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12752
        conn.recv(1)
12753
      except socket.error, err:
12754
        raise errcls("Client failed to confirm notification (%s)" % err)
12755
    finally:
12756
      conn.close()
12757

    
12758
  def _SendNotification(self, test, arg, sockname):
12759
    """Sends a notification to the client.
12760

12761
    @type test: string
12762
    @param test: Test name
12763
    @param arg: Test argument (depends on test)
12764
    @type sockname: string
12765
    @param sockname: Socket path
12766

12767
    """
12768
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12769

    
12770
  def _Notify(self, prereq, test, arg):
12771
    """Notifies the client of a test.
12772

12773
    @type prereq: bool
12774
    @param prereq: Whether this is a prereq-phase test
12775
    @type test: string
12776
    @param test: Test name
12777
    @param arg: Test argument (depends on test)
12778

12779
    """
12780
    if prereq:
12781
      errcls = errors.OpPrereqError
12782
    else:
12783
      errcls = errors.OpExecError
12784

    
12785
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12786
                                                  test, arg),
12787
                                   errcls)
12788

    
12789
  def CheckArguments(self):
12790
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12791
    self.expandnames_calls = 0
12792

    
12793
  def ExpandNames(self):
12794
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12795
    if checkargs_calls < 1:
12796
      raise errors.ProgrammerError("CheckArguments was not called")
12797

    
12798
    self.expandnames_calls += 1
12799

    
12800
    if self.op.notify_waitlock:
12801
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12802

    
12803
    self.LogInfo("Expanding names")
12804

    
12805
    # Get lock on master node (just to get a lock, not for a particular reason)
12806
    self.needed_locks = {
12807
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12808
      }
12809

    
12810
  def Exec(self, feedback_fn):
12811
    if self.expandnames_calls < 1:
12812
      raise errors.ProgrammerError("ExpandNames was not called")
12813

    
12814
    if self.op.notify_exec:
12815
      self._Notify(False, constants.JQT_EXEC, None)
12816

    
12817
    self.LogInfo("Executing")
12818

    
12819
    if self.op.log_messages:
12820
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12821
      for idx, msg in enumerate(self.op.log_messages):
12822
        self.LogInfo("Sending log message %s", idx + 1)
12823
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12824
        # Report how many test messages have been sent
12825
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12826

    
12827
    if self.op.fail:
12828
      raise errors.OpExecError("Opcode failure was requested")
12829

    
12830
    return True
12831

    
12832

    
12833
class IAllocator(object):
12834
  """IAllocator framework.
12835

12836
  An IAllocator instance has three sets of attributes:
12837
    - cfg that is needed to query the cluster
12838
    - input data (all members of the _KEYS class attribute are required)
12839
    - four buffer attributes (in|out_data|text), that represent the
12840
      input (to the external script) in text and data structure format,
12841
      and the output from it, again in two formats
12842
    - the result variables from the script (success, info, nodes) for
12843
      easy usage
12844

12845
  """
12846
  # pylint: disable=R0902
12847
  # lots of instance attributes
12848

    
12849
  def __init__(self, cfg, rpc, mode, **kwargs):
12850
    self.cfg = cfg
12851
    self.rpc = rpc
12852
    # init buffer variables
12853
    self.in_text = self.out_text = self.in_data = self.out_data = None
12854
    # init all input fields so that pylint is happy
12855
    self.mode = mode
12856
    self.memory = self.disks = self.disk_template = None
12857
    self.os = self.tags = self.nics = self.vcpus = None
12858
    self.hypervisor = None
12859
    self.relocate_from = None
12860
    self.name = None
12861
    self.instances = None
12862
    self.evac_mode = None
12863
    self.target_groups = []
12864
    # computed fields
12865
    self.required_nodes = None
12866
    # init result fields
12867
    self.success = self.info = self.result = None
12868

    
12869
    try:
12870
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12871
    except KeyError:
12872
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12873
                                   " IAllocator" % self.mode)
12874

    
12875
    keyset = [n for (n, _) in keydata]
12876

    
12877
    for key in kwargs:
12878
      if key not in keyset:
12879
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12880
                                     " IAllocator" % key)
12881
      setattr(self, key, kwargs[key])
12882

    
12883
    for key in keyset:
12884
      if key not in kwargs:
12885
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12886
                                     " IAllocator" % key)
12887
    self._BuildInputData(compat.partial(fn, self), keydata)
12888

    
12889
  def _ComputeClusterData(self):
12890
    """Compute the generic allocator input data.
12891

12892
    This is the data that is independent of the actual operation.
12893

12894
    """
12895
    cfg = self.cfg
12896
    cluster_info = cfg.GetClusterInfo()
12897
    # cluster data
12898
    data = {
12899
      "version": constants.IALLOCATOR_VERSION,
12900
      "cluster_name": cfg.GetClusterName(),
12901
      "cluster_tags": list(cluster_info.GetTags()),
12902
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12903
      # we don't have job IDs
12904
      }
12905
    ninfo = cfg.GetAllNodesInfo()
12906
    iinfo = cfg.GetAllInstancesInfo().values()
12907
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12908

    
12909
    # node data
12910
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12911

    
12912
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12913
      hypervisor_name = self.hypervisor
12914
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12915
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12916
    else:
12917
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12918

    
12919
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12920
                                        hypervisor_name)
12921
    node_iinfo = \
12922
      self.rpc.call_all_instances_info(node_list,
12923
                                       cluster_info.enabled_hypervisors)
12924

    
12925
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12926

    
12927
    config_ndata = self._ComputeBasicNodeData(ninfo)
12928
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12929
                                                 i_list, config_ndata)
12930
    assert len(data["nodes"]) == len(ninfo), \
12931
        "Incomplete node data computed"
12932

    
12933
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12934

    
12935
    self.in_data = data
12936

    
12937
  @staticmethod
12938
  def _ComputeNodeGroupData(cfg):
12939
    """Compute node groups data.
12940

12941
    """
12942
    ng = dict((guuid, {
12943
      "name": gdata.name,
12944
      "alloc_policy": gdata.alloc_policy,
12945
      })
12946
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12947

    
12948
    return ng
12949

    
12950
  @staticmethod
12951
  def _ComputeBasicNodeData(node_cfg):
12952
    """Compute global node data.
12953

12954
    @rtype: dict
12955
    @returns: a dict of name: (node dict, node config)
12956

12957
    """
12958
    # fill in static (config-based) values
12959
    node_results = dict((ninfo.name, {
12960
      "tags": list(ninfo.GetTags()),
12961
      "primary_ip": ninfo.primary_ip,
12962
      "secondary_ip": ninfo.secondary_ip,
12963
      "offline": ninfo.offline,
12964
      "drained": ninfo.drained,
12965
      "master_candidate": ninfo.master_candidate,
12966
      "group": ninfo.group,
12967
      "master_capable": ninfo.master_capable,
12968
      "vm_capable": ninfo.vm_capable,
12969
      })
12970
      for ninfo in node_cfg.values())
12971

    
12972
    return node_results
12973

    
12974
  @staticmethod
12975
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12976
                              node_results):
12977
    """Compute global node data.
12978

12979
    @param node_results: the basic node structures as filled from the config
12980

12981
    """
12982
    # make a copy of the current dict
12983
    node_results = dict(node_results)
12984
    for nname, nresult in node_data.items():
12985
      assert nname in node_results, "Missing basic data for node %s" % nname
12986
      ninfo = node_cfg[nname]
12987

    
12988
      if not (ninfo.offline or ninfo.drained):
12989
        nresult.Raise("Can't get data for node %s" % nname)
12990
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12991
                                nname)
12992
        remote_info = nresult.payload
12993

    
12994
        for attr in ["memory_total", "memory_free", "memory_dom0",
12995
                     "vg_size", "vg_free", "cpu_total"]:
12996
          if attr not in remote_info:
12997
            raise errors.OpExecError("Node '%s' didn't return attribute"
12998
                                     " '%s'" % (nname, attr))
12999
          if not isinstance(remote_info[attr], int):
13000
            raise errors.OpExecError("Node '%s' returned invalid value"
13001
                                     " for '%s': %s" %
13002
                                     (nname, attr, remote_info[attr]))
13003
        # compute memory used by primary instances
13004
        i_p_mem = i_p_up_mem = 0
13005
        for iinfo, beinfo in i_list:
13006
          if iinfo.primary_node == nname:
13007
            i_p_mem += beinfo[constants.BE_MEMORY]
13008
            if iinfo.name not in node_iinfo[nname].payload:
13009
              i_used_mem = 0
13010
            else:
13011
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13012
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13013
            remote_info["memory_free"] -= max(0, i_mem_diff)
13014

    
13015
            if iinfo.admin_up:
13016
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13017

    
13018
        # compute memory used by instances
13019
        pnr_dyn = {
13020
          "total_memory": remote_info["memory_total"],
13021
          "reserved_memory": remote_info["memory_dom0"],
13022
          "free_memory": remote_info["memory_free"],
13023
          "total_disk": remote_info["vg_size"],
13024
          "free_disk": remote_info["vg_free"],
13025
          "total_cpus": remote_info["cpu_total"],
13026
          "i_pri_memory": i_p_mem,
13027
          "i_pri_up_memory": i_p_up_mem,
13028
          }
13029
        pnr_dyn.update(node_results[nname])
13030
        node_results[nname] = pnr_dyn
13031

    
13032
    return node_results
13033

    
13034
  @staticmethod
13035
  def _ComputeInstanceData(cluster_info, i_list):
13036
    """Compute global instance data.
13037

13038
    """
13039
    instance_data = {}
13040
    for iinfo, beinfo in i_list:
13041
      nic_data = []
13042
      for nic in iinfo.nics:
13043
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13044
        nic_dict = {
13045
          "mac": nic.mac,
13046
          "ip": nic.ip,
13047
          "mode": filled_params[constants.NIC_MODE],
13048
          "link": filled_params[constants.NIC_LINK],
13049
          }
13050
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13051
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13052
        nic_data.append(nic_dict)
13053
      pir = {
13054
        "tags": list(iinfo.GetTags()),
13055
        "admin_up": iinfo.admin_up,
13056
        "vcpus": beinfo[constants.BE_VCPUS],
13057
        "memory": beinfo[constants.BE_MEMORY],
13058
        "os": iinfo.os,
13059
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13060
        "nics": nic_data,
13061
        "disks": [{constants.IDISK_SIZE: dsk.size,
13062
                   constants.IDISK_MODE: dsk.mode}
13063
                  for dsk in iinfo.disks],
13064
        "disk_template": iinfo.disk_template,
13065
        "hypervisor": iinfo.hypervisor,
13066
        }
13067
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13068
                                                 pir["disks"])
13069
      instance_data[iinfo.name] = pir
13070

    
13071
    return instance_data
13072

    
13073
  def _AddNewInstance(self):
13074
    """Add new instance data to allocator structure.
13075

13076
    This in combination with _AllocatorGetClusterData will create the
13077
    correct structure needed as input for the allocator.
13078

13079
    The checks for the completeness of the opcode must have already been
13080
    done.
13081

13082
    """
13083
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13084

    
13085
    if self.disk_template in constants.DTS_INT_MIRROR:
13086
      self.required_nodes = 2
13087
    else:
13088
      self.required_nodes = 1
13089

    
13090
    request = {
13091
      "name": self.name,
13092
      "disk_template": self.disk_template,
13093
      "tags": self.tags,
13094
      "os": self.os,
13095
      "vcpus": self.vcpus,
13096
      "memory": self.memory,
13097
      "disks": self.disks,
13098
      "disk_space_total": disk_space,
13099
      "nics": self.nics,
13100
      "required_nodes": self.required_nodes,
13101
      "hypervisor": self.hypervisor,
13102
      }
13103

    
13104
    return request
13105

    
13106
  def _AddRelocateInstance(self):
13107
    """Add relocate instance data to allocator structure.
13108

13109
    This in combination with _IAllocatorGetClusterData will create the
13110
    correct structure needed as input for the allocator.
13111

13112
    The checks for the completeness of the opcode must have already been
13113
    done.
13114

13115
    """
13116
    instance = self.cfg.GetInstanceInfo(self.name)
13117
    if instance is None:
13118
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13119
                                   " IAllocator" % self.name)
13120

    
13121
    if instance.disk_template not in constants.DTS_MIRRORED:
13122
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13123
                                 errors.ECODE_INVAL)
13124

    
13125
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13126
        len(instance.secondary_nodes) != 1:
13127
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13128
                                 errors.ECODE_STATE)
13129

    
13130
    self.required_nodes = 1
13131
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13132
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13133

    
13134
    request = {
13135
      "name": self.name,
13136
      "disk_space_total": disk_space,
13137
      "required_nodes": self.required_nodes,
13138
      "relocate_from": self.relocate_from,
13139
      }
13140
    return request
13141

    
13142
  def _AddNodeEvacuate(self):
13143
    """Get data for node-evacuate requests.
13144

13145
    """
13146
    return {
13147
      "instances": self.instances,
13148
      "evac_mode": self.evac_mode,
13149
      }
13150

    
13151
  def _AddChangeGroup(self):
13152
    """Get data for node-evacuate requests.
13153

13154
    """
13155
    return {
13156
      "instances": self.instances,
13157
      "target_groups": self.target_groups,
13158
      }
13159

    
13160
  def _BuildInputData(self, fn, keydata):
13161
    """Build input data structures.
13162

13163
    """
13164
    self._ComputeClusterData()
13165

    
13166
    request = fn()
13167
    request["type"] = self.mode
13168
    for keyname, keytype in keydata:
13169
      if keyname not in request:
13170
        raise errors.ProgrammerError("Request parameter %s is missing" %
13171
                                     keyname)
13172
      val = request[keyname]
13173
      if not keytype(val):
13174
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13175
                                     " validation, value %s, expected"
13176
                                     " type %s" % (keyname, val, keytype))
13177
    self.in_data["request"] = request
13178

    
13179
    self.in_text = serializer.Dump(self.in_data)
13180

    
13181
  _STRING_LIST = ht.TListOf(ht.TString)
13182
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13183
     # pylint: disable=E1101
13184
     # Class '...' has no 'OP_ID' member
13185
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13186
                          opcodes.OpInstanceMigrate.OP_ID,
13187
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13188
     })))
13189

    
13190
  _NEVAC_MOVED = \
13191
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13192
                       ht.TItems([ht.TNonEmptyString,
13193
                                  ht.TNonEmptyString,
13194
                                  ht.TListOf(ht.TNonEmptyString),
13195
                                 ])))
13196
  _NEVAC_FAILED = \
13197
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13198
                       ht.TItems([ht.TNonEmptyString,
13199
                                  ht.TMaybeString,
13200
                                 ])))
13201
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13202
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13203

    
13204
  _MODE_DATA = {
13205
    constants.IALLOCATOR_MODE_ALLOC:
13206
      (_AddNewInstance,
13207
       [
13208
        ("name", ht.TString),
13209
        ("memory", ht.TInt),
13210
        ("disks", ht.TListOf(ht.TDict)),
13211
        ("disk_template", ht.TString),
13212
        ("os", ht.TString),
13213
        ("tags", _STRING_LIST),
13214
        ("nics", ht.TListOf(ht.TDict)),
13215
        ("vcpus", ht.TInt),
13216
        ("hypervisor", ht.TString),
13217
        ], ht.TList),
13218
    constants.IALLOCATOR_MODE_RELOC:
13219
      (_AddRelocateInstance,
13220
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13221
       ht.TList),
13222
     constants.IALLOCATOR_MODE_NODE_EVAC:
13223
      (_AddNodeEvacuate, [
13224
        ("instances", _STRING_LIST),
13225
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13226
        ], _NEVAC_RESULT),
13227
     constants.IALLOCATOR_MODE_CHG_GROUP:
13228
      (_AddChangeGroup, [
13229
        ("instances", _STRING_LIST),
13230
        ("target_groups", _STRING_LIST),
13231
        ], _NEVAC_RESULT),
13232
    }
13233

    
13234
  def Run(self, name, validate=True, call_fn=None):
13235
    """Run an instance allocator and return the results.
13236

13237
    """
13238
    if call_fn is None:
13239
      call_fn = self.rpc.call_iallocator_runner
13240

    
13241
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13242
    result.Raise("Failure while running the iallocator script")
13243

    
13244
    self.out_text = result.payload
13245
    if validate:
13246
      self._ValidateResult()
13247

    
13248
  def _ValidateResult(self):
13249
    """Process the allocator results.
13250

13251
    This will process and if successful save the result in
13252
    self.out_data and the other parameters.
13253

13254
    """
13255
    try:
13256
      rdict = serializer.Load(self.out_text)
13257
    except Exception, err:
13258
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13259

    
13260
    if not isinstance(rdict, dict):
13261
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13262

    
13263
    # TODO: remove backwards compatiblity in later versions
13264
    if "nodes" in rdict and "result" not in rdict:
13265
      rdict["result"] = rdict["nodes"]
13266
      del rdict["nodes"]
13267

    
13268
    for key in "success", "info", "result":
13269
      if key not in rdict:
13270
        raise errors.OpExecError("Can't parse iallocator results:"
13271
                                 " missing key '%s'" % key)
13272
      setattr(self, key, rdict[key])
13273

    
13274
    if not self._result_check(self.result):
13275
      raise errors.OpExecError("Iallocator returned invalid result,"
13276
                               " expected %s, got %s" %
13277
                               (self._result_check, self.result),
13278
                               errors.ECODE_INVAL)
13279

    
13280
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13281
      assert self.relocate_from is not None
13282
      assert self.required_nodes == 1
13283

    
13284
      node2group = dict((name, ndata["group"])
13285
                        for (name, ndata) in self.in_data["nodes"].items())
13286

    
13287
      fn = compat.partial(self._NodesToGroups, node2group,
13288
                          self.in_data["nodegroups"])
13289

    
13290
      instance = self.cfg.GetInstanceInfo(self.name)
13291
      request_groups = fn(self.relocate_from + [instance.primary_node])
13292
      result_groups = fn(rdict["result"] + [instance.primary_node])
13293

    
13294
      if self.success and not set(result_groups).issubset(request_groups):
13295
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13296
                                 " differ from original groups (%s)" %
13297
                                 (utils.CommaJoin(result_groups),
13298
                                  utils.CommaJoin(request_groups)))
13299

    
13300
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13301
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13302

    
13303
    self.out_data = rdict
13304

    
13305
  @staticmethod
13306
  def _NodesToGroups(node2group, groups, nodes):
13307
    """Returns a list of unique group names for a list of nodes.
13308

13309
    @type node2group: dict
13310
    @param node2group: Map from node name to group UUID
13311
    @type groups: dict
13312
    @param groups: Group information
13313
    @type nodes: list
13314
    @param nodes: Node names
13315

13316
    """
13317
    result = set()
13318

    
13319
    for node in nodes:
13320
      try:
13321
        group_uuid = node2group[node]
13322
      except KeyError:
13323
        # Ignore unknown node
13324
        pass
13325
      else:
13326
        try:
13327
          group = groups[group_uuid]
13328
        except KeyError:
13329
          # Can't find group, let's use UUID
13330
          group_name = group_uuid
13331
        else:
13332
          group_name = group["name"]
13333

    
13334
        result.add(group_name)
13335

    
13336
    return sorted(result)
13337

    
13338

    
13339
class LUTestAllocator(NoHooksLU):
13340
  """Run allocator tests.
13341

13342
  This LU runs the allocator tests
13343

13344
  """
13345
  def CheckPrereq(self):
13346
    """Check prerequisites.
13347

13348
    This checks the opcode parameters depending on the director and mode test.
13349

13350
    """
13351
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13352
      for attr in ["memory", "disks", "disk_template",
13353
                   "os", "tags", "nics", "vcpus"]:
13354
        if not hasattr(self.op, attr):
13355
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13356
                                     attr, errors.ECODE_INVAL)
13357
      iname = self.cfg.ExpandInstanceName(self.op.name)
13358
      if iname is not None:
13359
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13360
                                   iname, errors.ECODE_EXISTS)
13361
      if not isinstance(self.op.nics, list):
13362
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13363
                                   errors.ECODE_INVAL)
13364
      if not isinstance(self.op.disks, list):
13365
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13366
                                   errors.ECODE_INVAL)
13367
      for row in self.op.disks:
13368
        if (not isinstance(row, dict) or
13369
            constants.IDISK_SIZE not in row or
13370
            not isinstance(row[constants.IDISK_SIZE], int) or
13371
            constants.IDISK_MODE not in row or
13372
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13373
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13374
                                     " parameter", errors.ECODE_INVAL)
13375
      if self.op.hypervisor is None:
13376
        self.op.hypervisor = self.cfg.GetHypervisorType()
13377
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13378
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13379
      self.op.name = fname
13380
      self.relocate_from = \
13381
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13382
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13383
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13384
      if not self.op.instances:
13385
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13386
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13387
    else:
13388
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13389
                                 self.op.mode, errors.ECODE_INVAL)
13390

    
13391
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13392
      if self.op.allocator is None:
13393
        raise errors.OpPrereqError("Missing allocator name",
13394
                                   errors.ECODE_INVAL)
13395
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13396
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13397
                                 self.op.direction, errors.ECODE_INVAL)
13398

    
13399
  def Exec(self, feedback_fn):
13400
    """Run the allocator test.
13401

13402
    """
13403
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13404
      ial = IAllocator(self.cfg, self.rpc,
13405
                       mode=self.op.mode,
13406
                       name=self.op.name,
13407
                       memory=self.op.memory,
13408
                       disks=self.op.disks,
13409
                       disk_template=self.op.disk_template,
13410
                       os=self.op.os,
13411
                       tags=self.op.tags,
13412
                       nics=self.op.nics,
13413
                       vcpus=self.op.vcpus,
13414
                       hypervisor=self.op.hypervisor,
13415
                       )
13416
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13417
      ial = IAllocator(self.cfg, self.rpc,
13418
                       mode=self.op.mode,
13419
                       name=self.op.name,
13420
                       relocate_from=list(self.relocate_from),
13421
                       )
13422
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13423
      ial = IAllocator(self.cfg, self.rpc,
13424
                       mode=self.op.mode,
13425
                       instances=self.op.instances,
13426
                       target_groups=self.op.target_groups)
13427
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13428
      ial = IAllocator(self.cfg, self.rpc,
13429
                       mode=self.op.mode,
13430
                       instances=self.op.instances,
13431
                       evac_mode=self.op.evac_mode)
13432
    else:
13433
      raise errors.ProgrammerError("Uncatched mode %s in"
13434
                                   " LUTestAllocator.Exec", self.op.mode)
13435

    
13436
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13437
      result = ial.in_text
13438
    else:
13439
      ial.Run(self.op.allocator, validate=False)
13440
      result = ial.out_text
13441
    return result
13442

    
13443

    
13444
#: Query type implementations
13445
_QUERY_IMPL = {
13446
  constants.QR_INSTANCE: _InstanceQuery,
13447
  constants.QR_NODE: _NodeQuery,
13448
  constants.QR_GROUP: _GroupQuery,
13449
  constants.QR_OS: _OsQuery,
13450
  }
13451

    
13452
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13453

    
13454

    
13455
def _GetQueryImplementation(name):
13456
  """Returns the implemtnation for a query type.
13457

13458
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
13459

13460
  """
13461
  try:
13462
    return _QUERY_IMPL[name]
13463
  except KeyError:
13464
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13465
                               errors.ECODE_INVAL)