Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ b2acdbdc

History | View | Annotate | Download (482.4 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70

    
71
class ResultWithJobs:
72
  """Data container for LU results with jobs.
73

74
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
75
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
76
  contained in the C{jobs} attribute and include the job IDs in the opcode
77
  result.
78

79
  """
80
  def __init__(self, jobs, **kwargs):
81
    """Initializes this class.
82

83
    Additional return values can be specified as keyword arguments.
84

85
    @type jobs: list of lists of L{opcode.OpCode}
86
    @param jobs: A list of lists of opcode objects
87

88
    """
89
    self.jobs = jobs
90
    self.other = kwargs
91

    
92

    
93
class LogicalUnit(object):
94
  """Logical Unit base class.
95

96
  Subclasses must follow these rules:
97
    - implement ExpandNames
98
    - implement CheckPrereq (except when tasklets are used)
99
    - implement Exec (except when tasklets are used)
100
    - implement BuildHooksEnv
101
    - implement BuildHooksNodes
102
    - redefine HPATH and HTYPE
103
    - optionally redefine their run requirements:
104
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
105

106
  Note that all commands require root permissions.
107

108
  @ivar dry_run_result: the value (if any) that will be returned to the caller
109
      in dry-run mode (signalled by opcode dry_run parameter)
110

111
  """
112
  HPATH = None
113
  HTYPE = None
114
  REQ_BGL = True
115

    
116
  def __init__(self, processor, op, context, rpc_runner):
117
    """Constructor for LogicalUnit.
118

119
    This needs to be overridden in derived classes in order to check op
120
    validity.
121

122
    """
123
    self.proc = processor
124
    self.op = op
125
    self.cfg = context.cfg
126
    self.glm = context.glm
127
    # readability alias
128
    self.owned_locks = context.glm.list_owned
129
    self.context = context
130
    self.rpc = rpc_runner
131
    # Dicts used to declare locking needs to mcpu
132
    self.needed_locks = None
133
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
134
    self.add_locks = {}
135
    self.remove_locks = {}
136
    # Used to force good behavior when calling helper functions
137
    self.recalculate_locks = {}
138
    # logging
139
    self.Log = processor.Log # pylint: disable=C0103
140
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
141
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
142
    self.LogStep = processor.LogStep # pylint: disable=C0103
143
    # support for dry-run
144
    self.dry_run_result = None
145
    # support for generic debug attribute
146
    if (not hasattr(self.op, "debug_level") or
147
        not isinstance(self.op.debug_level, int)):
148
      self.op.debug_level = 0
149

    
150
    # Tasklets
151
    self.tasklets = None
152

    
153
    # Validate opcode parameters and set defaults
154
    self.op.Validate(True)
155

    
156
    self.CheckArguments()
157

    
158
  def CheckArguments(self):
159
    """Check syntactic validity for the opcode arguments.
160

161
    This method is for doing a simple syntactic check and ensure
162
    validity of opcode parameters, without any cluster-related
163
    checks. While the same can be accomplished in ExpandNames and/or
164
    CheckPrereq, doing these separate is better because:
165

166
      - ExpandNames is left as as purely a lock-related function
167
      - CheckPrereq is run after we have acquired locks (and possible
168
        waited for them)
169

170
    The function is allowed to change the self.op attribute so that
171
    later methods can no longer worry about missing parameters.
172

173
    """
174
    pass
175

    
176
  def ExpandNames(self):
177
    """Expand names for this LU.
178

179
    This method is called before starting to execute the opcode, and it should
180
    update all the parameters of the opcode to their canonical form (e.g. a
181
    short node name must be fully expanded after this method has successfully
182
    completed). This way locking, hooks, logging, etc. can work correctly.
183

184
    LUs which implement this method must also populate the self.needed_locks
185
    member, as a dict with lock levels as keys, and a list of needed lock names
186
    as values. Rules:
187

188
      - use an empty dict if you don't need any lock
189
      - if you don't need any lock at a particular level omit that level
190
      - don't put anything for the BGL level
191
      - if you want all locks at a level use locking.ALL_SET as a value
192

193
    If you need to share locks (rather than acquire them exclusively) at one
194
    level you can modify self.share_locks, setting a true value (usually 1) for
195
    that level. By default locks are not shared.
196

197
    This function can also define a list of tasklets, which then will be
198
    executed in order instead of the usual LU-level CheckPrereq and Exec
199
    functions, if those are not defined by the LU.
200

201
    Examples::
202

203
      # Acquire all nodes and one instance
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: locking.ALL_SET,
206
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
207
      }
208
      # Acquire just two nodes
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
211
      }
212
      # Acquire no locks
213
      self.needed_locks = {} # No, you can't leave it to the default value None
214

215
    """
216
    # The implementation of this method is mandatory only if the new LU is
217
    # concurrent, so that old LUs don't need to be changed all at the same
218
    # time.
219
    if self.REQ_BGL:
220
      self.needed_locks = {} # Exclusive LUs don't need locks.
221
    else:
222
      raise NotImplementedError
223

    
224
  def DeclareLocks(self, level):
225
    """Declare LU locking needs for a level
226

227
    While most LUs can just declare their locking needs at ExpandNames time,
228
    sometimes there's the need to calculate some locks after having acquired
229
    the ones before. This function is called just before acquiring locks at a
230
    particular level, but after acquiring the ones at lower levels, and permits
231
    such calculations. It can be used to modify self.needed_locks, and by
232
    default it does nothing.
233

234
    This function is only called if you have something already set in
235
    self.needed_locks for the level.
236

237
    @param level: Locking level which is going to be locked
238
    @type level: member of ganeti.locking.LEVELS
239

240
    """
241

    
242
  def CheckPrereq(self):
243
    """Check prerequisites for this LU.
244

245
    This method should check that the prerequisites for the execution
246
    of this LU are fulfilled. It can do internode communication, but
247
    it should be idempotent - no cluster or system changes are
248
    allowed.
249

250
    The method should raise errors.OpPrereqError in case something is
251
    not fulfilled. Its return value is ignored.
252

253
    This method should also update all the parameters of the opcode to
254
    their canonical form if it hasn't been done by ExpandNames before.
255

256
    """
257
    if self.tasklets is not None:
258
      for (idx, tl) in enumerate(self.tasklets):
259
        logging.debug("Checking prerequisites for tasklet %s/%s",
260
                      idx + 1, len(self.tasklets))
261
        tl.CheckPrereq()
262
    else:
263
      pass
264

    
265
  def Exec(self, feedback_fn):
266
    """Execute the LU.
267

268
    This method should implement the actual work. It should raise
269
    errors.OpExecError for failures that are somewhat dealt with in
270
    code, or expected.
271

272
    """
273
    if self.tasklets is not None:
274
      for (idx, tl) in enumerate(self.tasklets):
275
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
276
        tl.Exec(feedback_fn)
277
    else:
278
      raise NotImplementedError
279

    
280
  def BuildHooksEnv(self):
281
    """Build hooks environment for this LU.
282

283
    @rtype: dict
284
    @return: Dictionary containing the environment that will be used for
285
      running the hooks for this LU. The keys of the dict must not be prefixed
286
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
287
      will extend the environment with additional variables. If no environment
288
      should be defined, an empty dictionary should be returned (not C{None}).
289
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
290
      will not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def BuildHooksNodes(self):
296
    """Build list of nodes to run LU's hooks.
297

298
    @rtype: tuple; (list, list)
299
    @return: Tuple containing a list of node names on which the hook
300
      should run before the execution and a list of node names on which the
301
      hook should run after the execution. No nodes should be returned as an
302
      empty list (and not None).
303
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
304
      will not be called.
305

306
    """
307
    raise NotImplementedError
308

    
309
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
310
    """Notify the LU about the results of its hooks.
311

312
    This method is called every time a hooks phase is executed, and notifies
313
    the Logical Unit about the hooks' result. The LU can then use it to alter
314
    its result based on the hooks.  By default the method does nothing and the
315
    previous result is passed back unchanged but any LU can define it if it
316
    wants to use the local cluster hook-scripts somehow.
317

318
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
319
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
320
    @param hook_results: the results of the multi-node hooks rpc call
321
    @param feedback_fn: function used send feedback back to the caller
322
    @param lu_result: the previous Exec result this LU had, or None
323
        in the PRE phase
324
    @return: the new Exec result, based on the previous result
325
        and hook results
326

327
    """
328
    # API must be kept, thus we ignore the unused argument and could
329
    # be a function warnings
330
    # pylint: disable=W0613,R0201
331
    return lu_result
332

    
333
  def _ExpandAndLockInstance(self):
334
    """Helper function to expand and lock an instance.
335

336
    Many LUs that work on an instance take its name in self.op.instance_name
337
    and need to expand it and then declare the expanded name for locking. This
338
    function does it, and then updates self.op.instance_name to the expanded
339
    name. It also initializes needed_locks as a dict, if this hasn't been done
340
    before.
341

342
    """
343
    if self.needed_locks is None:
344
      self.needed_locks = {}
345
    else:
346
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
347
        "_ExpandAndLockInstance called with instance-level locks set"
348
    self.op.instance_name = _ExpandInstanceName(self.cfg,
349
                                                self.op.instance_name)
350
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
351

    
352
  def _LockInstancesNodes(self, primary_only=False):
353
    """Helper function to declare instances' nodes for locking.
354

355
    This function should be called after locking one or more instances to lock
356
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
357
    with all primary or secondary nodes for instances already locked and
358
    present in self.needed_locks[locking.LEVEL_INSTANCE].
359

360
    It should be called from DeclareLocks, and for safety only works if
361
    self.recalculate_locks[locking.LEVEL_NODE] is set.
362

363
    In the future it may grow parameters to just lock some instance's nodes, or
364
    to just lock primaries or secondary nodes, if needed.
365

366
    If should be called in DeclareLocks in a way similar to::
367

368
      if level == locking.LEVEL_NODE:
369
        self._LockInstancesNodes()
370

371
    @type primary_only: boolean
372
    @param primary_only: only lock primary nodes of locked instances
373

374
    """
375
    assert locking.LEVEL_NODE in self.recalculate_locks, \
376
      "_LockInstancesNodes helper function called with no nodes to recalculate"
377

    
378
    # TODO: check if we're really been called with the instance locks held
379

    
380
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
381
    # future we might want to have different behaviors depending on the value
382
    # of self.recalculate_locks[locking.LEVEL_NODE]
383
    wanted_nodes = []
384
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
385
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
386
      wanted_nodes.append(instance.primary_node)
387
      if not primary_only:
388
        wanted_nodes.extend(instance.secondary_nodes)
389

    
390
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
391
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
392
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
393
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
394

    
395
    del self.recalculate_locks[locking.LEVEL_NODE]
396

    
397

    
398
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
399
  """Simple LU which runs no hooks.
400

401
  This LU is intended as a parent for other LogicalUnits which will
402
  run no hooks, in order to reduce duplicate code.
403

404
  """
405
  HPATH = None
406
  HTYPE = None
407

    
408
  def BuildHooksEnv(self):
409
    """Empty BuildHooksEnv for NoHooksLu.
410

411
    This just raises an error.
412

413
    """
414
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
415

    
416
  def BuildHooksNodes(self):
417
    """Empty BuildHooksNodes for NoHooksLU.
418

419
    """
420
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
421

    
422

    
423
class Tasklet:
424
  """Tasklet base class.
425

426
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
427
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
428
  tasklets know nothing about locks.
429

430
  Subclasses must follow these rules:
431
    - Implement CheckPrereq
432
    - Implement Exec
433

434
  """
435
  def __init__(self, lu):
436
    self.lu = lu
437

    
438
    # Shortcuts
439
    self.cfg = lu.cfg
440
    self.rpc = lu.rpc
441

    
442
  def CheckPrereq(self):
443
    """Check prerequisites for this tasklets.
444

445
    This method should check whether the prerequisites for the execution of
446
    this tasklet are fulfilled. It can do internode communication, but it
447
    should be idempotent - no cluster or system changes are allowed.
448

449
    The method should raise errors.OpPrereqError in case something is not
450
    fulfilled. Its return value is ignored.
451

452
    This method should also update all parameters to their canonical form if it
453
    hasn't been done before.
454

455
    """
456
    pass
457

    
458
  def Exec(self, feedback_fn):
459
    """Execute the tasklet.
460

461
    This method should implement the actual work. It should raise
462
    errors.OpExecError for failures that are somewhat dealt with in code, or
463
    expected.
464

465
    """
466
    raise NotImplementedError
467

    
468

    
469
class _QueryBase:
470
  """Base for query utility classes.
471

472
  """
473
  #: Attribute holding field definitions
474
  FIELDS = None
475

    
476
  def __init__(self, qfilter, fields, use_locking):
477
    """Initializes this class.
478

479
    """
480
    self.use_locking = use_locking
481

    
482
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
483
                             namefield="name")
484
    self.requested_data = self.query.RequestedData()
485
    self.names = self.query.RequestedNames()
486

    
487
    # Sort only if no names were requested
488
    self.sort_by_name = not self.names
489

    
490
    self.do_locking = None
491
    self.wanted = None
492

    
493
  def _GetNames(self, lu, all_names, lock_level):
494
    """Helper function to determine names asked for in the query.
495

496
    """
497
    if self.do_locking:
498
      names = lu.owned_locks(lock_level)
499
    else:
500
      names = all_names
501

    
502
    if self.wanted == locking.ALL_SET:
503
      assert not self.names
504
      # caller didn't specify names, so ordering is not important
505
      return utils.NiceSort(names)
506

    
507
    # caller specified names and we must keep the same order
508
    assert self.names
509
    assert not self.do_locking or lu.glm.is_owned(lock_level)
510

    
511
    missing = set(self.wanted).difference(names)
512
    if missing:
513
      raise errors.OpExecError("Some items were removed before retrieving"
514
                               " their data: %s" % missing)
515

    
516
    # Return expanded names
517
    return self.wanted
518

    
519
  def ExpandNames(self, lu):
520
    """Expand names for this query.
521

522
    See L{LogicalUnit.ExpandNames}.
523

524
    """
525
    raise NotImplementedError()
526

    
527
  def DeclareLocks(self, lu, level):
528
    """Declare locks for this query.
529

530
    See L{LogicalUnit.DeclareLocks}.
531

532
    """
533
    raise NotImplementedError()
534

    
535
  def _GetQueryData(self, lu):
536
    """Collects all data for this query.
537

538
    @return: Query data object
539

540
    """
541
    raise NotImplementedError()
542

    
543
  def NewStyleQuery(self, lu):
544
    """Collect data and execute query.
545

546
    """
547
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
548
                                  sort_by_name=self.sort_by_name)
549

    
550
  def OldStyleQuery(self, lu):
551
    """Collect data and execute query.
552

553
    """
554
    return self.query.OldStyleQuery(self._GetQueryData(lu),
555
                                    sort_by_name=self.sort_by_name)
556

    
557

    
558
def _ShareAll():
559
  """Returns a dict declaring all lock levels shared.
560

561
  """
562
  return dict.fromkeys(locking.LEVELS, 1)
563

    
564

    
565
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
566
  """Checks if the owned node groups are still correct for an instance.
567

568
  @type cfg: L{config.ConfigWriter}
569
  @param cfg: The cluster configuration
570
  @type instance_name: string
571
  @param instance_name: Instance name
572
  @type owned_groups: set or frozenset
573
  @param owned_groups: List of currently owned node groups
574

575
  """
576
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
577

    
578
  if not owned_groups.issuperset(inst_groups):
579
    raise errors.OpPrereqError("Instance %s's node groups changed since"
580
                               " locks were acquired, current groups are"
581
                               " are '%s', owning groups '%s'; retry the"
582
                               " operation" %
583
                               (instance_name,
584
                                utils.CommaJoin(inst_groups),
585
                                utils.CommaJoin(owned_groups)),
586
                               errors.ECODE_STATE)
587

    
588
  return inst_groups
589

    
590

    
591
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
592
  """Checks if the instances in a node group are still correct.
593

594
  @type cfg: L{config.ConfigWriter}
595
  @param cfg: The cluster configuration
596
  @type group_uuid: string
597
  @param group_uuid: Node group UUID
598
  @type owned_instances: set or frozenset
599
  @param owned_instances: List of currently owned instances
600

601
  """
602
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
603
  if owned_instances != wanted_instances:
604
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
605
                               " locks were acquired, wanted '%s', have '%s';"
606
                               " retry the operation" %
607
                               (group_uuid,
608
                                utils.CommaJoin(wanted_instances),
609
                                utils.CommaJoin(owned_instances)),
610
                               errors.ECODE_STATE)
611

    
612
  return wanted_instances
613

    
614

    
615
def _SupportsOob(cfg, node):
616
  """Tells if node supports OOB.
617

618
  @type cfg: L{config.ConfigWriter}
619
  @param cfg: The cluster configuration
620
  @type node: L{objects.Node}
621
  @param node: The node
622
  @return: The OOB script if supported or an empty string otherwise
623

624
  """
625
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
626

    
627

    
628
def _GetWantedNodes(lu, nodes):
629
  """Returns list of checked and expanded node names.
630

631
  @type lu: L{LogicalUnit}
632
  @param lu: the logical unit on whose behalf we execute
633
  @type nodes: list
634
  @param nodes: list of node names or None for all nodes
635
  @rtype: list
636
  @return: the list of nodes, sorted
637
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
638

639
  """
640
  if nodes:
641
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
642

    
643
  return utils.NiceSort(lu.cfg.GetNodeList())
644

    
645

    
646
def _GetWantedInstances(lu, instances):
647
  """Returns list of checked and expanded instance names.
648

649
  @type lu: L{LogicalUnit}
650
  @param lu: the logical unit on whose behalf we execute
651
  @type instances: list
652
  @param instances: list of instance names or None for all instances
653
  @rtype: list
654
  @return: the list of instances, sorted
655
  @raise errors.OpPrereqError: if the instances parameter is wrong type
656
  @raise errors.OpPrereqError: if any of the passed instances is not found
657

658
  """
659
  if instances:
660
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
661
  else:
662
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
663
  return wanted
664

    
665

    
666
def _GetUpdatedParams(old_params, update_dict,
667
                      use_default=True, use_none=False):
668
  """Return the new version of a parameter dictionary.
669

670
  @type old_params: dict
671
  @param old_params: old parameters
672
  @type update_dict: dict
673
  @param update_dict: dict containing new parameter values, or
674
      constants.VALUE_DEFAULT to reset the parameter to its default
675
      value
676
  @param use_default: boolean
677
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
678
      values as 'to be deleted' values
679
  @param use_none: boolean
680
  @type use_none: whether to recognise C{None} values as 'to be
681
      deleted' values
682
  @rtype: dict
683
  @return: the new parameter dictionary
684

685
  """
686
  params_copy = copy.deepcopy(old_params)
687
  for key, val in update_dict.iteritems():
688
    if ((use_default and val == constants.VALUE_DEFAULT) or
689
        (use_none and val is None)):
690
      try:
691
        del params_copy[key]
692
      except KeyError:
693
        pass
694
    else:
695
      params_copy[key] = val
696
  return params_copy
697

    
698

    
699
def _ReleaseLocks(lu, level, names=None, keep=None):
700
  """Releases locks owned by an LU.
701

702
  @type lu: L{LogicalUnit}
703
  @param level: Lock level
704
  @type names: list or None
705
  @param names: Names of locks to release
706
  @type keep: list or None
707
  @param keep: Names of locks to retain
708

709
  """
710
  assert not (keep is not None and names is not None), \
711
         "Only one of the 'names' and the 'keep' parameters can be given"
712

    
713
  if names is not None:
714
    should_release = names.__contains__
715
  elif keep:
716
    should_release = lambda name: name not in keep
717
  else:
718
    should_release = None
719

    
720
  if should_release:
721
    retain = []
722
    release = []
723

    
724
    # Determine which locks to release
725
    for name in lu.owned_locks(level):
726
      if should_release(name):
727
        release.append(name)
728
      else:
729
        retain.append(name)
730

    
731
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
732

    
733
    # Release just some locks
734
    lu.glm.release(level, names=release)
735

    
736
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
737
  else:
738
    # Release everything
739
    lu.glm.release(level)
740

    
741
    assert not lu.glm.is_owned(level), "No locks should be owned"
742

    
743

    
744
def _MapInstanceDisksToNodes(instances):
745
  """Creates a map from (node, volume) to instance name.
746

747
  @type instances: list of L{objects.Instance}
748
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
749

750
  """
751
  return dict(((node, vol), inst.name)
752
              for inst in instances
753
              for (node, vols) in inst.MapLVsByNode().items()
754
              for vol in vols)
755

    
756

    
757
def _RunPostHook(lu, node_name):
758
  """Runs the post-hook for an opcode on a single node.
759

760
  """
761
  hm = lu.proc.BuildHooksManager(lu)
762
  try:
763
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
764
  except:
765
    # pylint: disable=W0702
766
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
767

    
768

    
769
def _CheckOutputFields(static, dynamic, selected):
770
  """Checks whether all selected fields are valid.
771

772
  @type static: L{utils.FieldSet}
773
  @param static: static fields set
774
  @type dynamic: L{utils.FieldSet}
775
  @param dynamic: dynamic fields set
776

777
  """
778
  f = utils.FieldSet()
779
  f.Extend(static)
780
  f.Extend(dynamic)
781

    
782
  delta = f.NonMatching(selected)
783
  if delta:
784
    raise errors.OpPrereqError("Unknown output fields selected: %s"
785
                               % ",".join(delta), errors.ECODE_INVAL)
786

    
787

    
788
def _CheckGlobalHvParams(params):
789
  """Validates that given hypervisor params are not global ones.
790

791
  This will ensure that instances don't get customised versions of
792
  global params.
793

794
  """
795
  used_globals = constants.HVC_GLOBALS.intersection(params)
796
  if used_globals:
797
    msg = ("The following hypervisor parameters are global and cannot"
798
           " be customized at instance level, please modify them at"
799
           " cluster level: %s" % utils.CommaJoin(used_globals))
800
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
801

    
802

    
803
def _CheckNodeOnline(lu, node, msg=None):
804
  """Ensure that a given node is online.
805

806
  @param lu: the LU on behalf of which we make the check
807
  @param node: the node to check
808
  @param msg: if passed, should be a message to replace the default one
809
  @raise errors.OpPrereqError: if the node is offline
810

811
  """
812
  if msg is None:
813
    msg = "Can't use offline node"
814
  if lu.cfg.GetNodeInfo(node).offline:
815
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
816

    
817

    
818
def _CheckNodeNotDrained(lu, node):
819
  """Ensure that a given node is not drained.
820

821
  @param lu: the LU on behalf of which we make the check
822
  @param node: the node to check
823
  @raise errors.OpPrereqError: if the node is drained
824

825
  """
826
  if lu.cfg.GetNodeInfo(node).drained:
827
    raise errors.OpPrereqError("Can't use drained node %s" % node,
828
                               errors.ECODE_STATE)
829

    
830

    
831
def _CheckNodeVmCapable(lu, node):
832
  """Ensure that a given node is vm capable.
833

834
  @param lu: the LU on behalf of which we make the check
835
  @param node: the node to check
836
  @raise errors.OpPrereqError: if the node is not vm capable
837

838
  """
839
  if not lu.cfg.GetNodeInfo(node).vm_capable:
840
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
841
                               errors.ECODE_STATE)
842

    
843

    
844
def _CheckNodeHasOS(lu, node, os_name, force_variant):
845
  """Ensure that a node supports a given OS.
846

847
  @param lu: the LU on behalf of which we make the check
848
  @param node: the node to check
849
  @param os_name: the OS to query about
850
  @param force_variant: whether to ignore variant errors
851
  @raise errors.OpPrereqError: if the node is not supporting the OS
852

853
  """
854
  result = lu.rpc.call_os_get(node, os_name)
855
  result.Raise("OS '%s' not in supported OS list for node %s" %
856
               (os_name, node),
857
               prereq=True, ecode=errors.ECODE_INVAL)
858
  if not force_variant:
859
    _CheckOSVariant(result.payload, os_name)
860

    
861

    
862
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
863
  """Ensure that a node has the given secondary ip.
864

865
  @type lu: L{LogicalUnit}
866
  @param lu: the LU on behalf of which we make the check
867
  @type node: string
868
  @param node: the node to check
869
  @type secondary_ip: string
870
  @param secondary_ip: the ip to check
871
  @type prereq: boolean
872
  @param prereq: whether to throw a prerequisite or an execute error
873
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
874
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
875

876
  """
877
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
878
  result.Raise("Failure checking secondary ip on node %s" % node,
879
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
880
  if not result.payload:
881
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
882
           " please fix and re-run this command" % secondary_ip)
883
    if prereq:
884
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
885
    else:
886
      raise errors.OpExecError(msg)
887

    
888

    
889
def _GetClusterDomainSecret():
890
  """Reads the cluster domain secret.
891

892
  """
893
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
894
                               strict=True)
895

    
896

    
897
def _CheckInstanceDown(lu, instance, reason):
898
  """Ensure that an instance is not running."""
899
  if instance.admin_up:
900
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
901
                               (instance.name, reason), errors.ECODE_STATE)
902

    
903
  pnode = instance.primary_node
904
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
905
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
906
              prereq=True, ecode=errors.ECODE_ENVIRON)
907

    
908
  if instance.name in ins_l.payload:
909
    raise errors.OpPrereqError("Instance %s is running, %s" %
910
                               (instance.name, reason), errors.ECODE_STATE)
911

    
912

    
913
def _ExpandItemName(fn, name, kind):
914
  """Expand an item name.
915

916
  @param fn: the function to use for expansion
917
  @param name: requested item name
918
  @param kind: text description ('Node' or 'Instance')
919
  @return: the resolved (full) name
920
  @raise errors.OpPrereqError: if the item is not found
921

922
  """
923
  full_name = fn(name)
924
  if full_name is None:
925
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
926
                               errors.ECODE_NOENT)
927
  return full_name
928

    
929

    
930
def _ExpandNodeName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for nodes."""
932
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
933

    
934

    
935
def _ExpandInstanceName(cfg, name):
936
  """Wrapper over L{_ExpandItemName} for instance."""
937
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
938

    
939

    
940
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
941
                          memory, vcpus, nics, disk_template, disks,
942
                          bep, hvp, hypervisor_name, tags):
943
  """Builds instance related env variables for hooks
944

945
  This builds the hook environment from individual variables.
946

947
  @type name: string
948
  @param name: the name of the instance
949
  @type primary_node: string
950
  @param primary_node: the name of the instance's primary node
951
  @type secondary_nodes: list
952
  @param secondary_nodes: list of secondary nodes as strings
953
  @type os_type: string
954
  @param os_type: the name of the instance's OS
955
  @type status: boolean
956
  @param status: the should_run status of the instance
957
  @type memory: string
958
  @param memory: the memory size of the instance
959
  @type vcpus: string
960
  @param vcpus: the count of VCPUs the instance has
961
  @type nics: list
962
  @param nics: list of tuples (ip, mac, mode, link) representing
963
      the NICs the instance has
964
  @type disk_template: string
965
  @param disk_template: the disk template of the instance
966
  @type disks: list
967
  @param disks: the list of (size, mode) pairs
968
  @type bep: dict
969
  @param bep: the backend parameters for the instance
970
  @type hvp: dict
971
  @param hvp: the hypervisor parameters for the instance
972
  @type hypervisor_name: string
973
  @param hypervisor_name: the hypervisor for the instance
974
  @type tags: list
975
  @param tags: list of instance tags as strings
976
  @rtype: dict
977
  @return: the hook environment for this instance
978

979
  """
980
  if status:
981
    str_status = "up"
982
  else:
983
    str_status = "down"
984
  env = {
985
    "OP_TARGET": name,
986
    "INSTANCE_NAME": name,
987
    "INSTANCE_PRIMARY": primary_node,
988
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
989
    "INSTANCE_OS_TYPE": os_type,
990
    "INSTANCE_STATUS": str_status,
991
    "INSTANCE_MEMORY": memory,
992
    "INSTANCE_VCPUS": vcpus,
993
    "INSTANCE_DISK_TEMPLATE": disk_template,
994
    "INSTANCE_HYPERVISOR": hypervisor_name,
995
  }
996

    
997
  if nics:
998
    nic_count = len(nics)
999
    for idx, (ip, mac, mode, link) in enumerate(nics):
1000
      if ip is None:
1001
        ip = ""
1002
      env["INSTANCE_NIC%d_IP" % idx] = ip
1003
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1004
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1005
      env["INSTANCE_NIC%d_LINK" % idx] = link
1006
      if mode == constants.NIC_MODE_BRIDGED:
1007
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1008
  else:
1009
    nic_count = 0
1010

    
1011
  env["INSTANCE_NIC_COUNT"] = nic_count
1012

    
1013
  if disks:
1014
    disk_count = len(disks)
1015
    for idx, (size, mode) in enumerate(disks):
1016
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1017
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1018
  else:
1019
    disk_count = 0
1020

    
1021
  env["INSTANCE_DISK_COUNT"] = disk_count
1022

    
1023
  if not tags:
1024
    tags = []
1025

    
1026
  env["INSTANCE_TAGS"] = " ".join(tags)
1027

    
1028
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1029
    for key, value in source.items():
1030
      env["INSTANCE_%s_%s" % (kind, key)] = value
1031

    
1032
  return env
1033

    
1034

    
1035
def _NICListToTuple(lu, nics):
1036
  """Build a list of nic information tuples.
1037

1038
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1039
  value in LUInstanceQueryData.
1040

1041
  @type lu:  L{LogicalUnit}
1042
  @param lu: the logical unit on whose behalf we execute
1043
  @type nics: list of L{objects.NIC}
1044
  @param nics: list of nics to convert to hooks tuples
1045

1046
  """
1047
  hooks_nics = []
1048
  cluster = lu.cfg.GetClusterInfo()
1049
  for nic in nics:
1050
    ip = nic.ip
1051
    mac = nic.mac
1052
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1053
    mode = filled_params[constants.NIC_MODE]
1054
    link = filled_params[constants.NIC_LINK]
1055
    hooks_nics.append((ip, mac, mode, link))
1056
  return hooks_nics
1057

    
1058

    
1059
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1060
  """Builds instance related env variables for hooks from an object.
1061

1062
  @type lu: L{LogicalUnit}
1063
  @param lu: the logical unit on whose behalf we execute
1064
  @type instance: L{objects.Instance}
1065
  @param instance: the instance for which we should build the
1066
      environment
1067
  @type override: dict
1068
  @param override: dictionary with key/values that will override
1069
      our values
1070
  @rtype: dict
1071
  @return: the hook environment dictionary
1072

1073
  """
1074
  cluster = lu.cfg.GetClusterInfo()
1075
  bep = cluster.FillBE(instance)
1076
  hvp = cluster.FillHV(instance)
1077
  args = {
1078
    "name": instance.name,
1079
    "primary_node": instance.primary_node,
1080
    "secondary_nodes": instance.secondary_nodes,
1081
    "os_type": instance.os,
1082
    "status": instance.admin_up,
1083
    "memory": bep[constants.BE_MEMORY],
1084
    "vcpus": bep[constants.BE_VCPUS],
1085
    "nics": _NICListToTuple(lu, instance.nics),
1086
    "disk_template": instance.disk_template,
1087
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1088
    "bep": bep,
1089
    "hvp": hvp,
1090
    "hypervisor_name": instance.hypervisor,
1091
    "tags": instance.tags,
1092
  }
1093
  if override:
1094
    args.update(override)
1095
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1096

    
1097

    
1098
def _AdjustCandidatePool(lu, exceptions):
1099
  """Adjust the candidate pool after node operations.
1100

1101
  """
1102
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1103
  if mod_list:
1104
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1105
               utils.CommaJoin(node.name for node in mod_list))
1106
    for name in mod_list:
1107
      lu.context.ReaddNode(name)
1108
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1109
  if mc_now > mc_max:
1110
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1111
               (mc_now, mc_max))
1112

    
1113

    
1114
def _DecideSelfPromotion(lu, exceptions=None):
1115
  """Decide whether I should promote myself as a master candidate.
1116

1117
  """
1118
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1119
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1120
  # the new node will increase mc_max with one, so:
1121
  mc_should = min(mc_should + 1, cp_size)
1122
  return mc_now < mc_should
1123

    
1124

    
1125
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1126
  """Check that the brigdes needed by a list of nics exist.
1127

1128
  """
1129
  cluster = lu.cfg.GetClusterInfo()
1130
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1131
  brlist = [params[constants.NIC_LINK] for params in paramslist
1132
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1133
  if brlist:
1134
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1135
    result.Raise("Error checking bridges on destination node '%s'" %
1136
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1137

    
1138

    
1139
def _CheckInstanceBridgesExist(lu, instance, node=None):
1140
  """Check that the brigdes needed by an instance exist.
1141

1142
  """
1143
  if node is None:
1144
    node = instance.primary_node
1145
  _CheckNicsBridgesExist(lu, instance.nics, node)
1146

    
1147

    
1148
def _CheckOSVariant(os_obj, name):
1149
  """Check whether an OS name conforms to the os variants specification.
1150

1151
  @type os_obj: L{objects.OS}
1152
  @param os_obj: OS object to check
1153
  @type name: string
1154
  @param name: OS name passed by the user, to check for validity
1155

1156
  """
1157
  variant = objects.OS.GetVariant(name)
1158
  if not os_obj.supported_variants:
1159
    if variant:
1160
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1161
                                 " passed)" % (os_obj.name, variant),
1162
                                 errors.ECODE_INVAL)
1163
    return
1164
  if not variant:
1165
    raise errors.OpPrereqError("OS name must include a variant",
1166
                               errors.ECODE_INVAL)
1167

    
1168
  if variant not in os_obj.supported_variants:
1169
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1170

    
1171

    
1172
def _GetNodeInstancesInner(cfg, fn):
1173
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1174

    
1175

    
1176
def _GetNodeInstances(cfg, node_name):
1177
  """Returns a list of all primary and secondary instances on a node.
1178

1179
  """
1180

    
1181
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1182

    
1183

    
1184
def _GetNodePrimaryInstances(cfg, node_name):
1185
  """Returns primary instances on a node.
1186

1187
  """
1188
  return _GetNodeInstancesInner(cfg,
1189
                                lambda inst: node_name == inst.primary_node)
1190

    
1191

    
1192
def _GetNodeSecondaryInstances(cfg, node_name):
1193
  """Returns secondary instances on a node.
1194

1195
  """
1196
  return _GetNodeInstancesInner(cfg,
1197
                                lambda inst: node_name in inst.secondary_nodes)
1198

    
1199

    
1200
def _GetStorageTypeArgs(cfg, storage_type):
1201
  """Returns the arguments for a storage type.
1202

1203
  """
1204
  # Special case for file storage
1205
  if storage_type == constants.ST_FILE:
1206
    # storage.FileStorage wants a list of storage directories
1207
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1208

    
1209
  return []
1210

    
1211

    
1212
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1213
  faulty = []
1214

    
1215
  for dev in instance.disks:
1216
    cfg.SetDiskID(dev, node_name)
1217

    
1218
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1219
  result.Raise("Failed to get disk status from node %s" % node_name,
1220
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1221

    
1222
  for idx, bdev_status in enumerate(result.payload):
1223
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224
      faulty.append(idx)
1225

    
1226
  return faulty
1227

    
1228

    
1229
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1230
  """Check the sanity of iallocator and node arguments and use the
1231
  cluster-wide iallocator if appropriate.
1232

1233
  Check that at most one of (iallocator, node) is specified. If none is
1234
  specified, then the LU's opcode's iallocator slot is filled with the
1235
  cluster-wide default iallocator.
1236

1237
  @type iallocator_slot: string
1238
  @param iallocator_slot: the name of the opcode iallocator slot
1239
  @type node_slot: string
1240
  @param node_slot: the name of the opcode target node slot
1241

1242
  """
1243
  node = getattr(lu.op, node_slot, None)
1244
  iallocator = getattr(lu.op, iallocator_slot, None)
1245

    
1246
  if node is not None and iallocator is not None:
1247
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1248
                               errors.ECODE_INVAL)
1249
  elif node is None and iallocator is None:
1250
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1251
    if default_iallocator:
1252
      setattr(lu.op, iallocator_slot, default_iallocator)
1253
    else:
1254
      raise errors.OpPrereqError("No iallocator or node given and no"
1255
                                 " cluster-wide default iallocator found;"
1256
                                 " please specify either an iallocator or a"
1257
                                 " node, or set a cluster-wide default"
1258
                                 " iallocator")
1259

    
1260

    
1261
def _GetDefaultIAllocator(cfg, iallocator):
1262
  """Decides on which iallocator to use.
1263

1264
  @type cfg: L{config.ConfigWriter}
1265
  @param cfg: Cluster configuration object
1266
  @type iallocator: string or None
1267
  @param iallocator: Iallocator specified in opcode
1268
  @rtype: string
1269
  @return: Iallocator name
1270

1271
  """
1272
  if not iallocator:
1273
    # Use default iallocator
1274
    iallocator = cfg.GetDefaultIAllocator()
1275

    
1276
  if not iallocator:
1277
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1278
                               " opcode nor as a cluster-wide default",
1279
                               errors.ECODE_INVAL)
1280

    
1281
  return iallocator
1282

    
1283

    
1284
class LUClusterPostInit(LogicalUnit):
1285
  """Logical unit for running hooks after cluster initialization.
1286

1287
  """
1288
  HPATH = "cluster-init"
1289
  HTYPE = constants.HTYPE_CLUSTER
1290

    
1291
  def BuildHooksEnv(self):
1292
    """Build hooks env.
1293

1294
    """
1295
    return {
1296
      "OP_TARGET": self.cfg.GetClusterName(),
1297
      }
1298

    
1299
  def BuildHooksNodes(self):
1300
    """Build hooks nodes.
1301

1302
    """
1303
    return ([], [self.cfg.GetMasterNode()])
1304

    
1305
  def Exec(self, feedback_fn):
1306
    """Nothing to do.
1307

1308
    """
1309
    return True
1310

    
1311

    
1312
class LUClusterDestroy(LogicalUnit):
1313
  """Logical unit for destroying the cluster.
1314

1315
  """
1316
  HPATH = "cluster-destroy"
1317
  HTYPE = constants.HTYPE_CLUSTER
1318

    
1319
  def BuildHooksEnv(self):
1320
    """Build hooks env.
1321

1322
    """
1323
    return {
1324
      "OP_TARGET": self.cfg.GetClusterName(),
1325
      }
1326

    
1327
  def BuildHooksNodes(self):
1328
    """Build hooks nodes.
1329

1330
    """
1331
    return ([], [])
1332

    
1333
  def CheckPrereq(self):
1334
    """Check prerequisites.
1335

1336
    This checks whether the cluster is empty.
1337

1338
    Any errors are signaled by raising errors.OpPrereqError.
1339

1340
    """
1341
    master = self.cfg.GetMasterNode()
1342

    
1343
    nodelist = self.cfg.GetNodeList()
1344
    if len(nodelist) != 1 or nodelist[0] != master:
1345
      raise errors.OpPrereqError("There are still %d node(s) in"
1346
                                 " this cluster." % (len(nodelist) - 1),
1347
                                 errors.ECODE_INVAL)
1348
    instancelist = self.cfg.GetInstanceList()
1349
    if instancelist:
1350
      raise errors.OpPrereqError("There are still %d instance(s) in"
1351
                                 " this cluster." % len(instancelist),
1352
                                 errors.ECODE_INVAL)
1353

    
1354
  def Exec(self, feedback_fn):
1355
    """Destroys the cluster.
1356

1357
    """
1358
    master_params = self.cfg.GetMasterNetworkParameters()
1359

    
1360
    # Run post hooks on master node before it's removed
1361
    _RunPostHook(self, master_params.name)
1362

    
1363
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1364
                                                     master_params)
1365
    result.Raise("Could not disable the master role")
1366

    
1367
    return master_params.name
1368

    
1369

    
1370
def _VerifyCertificate(filename):
1371
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1372

1373
  @type filename: string
1374
  @param filename: Path to PEM file
1375

1376
  """
1377
  try:
1378
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1379
                                           utils.ReadFile(filename))
1380
  except Exception, err: # pylint: disable=W0703
1381
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1382
            "Failed to load X509 certificate %s: %s" % (filename, err))
1383

    
1384
  (errcode, msg) = \
1385
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1386
                                constants.SSL_CERT_EXPIRATION_ERROR)
1387

    
1388
  if msg:
1389
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1390
  else:
1391
    fnamemsg = None
1392

    
1393
  if errcode is None:
1394
    return (None, fnamemsg)
1395
  elif errcode == utils.CERT_WARNING:
1396
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1397
  elif errcode == utils.CERT_ERROR:
1398
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1399

    
1400
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1401

    
1402

    
1403
def _GetAllHypervisorParameters(cluster, instances):
1404
  """Compute the set of all hypervisor parameters.
1405

1406
  @type cluster: L{objects.Cluster}
1407
  @param cluster: the cluster object
1408
  @param instances: list of L{objects.Instance}
1409
  @param instances: additional instances from which to obtain parameters
1410
  @rtype: list of (origin, hypervisor, parameters)
1411
  @return: a list with all parameters found, indicating the hypervisor they
1412
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1413

1414
  """
1415
  hvp_data = []
1416

    
1417
  for hv_name in cluster.enabled_hypervisors:
1418
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1419

    
1420
  for os_name, os_hvp in cluster.os_hvp.items():
1421
    for hv_name, hv_params in os_hvp.items():
1422
      if hv_params:
1423
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1424
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1425

    
1426
  # TODO: collapse identical parameter values in a single one
1427
  for instance in instances:
1428
    if instance.hvparams:
1429
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1430
                       cluster.FillHV(instance)))
1431

    
1432
  return hvp_data
1433

    
1434

    
1435
class _VerifyErrors(object):
1436
  """Mix-in for cluster/group verify LUs.
1437

1438
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1439
  self.op and self._feedback_fn to be available.)
1440

1441
  """
1442

    
1443
  ETYPE_FIELD = "code"
1444
  ETYPE_ERROR = "ERROR"
1445
  ETYPE_WARNING = "WARNING"
1446

    
1447
  def _Error(self, ecode, item, msg, *args, **kwargs):
1448
    """Format an error message.
1449

1450
    Based on the opcode's error_codes parameter, either format a
1451
    parseable error code, or a simpler error string.
1452

1453
    This must be called only from Exec and functions called from Exec.
1454

1455
    """
1456
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1457
    itype, etxt, _ = ecode
1458
    # first complete the msg
1459
    if args:
1460
      msg = msg % args
1461
    # then format the whole message
1462
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1463
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1464
    else:
1465
      if item:
1466
        item = " " + item
1467
      else:
1468
        item = ""
1469
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1470
    # and finally report it via the feedback_fn
1471
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1472

    
1473
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1474
    """Log an error message if the passed condition is True.
1475

1476
    """
1477
    cond = (bool(cond)
1478
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1479

    
1480
    # If the error code is in the list of ignored errors, demote the error to a
1481
    # warning
1482
    (_, etxt, _) = ecode
1483
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1484
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1485

    
1486
    if cond:
1487
      self._Error(ecode, *args, **kwargs)
1488

    
1489
    # do not mark the operation as failed for WARN cases only
1490
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1491
      self.bad = self.bad or cond
1492

    
1493

    
1494
class LUClusterVerify(NoHooksLU):
1495
  """Submits all jobs necessary to verify the cluster.
1496

1497
  """
1498
  REQ_BGL = False
1499

    
1500
  def ExpandNames(self):
1501
    self.needed_locks = {}
1502

    
1503
  def Exec(self, feedback_fn):
1504
    jobs = []
1505

    
1506
    if self.op.group_name:
1507
      groups = [self.op.group_name]
1508
      depends_fn = lambda: None
1509
    else:
1510
      groups = self.cfg.GetNodeGroupList()
1511

    
1512
      # Verify global configuration
1513
      jobs.append([
1514
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1515
        ])
1516

    
1517
      # Always depend on global verification
1518
      depends_fn = lambda: [(-len(jobs), [])]
1519

    
1520
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1521
                                            ignore_errors=self.op.ignore_errors,
1522
                                            depends=depends_fn())]
1523
                for group in groups)
1524

    
1525
    # Fix up all parameters
1526
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1527
      op.debug_simulate_errors = self.op.debug_simulate_errors
1528
      op.verbose = self.op.verbose
1529
      op.error_codes = self.op.error_codes
1530
      try:
1531
        op.skip_checks = self.op.skip_checks
1532
      except AttributeError:
1533
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1534

    
1535
    return ResultWithJobs(jobs)
1536

    
1537

    
1538
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of the cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a"
                  " non-existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
1689
    # This raises errors.OpPrereqError on its own:
1690
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1691

    
1692
    # Get instances in node group; this is unsafe and needs verification later
1693
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1694

    
1695
    self.needed_locks = {
1696
      locking.LEVEL_INSTANCE: inst_names,
1697
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1698
      locking.LEVEL_NODE: [],
1699
      }
1700

    
1701
    self.share_locks = _ShareAll()
1702

    
1703
  def DeclareLocks(self, level):
1704
    if level == locking.LEVEL_NODE:
1705
      # Get members of node group; this is unsafe and needs verification later
1706
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1707

    
1708
      all_inst_info = self.cfg.GetAllInstancesInfo()
1709

    
1710
      # In Exec(), we warn about mirrored instances that have primary and
1711
      # secondary living in separate node groups. To fully verify that
1712
      # volumes for these instances are healthy, we will need to do an
1713
      # extra call to their secondaries. We ensure here those nodes will
1714
      # be locked.
1715
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1716
        # Important: access only the instances whose lock is owned
1717
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1718
          nodes.update(all_inst_info[inst].secondary_nodes)
1719

    
1720
      self.needed_locks[locking.LEVEL_NODE] = nodes
1721

    
1722
  def CheckPrereq(self):
1723
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1724
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1725

    
1726
    group_nodes = set(self.group_info.members)
1727
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1728

    
1729
    unlocked_nodes = \
1730
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1731

    
1732
    unlocked_instances = \
1733
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1734

    
1735
    if unlocked_nodes:
1736
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1737
                                 utils.CommaJoin(unlocked_nodes))
1738

    
1739
    if unlocked_instances:
1740
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1741
                                 utils.CommaJoin(unlocked_instances))
1742

    
1743
    self.all_node_info = self.cfg.GetAllNodesInfo()
1744
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1745

    
1746
    self.my_node_names = utils.NiceSort(group_nodes)
1747
    self.my_inst_names = utils.NiceSort(group_instances)
1748

    
1749
    self.my_node_info = dict((name, self.all_node_info[name])
1750
                             for name in self.my_node_names)
1751

    
1752
    self.my_inst_info = dict((name, self.all_inst_info[name])
1753
                             for name in self.my_inst_names)
1754

    
1755
    # We detect here the nodes that will need the extra RPC calls for verifying
1756
    # split LV volumes; they should be locked.
1757
    extra_lv_nodes = set()
1758

    
1759
    for inst in self.my_inst_info.values():
1760
      if inst.disk_template in constants.DTS_INT_MIRROR:
1761
        group = self.my_node_info[inst.primary_node].group
1762
        for nname in inst.secondary_nodes:
1763
          if self.all_node_info[nname].group != group:
1764
            extra_lv_nodes.add(nname)
1765

    
1766
    unlocked_lv_nodes = \
1767
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1768

    
1769
    if unlocked_lv_nodes:
1770
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1771
                                 utils.CommaJoin(unlocked_lv_nodes))
1772
    self.extra_lv_nodes = list(extra_lv_nodes)
1773

    
1774
  def _VerifyNode(self, ninfo, nresult):
1775
    """Perform some basic validation on data returned from a node.
1776

1777
      - check the result data structure is well formed and has all the
1778
        mandatory fields
1779
      - check ganeti version
1780

1781
    @type ninfo: L{objects.Node}
1782
    @param ninfo: the node to check
1783
    @param nresult: the results from the node
1784
    @rtype: boolean
1785
    @return: whether overall this call was successful (and we can expect
1786
         reasonable values in the response)
1787

1788
    """
1789
    node = ninfo.name
1790
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1791

    
1792
    # main result, nresult should be a non-empty dict
1793
    test = not nresult or not isinstance(nresult, dict)
1794
    _ErrorIf(test, constants.CV_ENODERPC, node,
1795
                  "unable to verify node: no data returned")
1796
    if test:
1797
      return False
1798

    
1799
    # compares ganeti version
1800
    local_version = constants.PROTOCOL_VERSION
1801
    remote_version = nresult.get("version", None)
1802
    test = not (remote_version and
1803
                isinstance(remote_version, (list, tuple)) and
1804
                len(remote_version) == 2)
1805
    _ErrorIf(test, constants.CV_ENODERPC, node,
1806
             "connection to node returned invalid data")
1807
    if test:
1808
      return False
1809

    
1810
    test = local_version != remote_version[0]
1811
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1812
             "incompatible protocol versions: master %s,"
1813
             " node %s", local_version, remote_version[0])
1814
    if test:
1815
      return False
1816

    
1817
    # node seems compatible, we can actually try to look into its results
1818

    
1819
    # full package version
1820
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1821
                  constants.CV_ENODEVERSION, node,
1822
                  "software version mismatch: master %s, node %s",
1823
                  constants.RELEASE_VERSION, remote_version[1],
1824
                  code=self.ETYPE_WARNING)
1825

    
1826
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1827
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1828
      for hv_name, hv_result in hyp_result.iteritems():
1829
        test = hv_result is not None
1830
        _ErrorIf(test, constants.CV_ENODEHV, node,
1831
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1832

    
1833
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1834
    if ninfo.vm_capable and isinstance(hvp_result, list):
1835
      for item, hv_name, hv_result in hvp_result:
1836
        _ErrorIf(True, constants.CV_ENODEHV, node,
1837
                 "hypervisor %s parameter verify failure (source %s): %s",
1838
                 hv_name, item, hv_result)
1839

    
1840
    test = nresult.get(constants.NV_NODESETUP,
1841
                       ["Missing NODESETUP results"])
1842
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1843
             "; ".join(test))
1844

    
1845
    return True
1846

    
1847
  def _VerifyNodeTime(self, ninfo, nresult,
1848
                      nvinfo_starttime, nvinfo_endtime):
1849
    """Check the node time.
1850

1851
    @type ninfo: L{objects.Node}
1852
    @param ninfo: the node to check
1853
    @param nresult: the remote results for the node
1854
    @param nvinfo_starttime: the start time of the RPC call
1855
    @param nvinfo_endtime: the end time of the RPC call
1856

1857
    """
1858
    node = ninfo.name
1859
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1860

    
1861
    ntime = nresult.get(constants.NV_TIME, None)
1862
    try:
1863
      ntime_merged = utils.MergeTime(ntime)
1864
    except (ValueError, TypeError):
1865
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1866
      return
1867

    
1868
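    # Worked example (added for illustration, not part of the original code):
    # with constants.NODE_MAX_CLOCK_SKEW at, say, 150 seconds, a node whose
    # merged time is 12:00:00 while the verify RPC ran on the master between
    # 12:03:00 and 12:03:05 falls outside the allowed window and is reported
    # below as diverging by roughly 180s.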
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1869
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1870
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1871
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1872
    else:
1873
      ntime_diff = None
1874

    
1875
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1876
             "Node time diverges by at least %s from master node time",
1877
             ntime_diff)
1878

    
1879
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1880
    """Check the node LVM results.
1881

1882
    @type ninfo: L{objects.Node}
1883
    @param ninfo: the node to check
1884
    @param nresult: the remote results for the node
1885
    @param vg_name: the configured VG name
1886

1887
    """
1888
    if vg_name is None:
1889
      return
1890

    
1891
    node = ninfo.name
1892
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1893

    
1894
    # checks vg existence and size > 20G
1895
    vglist = nresult.get(constants.NV_VGLIST, None)
1896
    test = not vglist
1897
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1898
    if not test:
1899
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1900
                                            constants.MIN_VG_SIZE)
1901
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1902

    
1903
    # check pv names
1904
    pvlist = nresult.get(constants.NV_PVLIST, None)
1905
    test = pvlist is None
1906
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1907
    if not test:
1908
      # check that ':' is not present in PV names, since it's a
1909
      # special character for lvcreate (denotes the range of PEs to
1910
      # use on the PV)
1911
      for _, pvname, owner_vg in pvlist:
1912
        test = ":" in pvname
1913
        _ErrorIf(test, constants.CV_ENODELVM, node,
1914
                 "Invalid character ':' in PV '%s' of VG '%s'",
1915
                 pvname, owner_vg)
1916

    
1917
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1918
    """Check the node bridges.
1919

1920
    @type ninfo: L{objects.Node}
1921
    @param ninfo: the node to check
1922
    @param nresult: the remote results for the node
1923
    @param bridges: the expected list of bridges
1924

1925
    """
1926
    if not bridges:
1927
      return
1928

    
1929
    node = ninfo.name
1930
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1931

    
1932
    missing = nresult.get(constants.NV_BRIDGES, None)
1933
    test = not isinstance(missing, list)
1934
    _ErrorIf(test, constants.CV_ENODENET, node,
1935
             "did not return valid bridge information")
1936
    if not test:
1937
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1938
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1939

    
1940
  def _VerifyNodeNetwork(self, ninfo, nresult):
1941
    """Check the node network connectivity results.
1942

1943
    @type ninfo: L{objects.Node}
1944
    @param ninfo: the node to check
1945
    @param nresult: the remote results for the node
1946

1947
    """
1948
    node = ninfo.name
1949
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1950

    
1951
    test = constants.NV_NODELIST not in nresult
1952
    _ErrorIf(test, constants.CV_ENODESSH, node,
1953
             "node hasn't returned node ssh connectivity data")
1954
    if not test:
1955
      if nresult[constants.NV_NODELIST]:
1956
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1957
          _ErrorIf(True, constants.CV_ENODESSH, node,
1958
                   "ssh communication with node '%s': %s", a_node, a_msg)
1959

    
1960
    test = constants.NV_NODENETTEST not in nresult
1961
    _ErrorIf(test, constants.CV_ENODENET, node,
1962
             "node hasn't returned node tcp connectivity data")
1963
    if not test:
1964
      if nresult[constants.NV_NODENETTEST]:
1965
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1966
        for anode in nlist:
1967
          _ErrorIf(True, constants.CV_ENODENET, node,
1968
                   "tcp communication with node '%s': %s",
1969
                   anode, nresult[constants.NV_NODENETTEST][anode])
1970

    
1971
    test = constants.NV_MASTERIP not in nresult
1972
    _ErrorIf(test, constants.CV_ENODENET, node,
1973
             "node hasn't returned node master IP reachability data")
1974
    if not test:
1975
      if not nresult[constants.NV_MASTERIP]:
1976
        if node == self.master_node:
1977
          msg = "the master node cannot reach the master IP (not configured?)"
1978
        else:
1979
          msg = "cannot reach the master IP"
1980
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
1981

    
1982
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1983
                      diskstatus):
1984
    """Verify an instance.
1985

1986
    This function checks to see if the required block devices are
1987
    available on the instance's node.
1988

1989
    """
1990
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1991
    node_current = instanceconfig.primary_node
1992

    
1993
    node_vol_should = {}
1994
    instanceconfig.MapLVsByNode(node_vol_should)
1995

    
1996
    for node in node_vol_should:
1997
      n_img = node_image[node]
1998
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1999
        # ignore missing volumes on offline or broken nodes
2000
        continue
2001
      for volume in node_vol_should[node]:
2002
        test = volume not in n_img.volumes
2003
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2004
                 "volume %s missing on node %s", volume, node)
2005

    
2006
    if instanceconfig.admin_up:
2007
      pri_img = node_image[node_current]
2008
      test = instance not in pri_img.instances and not pri_img.offline
2009
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2010
               "instance not running on its primary node %s",
2011
               node_current)
2012

    
2013
    diskdata = [(nname, success, status, idx)
2014
                for (nname, disks) in diskstatus.items()
2015
                for idx, (success, status) in enumerate(disks)]
2016

    
2017
    for nname, success, bdev_status, idx in diskdata:
2018
      # the 'ghost node' construction in Exec() ensures that we have a
2019
      # node here
2020
      snode = node_image[nname]
2021
      bad_snode = snode.ghost or snode.offline
2022
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2023
               constants.CV_EINSTANCEFAULTYDISK, instance,
2024
               "couldn't retrieve status for disk/%s on %s: %s",
2025
               idx, nname, bdev_status)
2026
      _ErrorIf((instanceconfig.admin_up and success and
2027
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2028
               constants.CV_EINSTANCEFAULTYDISK, instance,
2029
               "disk/%s on %s is faulty", idx, nname)
2030

    
2031
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2032
    """Verify if there are any unknown volumes in the cluster.
2033

2034
    The .os, .swap and backup volumes are ignored. All other volumes are
2035
    reported as unknown.
2036

2037
    @type reserved: L{ganeti.utils.FieldSet}
2038
    @param reserved: a FieldSet of reserved volume names
2039

2040
    """
2041
    for node, n_img in node_image.items():
2042
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2043
        # skip non-healthy nodes
2044
        continue
2045
      for volume in n_img.volumes:
2046
        test = ((node not in node_vol_should or
2047
                volume not in node_vol_should[node]) and
2048
                not reserved.Matches(volume))
2049
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2050
                      "volume %s is unknown", volume)
2051

    
2052
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2053
    """Verify N+1 Memory Resilience.
2054

2055
    Check that if one single node dies we can still start all the
2056
    instances it was primary for.
2057

2058
    """
2059
    cluster_info = self.cfg.GetClusterInfo()
2060
    for node, n_img in node_image.items():
2061
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
2075
      for prinode, instances in n_img.sbp.items():
2076
        needed_mem = 0
2077
        for instance in instances:
2078
          bep = cluster_info.FillBE(instance_cfg[instance])
2079
          if bep[constants.BE_AUTO_BALANCE]:
2080
            needed_mem += bep[constants.BE_MEMORY]
2081
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
2086

    
2087
  @classmethod
2088
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2089
                   (files_all, files_opt, files_mc, files_vm)):
2090
    """Verifies file checksums collected from all nodes.
2091

2092
    @param errorif: Callback for reporting errors
2093
    @param nodeinfo: List of L{objects.Node} objects
2094
    @param master_node: Name of master node
2095
    @param all_nvinfo: RPC results
2096

2097
    """
2098
    # Define functions determining which nodes to consider for a file
2099
    files2nodefn = [
2100
      (files_all, None),
2101
      (files_mc, lambda node: (node.master_candidate or
2102
                               node.name == master_node)),
2103
      (files_vm, lambda node: node.vm_capable),
2104
      ]
2105

    
2106
    # Build mapping from filename to list of nodes which should have the file
2107
    nodefiles = {}
2108
    for (files, fn) in files2nodefn:
2109
      if fn is None:
2110
        filenodes = nodeinfo
2111
      else:
2112
        filenodes = filter(fn, nodeinfo)
2113
      nodefiles.update((filename,
2114
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2115
                       for filename in files)
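    # nodefiles ends up mapping every tracked filename to the frozenset of
    # node names expected to hold it, e.g. (illustrative path and names only)
    #   {"/var/lib/ganeti/config.data": frozenset(["node1", "node2"])}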
2116

    
2117
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2118

    
2119
    fileinfo = dict((filename, {}) for filename in nodefiles)
2120
    ignore_nodes = set()
2121

    
2122
    for node in nodeinfo:
2123
      if node.offline:
2124
        ignore_nodes.add(node.name)
2125
        continue
2126

    
2127
      nresult = all_nvinfo[node.name]
2128

    
2129
      if nresult.fail_msg or not nresult.payload:
2130
        node_files = None
2131
      else:
2132
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2133

    
2134
      test = not (node_files and isinstance(node_files, dict))
2135
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2136
              "Node did not return file checksum data")
2137
      if test:
2138
        ignore_nodes.add(node.name)
2139
        continue
2140

    
2141
      # Build per-checksum mapping from filename to nodes having it
2142
      for (filename, checksum) in node_files.items():
2143
        assert filename in nodefiles
2144
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2145

    
2146
    for (filename, checksums) in fileinfo.items():
2147
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2148

    
2149
      # Nodes having the file
2150
      with_file = frozenset(node_name
2151
                            for nodes in fileinfo[filename].values()
2152
                            for node_name in nodes) - ignore_nodes
2153

    
2154
      expected_nodes = nodefiles[filename] - ignore_nodes
2155

    
2156
      # Nodes missing file
2157
      missing_file = expected_nodes - with_file
2158

    
2159
      if filename in files_opt:
2160
        # All or no nodes
2161
        errorif(missing_file and missing_file != expected_nodes,
2162
                constants.CV_ECLUSTERFILECHECK, None,
2163
                "File %s is optional, but it must exist on all or no"
2164
                " nodes (not found on %s)",
2165
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2166
      else:
2167
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2168
                "File %s is missing from node(s) %s", filename,
2169
                utils.CommaJoin(utils.NiceSort(missing_file)))
2170

    
2171
        # Warn if a node has a file it shouldn't
2172
        unexpected = with_file - expected_nodes
2173
        errorif(unexpected,
2174
                constants.CV_ECLUSTERFILECHECK, None,
2175
                "File %s should not exist on node(s) %s",
2176
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2177

    
2178
      # See if there are multiple versions of the file
2179
      test = len(checksums) > 1
2180
      if test:
2181
        variants = ["variant %s on %s" %
2182
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2183
                    for (idx, (checksum, nodes)) in
2184
                      enumerate(sorted(checksums.items()))]
2185
      else:
2186
        variants = []
2187

    
2188
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2189
              "File %s found with %s different checksums (%s)",
2190
              filename, len(checksums), "; ".join(variants))
2191

    
2192
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2193
                      drbd_map):
2194
    """Verifies and the node DRBD status.
2195

2196
    @type ninfo: L{objects.Node}
2197
    @param ninfo: the node to check
2198
    @param nresult: the remote results for the node
2199
    @param instanceinfo: the dict of instances
2200
    @param drbd_helper: the configured DRBD usermode helper
2201
    @param drbd_map: the DRBD map as returned by
2202
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2203

2204
    """
2205
    node = ninfo.name
2206
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2207

    
2208
    if drbd_helper:
2209
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2210
      test = (helper_result is None)
2211
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2212
               "no drbd usermode helper returned")
2213
      if helper_result:
2214
        status, payload = helper_result
2215
        test = not status
2216
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2217
                 "drbd usermode helper check unsuccessful: %s", payload)
2218
        test = status and (payload != drbd_helper)
2219
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2220
                 "wrong drbd usermode helper: %s", payload)
2221

    
2222
    # compute the DRBD minors
2223
    node_drbd = {}
2224
    for minor, instance in drbd_map[node].items():
2225
      test = instance not in instanceinfo
2226
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2227
               "ghost instance '%s' in temporary DRBD map", instance)
2228
        # ghost instance should not be running, but otherwise we
2229
        # don't give double warnings (both ghost instance and
2230
        # unallocated minor in use)
2231
      if test:
2232
        node_drbd[minor] = (instance, False)
2233
      else:
2234
        instance = instanceinfo[instance]
2235
        node_drbd[minor] = (instance.name, instance.admin_up)
2236

    
2237
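    # At this point node_drbd maps each expected minor to an
    # (instance name, should-be-active) pair, e.g. (illustrative values)
    # {0: ("inst1.example.com", True), 1: ("ghost-inst", False)}; the checks
    # below compare it against the minors the node actually reports in use.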
    # and now check them
2238
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2239
    test = not isinstance(used_minors, (tuple, list))
2240
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2241
             "cannot parse drbd status file: %s", str(used_minors))
2242
    if test:
2243
      # we cannot check drbd status
2244
      return
2245

    
2246
    for minor, (iname, must_exist) in node_drbd.items():
2247
      test = minor not in used_minors and must_exist
2248
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2249
               "drbd minor %d of instance %s is not active", minor, iname)
2250
    for minor in used_minors:
2251
      test = minor not in node_drbd
2252
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2253
               "unallocated drbd minor %d is in use", minor)
2254

    
2255
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2256
    """Builds the node OS structures.
2257

2258
    @type ninfo: L{objects.Node}
2259
    @param ninfo: the node to check
2260
    @param nresult: the remote results for the node
2261
    @param nimg: the node image object
2262

2263
    """
2264
    node = ninfo.name
2265
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2266

    
2267
    remote_os = nresult.get(constants.NV_OSLIST, None)
2268
    test = (not isinstance(remote_os, list) or
2269
            not compat.all(isinstance(v, list) and len(v) == 7
2270
                           for v in remote_os))
2271

    
2272
    _ErrorIf(test, constants.CV_ENODEOS, node,
2273
             "node hasn't returned valid OS data")
2274

    
2275
    nimg.os_fail = test
2276

    
2277
    if test:
2278
      return
2279

    
2280
    os_dict = {}
2281

    
2282
    for (name, os_path, status, diagnose,
2283
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2284

    
2285
      if name not in os_dict:
2286
        os_dict[name] = []
2287

    
2288
      # parameters is a list of lists instead of list of tuples due to
2289
      # JSON lacking a real tuple type, fix it:
2290
      parameters = [tuple(v) for v in parameters]
2291
      os_dict[name].append((os_path, status, diagnose,
2292
                            set(variants), set(parameters), set(api_ver)))
2293

    
2294
    nimg.oslist = os_dict
2295

    
2296
  def _VerifyNodeOS(self, ninfo, nimg, base):
2297
    """Verifies the node OS list.
2298

2299
    @type ninfo: L{objects.Node}
2300
    @param ninfo: the node to check
2301
    @param nimg: the node image object
2302
    @param base: the 'template' node we match against (e.g. from the master)
2303

2304
    """
2305
    node = ninfo.name
2306
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2307

    
2308
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2309

    
2310
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2311
    for os_name, os_data in nimg.oslist.items():
2312
      assert os_data, "Empty OS status for OS %s?!" % os_name
2313
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2314
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2315
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2316
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2317
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2318
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2319
      # comparisons with the 'base' image
2320
      test = os_name not in base.oslist
2321
      _ErrorIf(test, constants.CV_ENODEOS, node,
2322
               "Extra OS %s not present on reference node (%s)",
2323
               os_name, base.name)
2324
      if test:
2325
        continue
2326
      assert base.oslist[os_name], "Base node has empty OS status?"
2327
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2328
      if not b_status:
2329
        # base OS is invalid, skipping
2330
        continue
2331
      for kind, a, b in [("API version", f_api, b_api),
2332
                         ("variants list", f_var, b_var),
2333
                         ("parameters", beautify_params(f_param),
2334
                          beautify_params(b_param))]:
2335
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2336
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2337
                 kind, os_name, base.name,
2338
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2339

    
2340
    # check any missing OSes
2341
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2342
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2343
             "OSes present on reference node %s but missing on this node: %s",
2344
             base.name, utils.CommaJoin(missing))
2345

    
2346
  def _VerifyOob(self, ninfo, nresult):
2347
    """Verifies out of band functionality of a node.
2348

2349
    @type ninfo: L{objects.Node}
2350
    @param ninfo: the node to check
2351
    @param nresult: the remote results for the node
2352

2353
    """
2354
    node = ninfo.name
2355
    # We just have to verify the paths on master and/or master candidates
2356
    # as the oob helper is invoked on the master
2357
    if ((ninfo.master_candidate or ninfo.master_capable) and
2358
        constants.NV_OOB_PATHS in nresult):
2359
      for path_result in nresult[constants.NV_OOB_PATHS]:
2360
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2361

    
2362
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2363
    """Verifies and updates the node volume data.
2364

2365
    This function will update a L{NodeImage}'s internal structures
2366
    with data from the remote call.
2367

2368
    @type ninfo: L{objects.Node}
2369
    @param ninfo: the node to check
2370
    @param nresult: the remote results for the node
2371
    @param nimg: the node image object
2372
    @param vg_name: the configured VG name
2373

2374
    """
2375
    node = ninfo.name
2376
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2377

    
2378
    nimg.lvm_fail = True
2379
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2380
    if vg_name is None:
2381
      pass
2382
    elif isinstance(lvdata, basestring):
2383
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2384
               utils.SafeEncode(lvdata))
2385
    elif not isinstance(lvdata, dict):
2386
      _ErrorIf(True, constants.CV_ENODELVM, node,
2387
               "rpc call to node failed (lvlist)")
2388
    else:
2389
      nimg.volumes = lvdata
2390
      nimg.lvm_fail = False
2391

    
2392
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2393
    """Verifies and updates the node instance list.
2394

2395
    If the listing was successful, then updates this node's instance
2396
    list. Otherwise, it marks the RPC call as failed for the instance
2397
    list key.
2398

2399
    @type ninfo: L{objects.Node}
2400
    @param ninfo: the node to check
2401
    @param nresult: the remote results for the node
2402
    @param nimg: the node image object
2403

2404
    """
2405
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2406
    test = not isinstance(idata, list)
2407
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2408
                  "rpc call to node failed (instancelist): %s",
2409
                  utils.SafeEncode(str(idata)))
2410
    if test:
2411
      nimg.hyp_fail = True
2412
    else:
2413
      nimg.instances = idata
2414

    
2415
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2416
    """Verifies and computes a node information map
2417

2418
    @type ninfo: L{objects.Node}
2419
    @param ninfo: the node to check
2420
    @param nresult: the remote results for the node
2421
    @param nimg: the node image object
2422
    @param vg_name: the configured VG name
2423

2424
    """
2425
    node = ninfo.name
2426
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2427

    
2428
    # try to read free memory (from the hypervisor)
2429
    hv_info = nresult.get(constants.NV_HVINFO, None)
2430
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2431
    _ErrorIf(test, constants.CV_ENODEHV, node,
2432
             "rpc call to node failed (hvinfo)")
2433
    if not test:
2434
      try:
2435
        nimg.mfree = int(hv_info["memory_free"])
2436
      except (ValueError, TypeError):
2437
        _ErrorIf(True, constants.CV_ENODERPC, node,
2438
                 "node returned invalid nodeinfo, check hypervisor")
2439

    
2440
    # FIXME: devise a free space model for file based instances as well
2441
    if vg_name is not None:
2442
      test = (constants.NV_VGLIST not in nresult or
2443
              vg_name not in nresult[constants.NV_VGLIST])
2444
      _ErrorIf(test, constants.CV_ENODELVM, node,
2445
               "node didn't return data for the volume group '%s'"
2446
               " - it is either missing or broken", vg_name)
2447
      if not test:
2448
        try:
2449
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2450
        except (ValueError, TypeError):
2451
          _ErrorIf(True, constants.CV_ENODERPC, node,
2452
                   "node returned invalid LVM info, check LVM status")
2453

    
2454
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2455
    """Gets per-disk status information for all instances.
2456

2457
    @type nodelist: list of strings
2458
    @param nodelist: Node names
2459
    @type node_image: dict of (name, L{objects.Node})
2460
    @param node_image: Node objects
2461
    @type instanceinfo: dict of (name, L{objects.Instance})
2462
    @param instanceinfo: Instance objects
2463
    @rtype: {instance: {node: [(success, payload)]}}
2464
    @return: a dictionary of per-instance dictionaries with nodes as
2465
        keys and disk information as values; the disk information is a
2466
        list of tuples (success, payload)
2467

2468
    """
2469
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2470

    
2471
    node_disks = {}
2472
    node_disks_devonly = {}
2473
    diskless_instances = set()
2474
    diskless = constants.DT_DISKLESS
2475

    
2476
    for nname in nodelist:
2477
      node_instances = list(itertools.chain(node_image[nname].pinst,
2478
                                            node_image[nname].sinst))
2479
      diskless_instances.update(inst for inst in node_instances
2480
                                if instanceinfo[inst].disk_template == diskless)
2481
      disks = [(inst, disk)
2482
               for inst in node_instances
2483
               for disk in instanceinfo[inst].disks]
2484

    
2485
      if not disks:
2486
        # No need to collect data
2487
        continue
2488

    
2489
      node_disks[nname] = disks
2490

    
2491
      # Creating copies as SetDiskID below will modify the objects and that can
2492
      # lead to incorrect data returned from nodes
2493
      devonly = [dev.Copy() for (_, dev) in disks]
2494

    
2495
      for dev in devonly:
2496
        self.cfg.SetDiskID(dev, nname)
2497

    
2498
      node_disks_devonly[nname] = devonly
2499

    
2500
    assert len(node_disks) == len(node_disks_devonly)
2501

    
2502
    # Collect data from all nodes with disks
2503
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2504
                                                          node_disks_devonly)
2505

    
2506
    assert len(result) == len(node_disks)
2507

    
2508
    instdisk = {}
2509

    
2510
    for (nname, nres) in result.items():
2511
      disks = node_disks[nname]
2512

    
2513
      if nres.offline:
2514
        # No data from this node
2515
        data = len(disks) * [(False, "node offline")]
2516
      else:
2517
        msg = nres.fail_msg
2518
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2519
                 "while getting disk information: %s", msg)
2520
        if msg:
2521
          # No data from this node
2522
          data = len(disks) * [(False, msg)]
2523
        else:
2524
          data = []
2525
          for idx, i in enumerate(nres.payload):
2526
            if isinstance(i, (tuple, list)) and len(i) == 2:
2527
              data.append(i)
2528
            else:
2529
              logging.warning("Invalid result from node %s, entry %d: %s",
2530
                              nname, idx, i)
2531
              data.append((False, "Invalid result from the remote node"))
2532

    
2533
      for ((inst, _), status) in zip(disks, data):
2534
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2535

    
2536
    # Add empty entries for diskless instances.
2537
    for inst in diskless_instances:
2538
      assert inst not in instdisk
2539
      instdisk[inst] = {}
2540

    
2541
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2542
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2543
                      compat.all(isinstance(s, (tuple, list)) and
2544
                                 len(s) == 2 for s in statuses)
2545
                      for inst, nnames in instdisk.items()
2546
                      for nname, statuses in nnames.items())
2547
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2548

    
2549
    return instdisk
2550

    
2551
  @staticmethod
2552
  def _SshNodeSelector(group_uuid, all_nodes):
2553
    """Create endless iterators for all potential SSH check hosts.
2554

2555
    """
2556
    nodes = [node for node in all_nodes
2557
             if (node.group != group_uuid and
2558
                 not node.offline)]
2559
    keyfunc = operator.attrgetter("group")
2560

    
2561
    return map(itertools.cycle,
2562
               [sorted(map(operator.attrgetter("name"), names))
2563
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2564
                                                  keyfunc)])
2565

    
2566
  @classmethod
2567
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2568
    """Choose which nodes should talk to which other nodes.
2569

2570
    We will make nodes contact all nodes in their group, and one node from
2571
    every other group.
2572

2573
    @warning: This algorithm has a known issue if one node group is much
2574
      smaller than others (e.g. just one node). In such a case all other
2575
      nodes will talk to the single node.
2576

2577
    """
2578
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2579
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2580

    
2581
    return (online_nodes,
2582
            dict((name, sorted([i.next() for i in sel]))
2583
                 for name in online_nodes))
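    # Shape of the return value (illustrative names only): with group nodes
    # "n1" and "n2" online and two other groups in the cluster, this yields
    # something like
    #   (["n1", "n2"], {"n1": ["g2-node1", "g3-node1"],
    #                   "n2": ["g2-node2", "g3-node2"]})
    # i.e. every online node gets one rotating SSH-check target per foreign
    # group, in addition to the in-group checks described in the docstring.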
2584

    
2585
  def BuildHooksEnv(self):
2586
    """Build hooks env.
2587

2588
    Cluster-Verify hooks just ran in the post phase and their failure makes
2589
    the output be logged in the verify output and the verification to fail.
2590

2591
    """
2592
    env = {
2593
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2594
      }
2595

    
2596
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2597
               for node in self.my_node_info.values())
2598

    
2599
    return env
2600

    
2601
  def BuildHooksNodes(self):
2602
    """Build hooks nodes.
2603

2604
    """
2605
    return ([], self.my_node_names)
2606

    
2607
  def Exec(self, feedback_fn):
2608
    """Verify integrity of the node group, performing various test on nodes.
2609

2610
    """
2611
    # This method has too many local variables. pylint: disable=R0914
2612
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2613

    
2614
    if not self.my_node_names:
2615
      # empty node group
2616
      feedback_fn("* Empty node group, skipping verification")
2617
      return True
2618

    
2619
    self.bad = False
2620
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2621
    verbose = self.op.verbose
2622
    self._feedback_fn = feedback_fn
2623

    
2624
    vg_name = self.cfg.GetVGName()
2625
    drbd_helper = self.cfg.GetDRBDHelper()
2626
    cluster = self.cfg.GetClusterInfo()
2627
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2628
    hypervisors = cluster.enabled_hypervisors
2629
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2630

    
2631
    i_non_redundant = [] # Non redundant instances
2632
    i_non_a_balanced = [] # Non auto-balanced instances
2633
    n_offline = 0 # Count of offline nodes
2634
    n_drained = 0 # Count of nodes being drained
2635
    node_vol_should = {}
2636

    
2637
    # FIXME: verify OS list
2638

    
2639
    # File verification
2640
    filemap = _ComputeAncillaryFiles(cluster, False)
2641

    
2642
    # do local checksums
2643
    master_node = self.master_node = self.cfg.GetMasterNode()
2644
    master_ip = self.cfg.GetMasterIP()
2645

    
2646
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2647

    
2648
    node_verify_param = {
2649
      constants.NV_FILELIST:
2650
        utils.UniqueSequence(filename
2651
                             for files in filemap
2652
                             for filename in files),
2653
      constants.NV_NODELIST:
2654
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2655
                                  self.all_node_info.values()),
2656
      constants.NV_HYPERVISOR: hypervisors,
2657
      constants.NV_HVPARAMS:
2658
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2659
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2660
                                 for node in node_data_list
2661
                                 if not node.offline],
2662
      constants.NV_INSTANCELIST: hypervisors,
2663
      constants.NV_VERSION: None,
2664
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2665
      constants.NV_NODESETUP: None,
2666
      constants.NV_TIME: None,
2667
      constants.NV_MASTERIP: (master_node, master_ip),
2668
      constants.NV_OSLIST: None,
2669
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2670
      }
2671

    
2672
    if vg_name is not None:
2673
      node_verify_param[constants.NV_VGLIST] = None
2674
      node_verify_param[constants.NV_LVLIST] = vg_name
2675
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2676
      node_verify_param[constants.NV_DRBDLIST] = None
2677

    
2678
    if drbd_helper:
2679
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2680

    
2681
    # bridge checks
2682
    # FIXME: this needs to be changed per node-group, not cluster-wide
2683
    bridges = set()
2684
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2685
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2686
      bridges.add(default_nicpp[constants.NIC_LINK])
2687
    for instance in self.my_inst_info.values():
2688
      for nic in instance.nics:
2689
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2690
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2691
          bridges.add(full_nic[constants.NIC_LINK])
2692

    
2693
    if bridges:
2694
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2695

    
2696
    # Build our expected cluster state
2697
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2698
                                                 name=node.name,
2699
                                                 vm_capable=node.vm_capable))
2700
                      for node in node_data_list)
2701

    
2702
    # Gather OOB paths
2703
    oob_paths = []
2704
    for node in self.all_node_info.values():
2705
      path = _SupportsOob(self.cfg, node)
2706
      if path and path not in oob_paths:
2707
        oob_paths.append(path)
2708

    
2709
    if oob_paths:
2710
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2711

    
2712
    for instance in self.my_inst_names:
2713
      inst_config = self.my_inst_info[instance]
2714

    
2715
      for nname in inst_config.all_nodes:
2716
        if nname not in node_image:
2717
          gnode = self.NodeImage(name=nname)
2718
          gnode.ghost = (nname not in self.all_node_info)
2719
          node_image[nname] = gnode
2720

    
2721
      inst_config.MapLVsByNode(node_vol_should)
2722

    
2723
      pnode = inst_config.primary_node
2724
      node_image[pnode].pinst.append(instance)
2725

    
2726
      for snode in inst_config.secondary_nodes:
2727
        nimg = node_image[snode]
2728
        nimg.sinst.append(instance)
2729
        if pnode not in nimg.sbp:
2730
          nimg.sbp[pnode] = []
2731
        nimg.sbp[pnode].append(instance)
2732

    
2733
    # At this point, we have the in-memory data structures complete,
2734
    # except for the runtime information, which we'll gather next
2735

    
2736
    # Due to the way our RPC system works, exact response times cannot be
2737
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2738
    # time before and after executing the request, we can at least have a time
2739
    # window.
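    # (The [nvinfo_starttime, nvinfo_endtime] pair captured here is what
    # _VerifyNodeTime later compares node clocks against, extended by
    # constants.NODE_MAX_CLOCK_SKEW on either side.)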
2740
    nvinfo_starttime = time.time()
2741
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2742
                                           node_verify_param,
2743
                                           self.cfg.GetClusterName())
2744
    nvinfo_endtime = time.time()
2745

    
2746
    if self.extra_lv_nodes and vg_name is not None:
2747
      extra_lv_nvinfo = \
2748
          self.rpc.call_node_verify(self.extra_lv_nodes,
2749
                                    {constants.NV_LVLIST: vg_name},
2750
                                    self.cfg.GetClusterName())
2751
    else:
2752
      extra_lv_nvinfo = {}
2753

    
2754
    all_drbd_map = self.cfg.ComputeDRBDMap()
2755

    
2756
    feedback_fn("* Gathering disk information (%s nodes)" %
2757
                len(self.my_node_names))
2758
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2759
                                     self.my_inst_info)
2760

    
2761
    feedback_fn("* Verifying configuration file consistency")
2762

    
2763
    # If not all nodes are being checked, we need to make sure the master node
2764
    # and a non-checked vm_capable node are in the list.
2765
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2766
    if absent_nodes:
2767
      vf_nvinfo = all_nvinfo.copy()
2768
      vf_node_info = list(self.my_node_info.values())
2769
      additional_nodes = []
2770
      if master_node not in self.my_node_info:
2771
        additional_nodes.append(master_node)
2772
        vf_node_info.append(self.all_node_info[master_node])
2773
      # Add the first vm_capable node we find which is not included
2774
      for node in absent_nodes:
2775
        nodeinfo = self.all_node_info[node]
2776
        if nodeinfo.vm_capable and not nodeinfo.offline:
2777
          additional_nodes.append(node)
2778
          vf_node_info.append(self.all_node_info[node])
2779
          break
2780
      key = constants.NV_FILELIST
2781
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2782
                                                 {key: node_verify_param[key]},
2783
                                                 self.cfg.GetClusterName()))
2784
    else:
2785
      vf_nvinfo = all_nvinfo
2786
      vf_node_info = self.my_node_info.values()
2787

    
2788
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2789

    
2790
    feedback_fn("* Verifying node status")
2791

    
2792
    refos_img = None
2793

    
2794
    for node_i in node_data_list:
2795
      node = node_i.name
2796
      nimg = node_image[node]
2797

    
2798
      if node_i.offline:
2799
        if verbose:
2800
          feedback_fn("* Skipping offline node %s" % (node,))
2801
        n_offline += 1
2802
        continue
2803

    
2804
      if node == master_node:
2805
        ntype = "master"
2806
      elif node_i.master_candidate:
2807
        ntype = "master candidate"
2808
      elif node_i.drained:
2809
        ntype = "drained"
2810
        n_drained += 1
2811
      else:
2812
        ntype = "regular"
2813
      if verbose:
2814
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2815

    
2816
      msg = all_nvinfo[node].fail_msg
2817
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2818
               msg)
2819
      if msg:
2820
        nimg.rpc_fail = True
2821
        continue
2822

    
2823
      nresult = all_nvinfo[node].payload
2824

    
2825
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2826
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2827
      self._VerifyNodeNetwork(node_i, nresult)
2828
      self._VerifyOob(node_i, nresult)
2829

    
2830
      if nimg.vm_capable:
2831
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2832
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2833
                             all_drbd_map)
2834

    
2835
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2836
        self._UpdateNodeInstances(node_i, nresult, nimg)
2837
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2838
        self._UpdateNodeOS(node_i, nresult, nimg)
2839

    
2840
        if not nimg.os_fail:
2841
          if refos_img is None:
2842
            refos_img = nimg
2843
          self._VerifyNodeOS(node_i, nimg, refos_img)
2844
        self._VerifyNodeBridges(node_i, nresult, bridges)
2845

    
2846
        # Check whether all running instances are primary for the node. (This
2847
        # can no longer be done from _VerifyInstance below, since some of the
2848
        # wrong instances could be from other node groups.)
2849
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2850

    
2851
        for inst in non_primary_inst:
2852
          test = inst in self.all_inst_info
2853
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2854
                   "instance should not run on node %s", node_i.name)
2855
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2856
                   "node is running unknown instance %s", inst)
2857

    
2858
    for node, result in extra_lv_nvinfo.items():
2859
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2860
                              node_image[node], vg_name)
2861

    
2862
    feedback_fn("* Verifying instance status")
2863
    for instance in self.my_inst_names:
2864
      if verbose:
2865
        feedback_fn("* Verifying instance %s" % instance)
2866
      inst_config = self.my_inst_info[instance]
2867
      self._VerifyInstance(instance, inst_config, node_image,
2868
                           instdisk[instance])
2869
      inst_nodes_offline = []
2870

    
2871
      pnode = inst_config.primary_node
2872
      pnode_img = node_image[pnode]
2873
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2874
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2875
               " primary node failed", instance)
2876

    
2877
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2878
               constants.CV_EINSTANCEBADNODE, instance,
2879
               "instance is marked as running and lives on offline node %s",
2880
               inst_config.primary_node)
2881

    
2882
      # If the instance is non-redundant we cannot survive losing its primary
2883
      # node, so we are not N+1 compliant. On the other hand we have no disk
2884
      # templates with more than one secondary so that situation is not well
2885
      # supported either.
2886
      # FIXME: does not support file-backed instances
2887
      if not inst_config.secondary_nodes:
2888
        i_non_redundant.append(instance)
2889

    
2890
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2891
               constants.CV_EINSTANCELAYOUT,
2892
               instance, "instance has multiple secondary nodes: %s",
2893
               utils.CommaJoin(inst_config.secondary_nodes),
2894
               code=self.ETYPE_WARNING)
2895

    
2896
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2897
        pnode = inst_config.primary_node
2898
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2899
        instance_groups = {}
2900

    
2901
        for node in instance_nodes:
2902
          instance_groups.setdefault(self.all_node_info[node].group,
2903
                                     []).append(node)
2904

    
2905
        pretty_list = [
2906
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2907
          # Sort so that we always list the primary node first.
2908
          for group, nodes in sorted(instance_groups.items(),
2909
                                     key=lambda (_, nodes): pnode in nodes,
2910
                                     reverse=True)]
2911

    
2912
        self._ErrorIf(len(instance_groups) > 1,
2913
                      constants.CV_EINSTANCESPLITGROUPS,
2914
                      instance, "instance has primary and secondary nodes in"
2915
                      " different groups: %s", utils.CommaJoin(pretty_list),
2916
                      code=self.ETYPE_WARNING)
2917

    
2918
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2919
        i_non_a_balanced.append(instance)
2920

    
2921
      for snode in inst_config.secondary_nodes:
2922
        s_img = node_image[snode]
2923
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2924
                 snode, "instance %s, connection to secondary node failed",
2925
                 instance)
2926

    
2927
        if s_img.offline:
2928
          inst_nodes_offline.append(snode)
2929

    
2930
      # warn that the instance lives on offline nodes
2931
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2932
               "instance has offline secondary node(s) %s",
2933
               utils.CommaJoin(inst_nodes_offline))
2934
      # ... or ghost/non-vm_capable nodes
2935
      for node in inst_config.all_nodes:
2936
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2937
                 instance, "instance lives on ghost node %s", node)
2938
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2939
                 instance, "instance lives on non-vm_capable node %s", node)
2940

    
2941
    feedback_fn("* Verifying orphan volumes")
2942
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2943

    
2944
    # We will get spurious "unknown volume" warnings if any node of this group
2945
    # is secondary for an instance whose primary is in another group. To avoid
2946
    # them, we find these instances and add their volumes to node_vol_should.
2947
    for inst in self.all_inst_info.values():
2948
      for secondary in inst.secondary_nodes:
2949
        if (secondary in self.my_node_info
2950
            and inst.name not in self.my_inst_info):
2951
          inst.MapLVsByNode(node_vol_should)
2952
          break
2953

    
2954
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2955

    
2956
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2957
      feedback_fn("* Verifying N+1 Memory redundancy")
2958
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2959

    
2960
    feedback_fn("* Other Notes")
2961
    if i_non_redundant:
2962
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2963
                  % len(i_non_redundant))
2964

    
2965
    if i_non_a_balanced:
2966
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2967
                  % len(i_non_a_balanced))
2968

    
2969
    if n_offline:
2970
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2971

    
2972
    if n_drained:
2973
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2974

    
2975
    return not self.bad
2976

    
2977
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


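# Post-phase hook results, as handled by HooksCallBack above, arrive per node
# as a list of (script, status, output) tuples; with a hypothetical script
# name, a failing entry would look like
#   ("99-check-storage", constants.HKR_FAIL, "some diagnostic output")
# and causes the overall verify result to be downgraded to False while the
# re-indented output is passed to feedback_fn.
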
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


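# The tuple returned by LUGroupVerifyDisks.Exec above could, with hypothetical
# node and instance names, look like:
#   ({"node2.example.com": "Error while executing backend function: ..."},
#    ["instance-with-offline-lv"],
#    {"instance-missing-lv": [["node3.example.com", "xenvg/disk0"]]})
# i.e. per-node RPC errors, instances whose disks need to be re-activated, and
# per-instance lists of (node, logical volume) pairs that were not found.
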
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


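# In the size check above, call_blockdev_getsize reports sizes in bytes,
# hence the right shift by 20 bits (size >> 20 == size // 2**20) before the
# value is compared with the MiB-based disk.size from the configuration.
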
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


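# _ValidateNetmask deals with a CIDR prefix length, not a dotted quad: on an
# IPv4 cluster a value such as 24 (i.e. 255.255.255.0) would be accepted,
# while an out-of-range value such as 33 fails ipcls.ValidateNetmask() and
# raises OpPrereqError; the exact bounds are delegated to the IP address class
# selected for the cluster's primary IP family.
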
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        self.LogWarning(msg)
        feedback_fn(msg)
      else:
        self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


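# A sketch of how the four sets returned by _ComputeAncillaryFiles are meant
# to be consumed:
#   files_all -> every online node
#   files_mc  -> master candidates only
#   files_vm  -> vm_capable nodes only
#   files_opt -> files from the other sets that may legitimately be absent
# _RedistributeAncillaryFiles below applies this split with redist=True, and
# the cluster verification code builds its file map from the same helper with
# redist=False.
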
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_activate_master_ip(master_params.name,
                                          master_params)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


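# _WaitForSync above polls the primary node in a loop: it sleeps for the
# estimated remaining time between polls (capped at 60 seconds), gives up with
# a RemoteError after ten consecutive RPC failures, and once the mirrors
# report completion it re-checks a degraded result for up to ten extra
# one-second rounds so that a transient degraded state is not reported as a
# permanent one.
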
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


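# Each per-node entry built by LUOobCommand.Exec is a list of (status, data)
# pairs; with hypothetical node names, a successful power-status query could
# yield something like
#   [(constants.RS_NORMAL, "node1.example.com"),
#    (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})]
# while a node without an OOB program configured ends up as
#   [(constants.RS_NORMAL, "node2.example.com"), (constants.RS_UNAVAIL, None)].
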
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


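# For reference, with none of "hidden", "blacklisted" or "valid" among the
# requested fields and no names given, LUOsDiagnose._BuildFilter above yields
# a status-only filter along the lines of:
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# When names are requested as well, that filter is AND-ed with the name filter.
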
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


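# Roughly, the requested_data flags handled by _NodeQuery._GetQueryData above
# map to the extra work performed:
#   query.NQ_LIVE  -> node_info RPC to the vm_capable nodes (live_data)
#   query.NQ_INST  -> node_to_primary/node_to_secondary instance maps
#   query.NQ_OOB   -> per-node out-of-band support flags (oob_support)
#   query.NQ_GROUP -> all node group objects (groups)
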
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


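# As a sketch, for output_fields ["node", "phys", "vg", "name", "size",
# "instance"], LUNodeQueryvols.Exec above emits one row of strings per volume,
# e.g. (all values invented for illustration):
#   ["node1.example.com", "/dev/xenvg/disk0", "xenvg", "disk0", "1024",
#    "instance1.example.com"]
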
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


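# Note on the field handling above: the storage_list RPC is always asked for
# constants.SF_NAME (used as the sort key), while constants.SF_NODE and
# constants.SF_TYPE are stripped from the request and filled in locally from
# the node name and self.op.storage_type when each output row is built.
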
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


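# In _InstanceQuery._GetQueryData above, the per-node all_instances_info RPC
# results are classified roughly as follows: offline nodes end up in both
# offline_nodes and bad_nodes, other RPC failures only in bad_nodes, instances
# reported by a node other than their configured primary are collected in
# wrongnode_inst, and everything else is merged into live_data.
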
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


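# Summarising LUNodeAdd.Exec above: the new node's daemon version is checked
# against constants.PROTOCOL_VERSION, /etc/hosts is updated (if the cluster
# allows it), the master runs a node_verify RPC with an NV_NODELIST check
# against the new node, and only then is the node added (or re-added) to the
# configuration and the ancillary files redistributed.
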
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

5293
    """Check prerequisites.
5294

5295
    This only checks the instance list against the existing names.
5296

5297
    """
5298
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5299

    
5300
    if (self.op.master_candidate is not None or
5301
        self.op.drained is not None or
5302
        self.op.offline is not None):
5303
      # we can't change the master's node flags
5304
      if self.op.node_name == self.cfg.GetMasterNode():
5305
        raise errors.OpPrereqError("The master role can be changed"
5306
                                   " only via master-failover",
5307
                                   errors.ECODE_INVAL)
5308

    
5309
    if self.op.master_candidate and not node.master_capable:
5310
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5311
                                 " it a master candidate" % node.name,
5312
                                 errors.ECODE_STATE)
5313

    
5314
    if self.op.vm_capable == False:
5315
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5316
      if ipri or isec:
5317
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5318
                                   " the vm_capable flag" % node.name,
5319
                                   errors.ECODE_STATE)
5320

    
5321
    if node.master_candidate and self.might_demote and not self.lock_all:
5322
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5323
      # check if after removing the current node, we're missing master
5324
      # candidates
5325
      (mc_remaining, mc_should, _) = \
5326
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5327
      if mc_remaining < mc_should:
5328
        raise errors.OpPrereqError("Not enough master candidates, please"
5329
                                   " pass auto promote option to allow"
5330
                                   " promotion", errors.ECODE_STATE)
5331

    
5332
    self.old_flags = old_flags = (node.master_candidate,
5333
                                  node.drained, node.offline)
5334
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5335
    self.old_role = old_role = self._F2R[old_flags]
5336

    
5337
    # Check for ineffective changes
5338
    for attr in self._FLAGS:
5339
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5340
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5341
        setattr(self.op, attr, None)
5342

    
5343
    # Past this point, any flag change to False means a transition
5344
    # away from the respective state, as only real changes are kept
5345

    
5346
    # TODO: We might query the real power state if it supports OOB
5347
    if _SupportsOob(self.cfg, node):
5348
      if self.op.offline is False and not (node.powered or
5349
                                           self.op.powered == True):
5350
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5351
                                    " offline status can be reset") %
5352
                                   self.op.node_name)
5353
    elif self.op.powered is not None:
5354
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5355
                                  " as it does not support out-of-band"
5356
                                  " handling") % self.op.node_name)
5357

    
5358
    # If we're being deofflined/drained, we'll MC ourself if needed
5359
    if (self.op.drained == False or self.op.offline == False or
5360
        (self.op.master_capable and not node.master_capable)):
5361
      if _DecideSelfPromotion(self):
5362
        self.op.master_candidate = True
5363
        self.LogInfo("Auto-promoting node to master candidate")
5364

    
5365
    # If we're no longer master capable, we'll demote ourselves from MC
5366
    if self.op.master_capable == False and node.master_candidate:
5367
      self.LogInfo("Demoting from master candidate")
5368
      self.op.master_candidate = False
5369

    
5370
    # Compute new role
5371
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5372
    if self.op.master_candidate:
5373
      new_role = self._ROLE_CANDIDATE
5374
    elif self.op.drained:
5375
      new_role = self._ROLE_DRAINED
5376
    elif self.op.offline:
5377
      new_role = self._ROLE_OFFLINE
5378
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5379
      # False is still in new flags, which means we're un-setting (the
5380
      # only) True flag
5381
      new_role = self._ROLE_REGULAR
5382
    else: # no new flags, nothing, keep old role
5383
      new_role = old_role
5384

    
5385
    self.new_role = new_role
5386

    
5387
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5388
      # Trying to transition out of offline status
5389
      # TODO: Use standard RPC runner, but make sure it works when the node is
5390
      # still marked offline
5391
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5392
      if result.fail_msg:
5393
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5394
                                   " to report its version: %s" %
5395
                                   (node.name, result.fail_msg),
5396
                                   errors.ECODE_STATE)
5397
      else:
5398
        self.LogWarning("Transitioning node from offline to online state"
5399
                        " without using re-add. Please make sure the node"
5400
                        " is healthy!")
5401

    
5402
    if self.op.secondary_ip:
5403
      # Ok even without locking, because this can't be changed by any LU
5404
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5405
      master_singlehomed = master.secondary_ip == master.primary_ip
5406
      if master_singlehomed and self.op.secondary_ip:
5407
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5408
                                   " homed cluster", errors.ECODE_INVAL)
5409

    
5410
      if node.offline:
5411
        if self.affected_instances:
5412
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5413
                                     " node has instances (%s) configured"
5414
                                     " to use it" % self.affected_instances)
5415
      else:
5416
        # On online nodes, check that no instances are running, and that
5417
        # the node has the new ip and we can reach it.
5418
        for instance in self.affected_instances:
5419
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5420

    
5421
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5422
        if master.name != node.name:
5423
          # check reachability from master secondary ip to new secondary ip
5424
          if not netutils.TcpPing(self.op.secondary_ip,
5425
                                  constants.DEFAULT_NODED_PORT,
5426
                                  source=master.secondary_ip):
5427
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5428
                                       " based ping to node daemon port",
5429
                                       errors.ECODE_ENVIRON)
5430

    
5431
    if self.op.ndparams:
5432
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5433
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5434
      self.new_ndparams = new_ndparams
5435

    
5436
  def Exec(self, feedback_fn):
5437
    """Modifies a node.
5438

5439
    """
5440
    node = self.node
5441
    old_role = self.old_role
5442
    new_role = self.new_role
5443

    
5444
    result = []
5445

    
5446
    if self.op.ndparams:
5447
      node.ndparams = self.new_ndparams
5448

    
5449
    if self.op.powered is not None:
5450
      node.powered = self.op.powered
5451

    
5452
    for attr in ["master_capable", "vm_capable"]:
5453
      val = getattr(self.op, attr)
5454
      if val is not None:
5455
        setattr(node, attr, val)
5456
        result.append((attr, str(val)))
5457

    
5458
    if new_role != old_role:
5459
      # Tell the node to demote itself, if no longer MC and not offline
5460
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5461
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5462
        if msg:
5463
          self.LogWarning("Node failed to demote itself: %s", msg)
5464

    
5465
      new_flags = self._R2F[new_role]
5466
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5467
        if of != nf:
5468
          result.append((desc, str(nf)))
5469
      (node.master_candidate, node.drained, node.offline) = new_flags
5470

    
5471
      # we locked all nodes, we adjust the CP before updating this node
5472
      if self.lock_all:
5473
        _AdjustCandidatePool(self, [node.name])
5474

    
5475
    if self.op.secondary_ip:
5476
      node.secondary_ip = self.op.secondary_ip
5477
      result.append(("secondary_ip", self.op.secondary_ip))
5478

    
5479
    # this will trigger configuration file update, if needed
5480
    self.cfg.Update(node, feedback_fn)
5481

    
5482
    # this will trigger job queue propagation or cleanup if the mc
5483
    # flag changed
5484
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5485
      self.context.ReaddNode(node)
5486

    
5487
    return result
5488

    
5489

    
5490
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
5593
  """Return configuration values.
5594

5595
  """
5596
  REQ_BGL = False
5597
  _FIELDS_DYNAMIC = utils.FieldSet()
5598
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5599
                                  "watcher_pause", "volume_group_name")
5600

    
5601
  def CheckArguments(self):
5602
    _CheckOutputFields(static=self._FIELDS_STATIC,
5603
                       dynamic=self._FIELDS_DYNAMIC,
5604
                       selected=self.op.output_fields)
5605

    
5606
  def ExpandNames(self):
5607
    self.needed_locks = {}
5608

    
5609
  def Exec(self, feedback_fn):
5610
    """Dump a representation of the cluster config to the standard output.
5611

5612
    """
5613
    values = []
5614
    for field in self.op.output_fields:
5615
      if field == "cluster_name":
5616
        entry = self.cfg.GetClusterName()
5617
      elif field == "master_node":
5618
        entry = self.cfg.GetMasterNode()
5619
      elif field == "drain_flag":
5620
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5621
      elif field == "watcher_pause":
5622
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5623
      elif field == "volume_group_name":
5624
        entry = self.cfg.GetVGName()
5625
      else:
5626
        raise errors.ParameterError(field)
5627
      values.append(entry)
5628
    return values
5629

    
5630

    
5631
class LUInstanceActivateDisks(NoHooksLU):
5632
  """Bring up an instance's disks.
5633

5634
  """
5635
  REQ_BGL = False
5636

    
5637
  def ExpandNames(self):
5638
    self._ExpandAndLockInstance()
5639
    self.needed_locks[locking.LEVEL_NODE] = []
5640
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5641

    
5642
  def DeclareLocks(self, level):
5643
    if level == locking.LEVEL_NODE:
5644
      self._LockInstancesNodes()
5645

    
5646
  def CheckPrereq(self):
5647
    """Check prerequisites.
5648

5649
    This checks that the instance is in the cluster.
5650

5651
    """
5652
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5653
    assert self.instance is not None, \
5654
      "Cannot retrieve locked instance %s" % self.op.instance_name
5655
    _CheckNodeOnline(self, self.instance.primary_node)
5656

    
5657
  def Exec(self, feedback_fn):
5658
    """Activate the disks.
5659

5660
    """
5661
    disks_ok, disks_info = \
5662
              _AssembleInstanceDisks(self, self.instance,
5663
                                     ignore_size=self.op.ignore_size)
5664
    if not disks_ok:
5665
      raise errors.OpExecError("Cannot activate block devices")
5666

    
5667
    return disks_info
5668

    
5669

    
5670
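# Illustrative use from a logical unit (this mirrors
# LUInstanceActivateDisks.Exec above):
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")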
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair (disks_ok, device_info); C{disks_ok} is False if the
      operation failed, and C{device_info} is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
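  # (note that the primary node is visited here as well, just assembled
  # with is_primary=False; it is re-assembled as primary in the second
  # pass below)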
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored (they cause the function to report failure).

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the dict of vg names and corresponding amounts of disk
      in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
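  # req_sizes maps each volume group name to the space required in it,
  # e.g. {"xenvg": 10240} to check for 10 GiB in a group named "xenvg"
  # (illustrative); every entry is checked independently below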
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

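    # unless the no_remember option was given, record the new desired
    # state in the configuration before actually starting the instance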
    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
6109
  """Reboot an instance.
6110

6111
  """
6112
  HPATH = "instance-reboot"
6113
  HTYPE = constants.HTYPE_INSTANCE
6114
  REQ_BGL = False
6115

    
6116
  def ExpandNames(self):
6117
    self._ExpandAndLockInstance()
6118

    
6119
  def BuildHooksEnv(self):
6120
    """Build hooks env.
6121

6122
    This runs on master, primary and secondary nodes of the instance.
6123

6124
    """
6125
    env = {
6126
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6127
      "REBOOT_TYPE": self.op.reboot_type,
6128
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6129
      }
6130

    
6131
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6132

    
6133
    return env
6134

    
6135
  def BuildHooksNodes(self):
6136
    """Build hooks nodes.
6137

6138
    """
6139
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6140
    return (nl, nl)
6141

    
6142
  def CheckPrereq(self):
6143
    """Check prerequisites.
6144

6145
    This checks that the instance is in the cluster.
6146

6147
    """
6148
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6149
    assert self.instance is not None, \
6150
      "Cannot retrieve locked instance %s" % self.op.instance_name
6151

    
6152
    _CheckNodeOnline(self, instance.primary_node)
6153

    
6154
    # check bridges existence
6155
    _CheckInstanceBridgesExist(self, instance)
6156

    
6157
  def Exec(self, feedback_fn):
6158
    """Reboot the instance.
6159

6160
    """
6161
    instance = self.instance
6162
    ignore_secondaries = self.op.ignore_secondaries
6163
    reboot_type = self.op.reboot_type
6164

    
6165
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6166
                                              instance.name,
6167
                                              instance.hypervisor)
6168
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6169
    instance_running = bool(remote_info.payload)
6170

    
6171
    node_current = instance.primary_node
6172

    
6173
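    # soft and hard reboots of a running instance are delegated to the
    # hypervisor via a single RPC; any other case falls back to a full
    # shutdown/startup cycle below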
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6174
                                            constants.INSTANCE_REBOOT_HARD]:
6175
      for disk in instance.disks:
6176
        self.cfg.SetDiskID(disk, node_current)
6177
      result = self.rpc.call_instance_reboot(node_current, instance,
6178
                                             reboot_type,
6179
                                             self.op.shutdown_timeout)
6180
      result.Raise("Could not reboot instance")
6181
    else:
6182
      if instance_running:
6183
        result = self.rpc.call_instance_shutdown(node_current, instance,
6184
                                                 self.op.shutdown_timeout)
6185
        result.Raise("Could not shutdown instance for full reboot")
6186
        _ShutdownInstanceDisks(self, instance)
6187
      else:
6188
        self.LogInfo("Instance %s was already stopped, starting now",
6189
                     instance.name)
6190
      _StartInstanceDisks(self, instance, ignore_secondaries)
6191
      result = self.rpc.call_instance_start(node_current,
6192
                                            (instance, None, None), False)
6193
      msg = result.fail_msg
6194
      if msg:
6195
        _ShutdownInstanceDisks(self, instance)
6196
        raise errors.OpExecError("Could not start instance for"
6197
                                 " full reboot: %s" % msg)
6198

    
6199
    self.cfg.MarkInstanceUp(instance.name)
6200

    
6201

    
6202
class LUInstanceShutdown(LogicalUnit):
6203
  """Shutdown an instance.
6204

6205
  """
6206
  HPATH = "instance-stop"
6207
  HTYPE = constants.HTYPE_INSTANCE
6208
  REQ_BGL = False
6209

    
6210
  def ExpandNames(self):
6211
    self._ExpandAndLockInstance()
6212

    
6213
  def BuildHooksEnv(self):
6214
    """Build hooks env.
6215

6216
    This runs on master, primary and secondary nodes of the instance.
6217

6218
    """
6219
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6220
    env["TIMEOUT"] = self.op.timeout
6221
    return env
6222

    
6223
  def BuildHooksNodes(self):
6224
    """Build hooks nodes.
6225

6226
    """
6227
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6228
    return (nl, nl)
6229

    
6230
  def CheckPrereq(self):
6231
    """Check prerequisites.
6232

6233
    This checks that the instance is in the cluster.
6234

6235
    """
6236
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6237
    assert self.instance is not None, \
6238
      "Cannot retrieve locked instance %s" % self.op.instance_name
6239

    
6240
    self.primary_offline = \
6241
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6242

    
6243
    if self.primary_offline and self.op.ignore_offline_nodes:
6244
      self.proc.LogWarning("Ignoring offline primary node")
6245
    else:
6246
      _CheckNodeOnline(self, self.instance.primary_node)
6247

    
6248
  def Exec(self, feedback_fn):
6249
    """Shutdown the instance.
6250

6251
    """
6252
    instance = self.instance
6253
    node_current = instance.primary_node
6254
    timeout = self.op.timeout
6255

    
6256
    if not self.op.no_remember:
6257
      self.cfg.MarkInstanceDown(instance.name)
6258

    
6259
    if self.primary_offline:
6260
      assert self.op.ignore_offline_nodes
6261
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6262
    else:
6263
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6264
      msg = result.fail_msg
6265
      if msg:
6266
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6267

    
6268
      _ShutdownInstanceDisks(self, instance)
6269

    
6270

    
6271
class LUInstanceReinstall(LogicalUnit):
6272
  """Reinstall an instance.
6273

6274
  """
6275
  HPATH = "instance-reinstall"
6276
  HTYPE = constants.HTYPE_INSTANCE
6277
  REQ_BGL = False
6278

    
6279
  def ExpandNames(self):
6280
    self._ExpandAndLockInstance()
6281

    
6282
  def BuildHooksEnv(self):
6283
    """Build hooks env.
6284

6285
    This runs on master, primary and secondary nodes of the instance.
6286

6287
    """
6288
    return _BuildInstanceHookEnvByObject(self, self.instance)
6289

    
6290
  def BuildHooksNodes(self):
6291
    """Build hooks nodes.
6292

6293
    """
6294
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6295
    return (nl, nl)
6296

    
6297
  def CheckPrereq(self):
6298
    """Check prerequisites.
6299

6300
    This checks that the instance is in the cluster and is not running.
6301

6302
    """
6303
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6304
    assert instance is not None, \
6305
      "Cannot retrieve locked instance %s" % self.op.instance_name
6306
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6307
                     " offline, cannot reinstall")
6308
    for node in instance.secondary_nodes:
6309
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6310
                       " cannot reinstall")
6311

    
6312
    if instance.disk_template == constants.DT_DISKLESS:
6313
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6314
                                 self.op.instance_name,
6315
                                 errors.ECODE_INVAL)
6316
    _CheckInstanceDown(self, instance, "cannot reinstall")
6317

    
6318
    if self.op.os_type is not None:
6319
      # OS verification
6320
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6321
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6322
      instance_os = self.op.os_type
6323
    else:
6324
      instance_os = instance.os
6325

    
6326
    nodelist = list(instance.all_nodes)
6327

    
6328
    if self.op.osparams:
6329
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6330
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6331
      self.os_inst = i_osdict # the new dict (without defaults)
6332
    else:
6333
      self.os_inst = None
6334

    
6335
    self.instance = instance
6336

    
6337
  def Exec(self, feedback_fn):
6338
    """Reinstall the instance.
6339

6340
    """
6341
    inst = self.instance
6342

    
6343
    if self.op.os_type is not None:
6344
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6345
      inst.os = self.op.os_type
6346
      # Write to configuration
6347
      self.cfg.Update(inst, feedback_fn)
6348

    
6349
    _StartInstanceDisks(self, inst, None)
6350
    try:
6351
      feedback_fn("Running the instance OS create scripts...")
6352
      # FIXME: pass debug option from opcode to backend
6353
      result = self.rpc.call_instance_os_add(inst.primary_node,
6354
                                             (inst, self.os_inst), True,
6355
                                             self.op.debug_level)
6356
      result.Raise("Could not install OS for instance %s on node %s" %
6357
                   (inst.name, inst.primary_node))
6358
    finally:
6359
      _ShutdownInstanceDisks(self, inst)
6360

    
6361

    
6362
class LUInstanceRecreateDisks(LogicalUnit):
6363
  """Recreate an instance's missing disks.
6364

6365
  """
6366
  HPATH = "instance-recreate-disks"
6367
  HTYPE = constants.HTYPE_INSTANCE
6368
  REQ_BGL = False
6369

    
6370
  def CheckArguments(self):
6371
    # normalise the disk list
6372
    self.op.disks = sorted(frozenset(self.op.disks))
6373

    
6374
  def ExpandNames(self):
6375
    self._ExpandAndLockInstance()
6376
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6377
    if self.op.nodes:
6378
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6379
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6380
    else:
6381
      self.needed_locks[locking.LEVEL_NODE] = []
6382

    
6383
  def DeclareLocks(self, level):
6384
    if level == locking.LEVEL_NODE:
6385
      # if we replace the nodes, we only need to lock the old primary,
6386
      # otherwise we need to lock all nodes for disk re-creation
6387
      primary_only = bool(self.op.nodes)
6388
      self._LockInstancesNodes(primary_only=primary_only)
6389

    
6390
  def BuildHooksEnv(self):
6391
    """Build hooks env.
6392

6393
    This runs on master, primary and secondary nodes of the instance.
6394

6395
    """
6396
    return _BuildInstanceHookEnvByObject(self, self.instance)
6397

    
6398
  def BuildHooksNodes(self):
6399
    """Build hooks nodes.
6400

6401
    """
6402
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6403
    return (nl, nl)
6404

    
6405
  def CheckPrereq(self):
6406
    """Check prerequisites.
6407

6408
    This checks that the instance is in the cluster and is not running.
6409

6410
    """
6411
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6412
    assert instance is not None, \
6413
      "Cannot retrieve locked instance %s" % self.op.instance_name
6414
    if self.op.nodes:
6415
      if len(self.op.nodes) != len(instance.all_nodes):
6416
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6417
                                   " %d replacement nodes were specified" %
6418
                                   (instance.name, len(instance.all_nodes),
6419
                                    len(self.op.nodes)),
6420
                                   errors.ECODE_INVAL)
6421
      assert instance.disk_template != constants.DT_DRBD8 or \
6422
          len(self.op.nodes) == 2
6423
      assert instance.disk_template != constants.DT_PLAIN or \
6424
          len(self.op.nodes) == 1
6425
      primary_node = self.op.nodes[0]
6426
    else:
6427
      primary_node = instance.primary_node
6428
    _CheckNodeOnline(self, primary_node)
6429

    
6430
    if instance.disk_template == constants.DT_DISKLESS:
6431
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6432
                                 self.op.instance_name, errors.ECODE_INVAL)
6433
    # if we replace nodes *and* the old primary is offline, we don't
6434
    # check
6435
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6436
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6437
    if not (self.op.nodes and old_pnode.offline):
6438
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6439

    
6440
    if not self.op.disks:
6441
      self.op.disks = range(len(instance.disks))
6442
    else:
6443
      for idx in self.op.disks:
6444
        if idx >= len(instance.disks):
6445
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6446
                                     errors.ECODE_INVAL)
6447
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6448
      raise errors.OpPrereqError("Can't recreate disks partially and"
6449
                                 " change the nodes at the same time",
6450
                                 errors.ECODE_INVAL)
6451
    self.instance = instance
6452

    
6453
  def Exec(self, feedback_fn):
6454
    """Recreate the disks.
6455

6456
    """
6457
    instance = self.instance
6458

    
6459
    to_skip = []
6460
    mods = [] # keeps track of needed logical_id changes
6461

    
6462
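    # for DRBD disks that move to new nodes we have to allocate fresh
    # minors and rebuild the logical_id; the changes are collected first
    # and only applied after all assertions have passed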
    for idx, disk in enumerate(instance.disks):
6463
      if idx not in self.op.disks: # disk idx has not been passed in
6464
        to_skip.append(idx)
6465
        continue
6466
      # update secondaries for disks, if needed
6467
      if self.op.nodes:
6468
        if disk.dev_type == constants.LD_DRBD8:
6469
          # need to update the nodes and minors
6470
          assert len(self.op.nodes) == 2
6471
          assert len(disk.logical_id) == 6 # otherwise disk internals
6472
                                           # have changed
6473
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6474
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6475
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6476
                    new_minors[0], new_minors[1], old_secret)
6477
          assert len(disk.logical_id) == len(new_id)
6478
          mods.append((idx, new_id))
6479

    
6480
    # now that we have passed all asserts above, we can apply the mods
6481
    # in a single run (to avoid partial changes)
6482
    for idx, new_id in mods:
6483
      instance.disks[idx].logical_id = new_id
6484

    
6485
    # change primary node, if needed
6486
    if self.op.nodes:
6487
      instance.primary_node = self.op.nodes[0]
6488
      self.LogWarning("Changing the instance's nodes, you will have to"
6489
                      " remove any disks left on the older nodes manually")
6490

    
6491
    if self.op.nodes:
6492
      self.cfg.Update(instance, feedback_fn)
6493

    
6494
    _CreateDisks(self, instance, to_skip=to_skip)
6495

    
6496

    
6497
class LUInstanceRename(LogicalUnit):
6498
  """Rename an instance.
6499

6500
  """
6501
  HPATH = "instance-rename"
6502
  HTYPE = constants.HTYPE_INSTANCE
6503

    
6504
  def CheckArguments(self):
6505
    """Check arguments.
6506

6507
    """
6508
    if self.op.ip_check and not self.op.name_check:
6509
      # TODO: make the ip check more flexible and not depend on the name check
6510
      raise errors.OpPrereqError("IP address check requires a name check",
6511
                                 errors.ECODE_INVAL)
6512

    
6513
  def BuildHooksEnv(self):
6514
    """Build hooks env.
6515

6516
    This runs on master, primary and secondary nodes of the instance.
6517

6518
    """
6519
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6520
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6521
    return env
6522

    
6523
  def BuildHooksNodes(self):
6524
    """Build hooks nodes.
6525

6526
    """
6527
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6528
    return (nl, nl)
6529

    
6530
  def CheckPrereq(self):
6531
    """Check prerequisites.
6532

6533
    This checks that the instance is in the cluster and is not running.
6534

6535
    """
6536
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6537
                                                self.op.instance_name)
6538
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6539
    assert instance is not None
6540
    _CheckNodeOnline(self, instance.primary_node)
6541
    _CheckInstanceDown(self, instance, "cannot rename")
6542
    self.instance = instance
6543

    
6544
    new_name = self.op.new_name
6545
    if self.op.name_check:
6546
      hostname = netutils.GetHostname(name=new_name)
6547
      if hostname != new_name:
6548
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6549
                     hostname.name)
6550
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6551
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6552
                                    " same as given hostname '%s'") %
6553
                                    (hostname.name, self.op.new_name),
6554
                                    errors.ECODE_INVAL)
6555
      new_name = self.op.new_name = hostname.name
6556
      if (self.op.ip_check and
6557
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6558
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6559
                                   (hostname.ip, new_name),
6560
                                   errors.ECODE_NOTUNIQUE)
6561

    
6562
    instance_list = self.cfg.GetInstanceList()
6563
    if new_name in instance_list and new_name != instance.name:
6564
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6565
                                 new_name, errors.ECODE_EXISTS)
6566

    
6567
  def Exec(self, feedback_fn):
6568
    """Rename the instance.
6569

6570
    """
6571
    inst = self.instance
6572
    old_name = inst.name
6573

    
6574
    rename_file_storage = False
6575
    if (inst.disk_template in constants.DTS_FILEBASED and
6576
        self.op.new_name != inst.name):
6577
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6578
      rename_file_storage = True
6579

    
6580
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6581
    # Change the instance lock. This is definitely safe while we hold the BGL.
6582
    # Otherwise the new lock would have to be added in acquired mode.
6583
    assert self.REQ_BGL
6584
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6585
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6586

    
6587
    # re-read the instance from the configuration after rename
6588
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6589

    
6590
    if rename_file_storage:
6591
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6592
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6593
                                                     old_file_storage_dir,
6594
                                                     new_file_storage_dir)
6595
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6596
                   " (but the instance has been renamed in Ganeti)" %
6597
                   (inst.primary_node, old_file_storage_dir,
6598
                    new_file_storage_dir))
6599

    
6600
    _StartInstanceDisks(self, inst, None)
6601
    try:
6602
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6603
                                                 old_name, self.op.debug_level)
6604
      msg = result.fail_msg
6605
      if msg:
6606
        msg = ("Could not run OS rename script for instance %s on node %s"
6607
               " (but the instance has been renamed in Ganeti): %s" %
6608
               (inst.name, inst.primary_node, msg))
6609
        self.proc.LogWarning(msg)
6610
    finally:
6611
      _ShutdownInstanceDisks(self, inst)
6612

    
6613
    return inst.name
6614

    
6615

    
6616
class LUInstanceRemove(LogicalUnit):
6617
  """Remove an instance.
6618

6619
  """
6620
  HPATH = "instance-remove"
6621
  HTYPE = constants.HTYPE_INSTANCE
6622
  REQ_BGL = False
6623

    
6624
  def ExpandNames(self):
6625
    self._ExpandAndLockInstance()
6626
    self.needed_locks[locking.LEVEL_NODE] = []
6627
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6628

    
6629
  def DeclareLocks(self, level):
6630
    if level == locking.LEVEL_NODE:
6631
      self._LockInstancesNodes()
6632

    
6633
  def BuildHooksEnv(self):
6634
    """Build hooks env.
6635

6636
    This runs on master, primary and secondary nodes of the instance.
6637

6638
    """
6639
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6640
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6641
    return env
6642

    
6643
  def BuildHooksNodes(self):
6644
    """Build hooks nodes.
6645

6646
    """
6647
    nl = [self.cfg.GetMasterNode()]
6648
    nl_post = list(self.instance.all_nodes) + nl
6649
    return (nl, nl_post)
6650

    
6651
  def CheckPrereq(self):
6652
    """Check prerequisites.
6653

6654
    This checks that the instance is in the cluster.
6655

6656
    """
6657
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6658
    assert self.instance is not None, \
6659
      "Cannot retrieve locked instance %s" % self.op.instance_name
6660

    
6661
  def Exec(self, feedback_fn):
6662
    """Remove the instance.
6663

6664
    """
6665
    instance = self.instance
6666
    logging.info("Shutting down instance %s on node %s",
6667
                 instance.name, instance.primary_node)
6668

    
6669
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6670
                                             self.op.shutdown_timeout)
6671
    msg = result.fail_msg
6672
    if msg:
6673
      if self.op.ignore_failures:
6674
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6675
      else:
6676
        raise errors.OpExecError("Could not shutdown instance %s on"
6677
                                 " node %s: %s" %
6678
                                 (instance.name, instance.primary_node, msg))
6679

    
6680
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6681

    
6682

    
6683
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6684
  """Utility function to remove an instance.
6685

6686
  """
6687
  logging.info("Removing block devices for instance %s", instance.name)
6688

    
6689
  if not _RemoveDisks(lu, instance):
6690
    if not ignore_failures:
6691
      raise errors.OpExecError("Can't remove instance's disks")
6692
    feedback_fn("Warning: can't remove instance's disks")
6693

    
6694
  logging.info("Removing instance %s out of cluster config", instance.name)
6695

    
6696
  lu.cfg.RemoveInstance(instance.name)
6697

    
6698
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6699
    "Instance lock removal conflict"
6700

    
6701
  # Remove lock for the instance
6702
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6703

    
6704

    
6705
class LUInstanceQuery(NoHooksLU):
6706
  """Logical unit for querying instances.
6707

6708
  """
6709
  # pylint: disable=W0142
6710
  REQ_BGL = False
6711

    
6712
  def CheckArguments(self):
6713
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6714
                             self.op.output_fields, self.op.use_locking)
6715

    
6716
  def ExpandNames(self):
6717
    self.iq.ExpandNames(self)
6718

    
6719
  def DeclareLocks(self, level):
6720
    self.iq.DeclareLocks(self, level)
6721

    
6722
  def Exec(self, feedback_fn):
6723
    return self.iq.OldStyleQuery(self)
6724

    
6725

    
6726
class LUInstanceFailover(LogicalUnit):
6727
  """Failover an instance.
6728

6729
  """
6730
  HPATH = "instance-failover"
6731
  HTYPE = constants.HTYPE_INSTANCE
6732
  REQ_BGL = False
6733

    
6734
  def CheckArguments(self):
6735
    """Check the arguments.
6736

6737
    """
6738
    self.iallocator = getattr(self.op, "iallocator", None)
6739
    self.target_node = getattr(self.op, "target_node", None)
6740

    
6741
  def ExpandNames(self):
6742
    self._ExpandAndLockInstance()
6743

    
6744
    if self.op.target_node is not None:
6745
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6746

    
6747
    self.needed_locks[locking.LEVEL_NODE] = []
6748
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6749

    
6750
    ignore_consistency = self.op.ignore_consistency
6751
    shutdown_timeout = self.op.shutdown_timeout
6752
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6753
                                       cleanup=False,
6754
                                       failover=True,
6755
                                       ignore_consistency=ignore_consistency,
6756
                                       shutdown_timeout=shutdown_timeout)
6757
    self.tasklets = [self._migrater]
6758

    
6759
  def DeclareLocks(self, level):
6760
    if level == locking.LEVEL_NODE:
6761
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6762
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6763
        if self.op.target_node is None:
6764
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6765
        else:
6766
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6767
                                                   self.op.target_node]
6768
        del self.recalculate_locks[locking.LEVEL_NODE]
6769
      else:
6770
        self._LockInstancesNodes()
6771

    
6772
  def BuildHooksEnv(self):
6773
    """Build hooks env.
6774

6775
    This runs on master, primary and secondary nodes of the instance.
6776

6777
    """
6778
    instance = self._migrater.instance
6779
    source_node = instance.primary_node
6780
    target_node = self.op.target_node
6781
    env = {
6782
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6783
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6784
      "OLD_PRIMARY": source_node,
6785
      "NEW_PRIMARY": target_node,
6786
      }
6787

    
6788
    if instance.disk_template in constants.DTS_INT_MIRROR:
6789
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6790
      env["NEW_SECONDARY"] = source_node
6791
    else:
6792
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6793

    
6794
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6795

    
6796
    return env
6797

    
6798
  def BuildHooksNodes(self):
6799
    """Build hooks nodes.
6800

6801
    """
6802
    instance = self._migrater.instance
6803
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6804
    return (nl, nl + [instance.primary_node])
6805

    
6806

    
6807
class LUInstanceMigrate(LogicalUnit):
6808
  """Migrate an instance.
6809

6810
  This is migration without shutting down, compared to the failover,
6811
  which is done with shutdown.
6812

6813
  """
6814
  HPATH = "instance-migrate"
6815
  HTYPE = constants.HTYPE_INSTANCE
6816
  REQ_BGL = False
6817

    
6818
  def ExpandNames(self):
6819
    self._ExpandAndLockInstance()
6820

    
6821
    if self.op.target_node is not None:
6822
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6823

    
6824
    self.needed_locks[locking.LEVEL_NODE] = []
6825
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6826

    
6827
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6828
                                       cleanup=self.op.cleanup,
6829
                                       failover=False,
6830
                                       fallback=self.op.allow_failover)
6831
    self.tasklets = [self._migrater]
6832

    
6833
  def DeclareLocks(self, level):
6834
    if level == locking.LEVEL_NODE:
6835
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6836
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6837
        if self.op.target_node is None:
6838
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6839
        else:
6840
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6841
                                                   self.op.target_node]
6842
        del self.recalculate_locks[locking.LEVEL_NODE]
6843
      else:
6844
        self._LockInstancesNodes()
6845

    
6846
  def BuildHooksEnv(self):
6847
    """Build hooks env.
6848

6849
    This runs on master, primary and secondary nodes of the instance.
6850

6851
    """
6852
    instance = self._migrater.instance
6853
    source_node = instance.primary_node
6854
    target_node = self.op.target_node
6855
    env = _BuildInstanceHookEnvByObject(self, instance)
6856
    env.update({
6857
      "MIGRATE_LIVE": self._migrater.live,
6858
      "MIGRATE_CLEANUP": self.op.cleanup,
6859
      "OLD_PRIMARY": source_node,
6860
      "NEW_PRIMARY": target_node,
6861
      })
6862

    
6863
    if instance.disk_template in constants.DTS_INT_MIRROR:
6864
      env["OLD_SECONDARY"] = target_node
6865
      env["NEW_SECONDARY"] = source_node
6866
    else:
6867
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6868

    
6869
    return env
6870

    
6871
  def BuildHooksNodes(self):
6872
    """Build hooks nodes.
6873

6874
    """
6875
    instance = self._migrater.instance
6876
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6877
    return (nl, nl + [instance.primary_node])
6878

    
6879

    
6880
class LUInstanceMove(LogicalUnit):
6881
  """Move an instance by data-copying.
6882

6883
  """
6884
  HPATH = "instance-move"
6885
  HTYPE = constants.HTYPE_INSTANCE
6886
  REQ_BGL = False
6887

    
6888
  def ExpandNames(self):
6889
    self._ExpandAndLockInstance()
6890
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6891
    self.op.target_node = target_node
6892
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6893
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6894

    
6895
  def DeclareLocks(self, level):
6896
    if level == locking.LEVEL_NODE:
6897
      self._LockInstancesNodes(primary_only=True)
6898

    
6899
  def BuildHooksEnv(self):
6900
    """Build hooks env.
6901

6902
    This runs on master, primary and secondary nodes of the instance.
6903

6904
    """
6905
    env = {
6906
      "TARGET_NODE": self.op.target_node,
6907
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6908
      }
6909
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6910
    return env
6911

    
6912
  def BuildHooksNodes(self):
6913
    """Build hooks nodes.
6914

6915
    """
6916
    nl = [
6917
      self.cfg.GetMasterNode(),
6918
      self.instance.primary_node,
6919
      self.op.target_node,
6920
      ]
6921
    return (nl, nl)
6922

    
6923
  def CheckPrereq(self):
6924
    """Check prerequisites.
6925

6926
    This checks that the instance is in the cluster.
6927

6928
    """
6929
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6930
    assert self.instance is not None, \
6931
      "Cannot retrieve locked instance %s" % self.op.instance_name
6932

    
6933
    node = self.cfg.GetNodeInfo(self.op.target_node)
6934
    assert node is not None, \
6935
      "Cannot retrieve locked node %s" % self.op.target_node
6936

    
6937
    self.target_node = target_node = node.name
6938

    
6939
    if target_node == instance.primary_node:
6940
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6941
                                 (instance.name, target_node),
6942
                                 errors.ECODE_STATE)
6943

    
6944
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6945

    
6946
    for idx, dsk in enumerate(instance.disks):
6947
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6948
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6949
                                   " cannot copy" % idx, errors.ECODE_STATE)
6950

    
6951
    _CheckNodeOnline(self, target_node)
6952
    _CheckNodeNotDrained(self, target_node)
6953
    _CheckNodeVmCapable(self, target_node)
6954

    
6955
    if instance.admin_up:
6956
      # check memory requirements on the secondary node
6957
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6958
                           instance.name, bep[constants.BE_MEMORY],
6959
                           instance.hypervisor)
6960
    else:
6961
      self.LogInfo("Not checking memory on the secondary node as"
6962
                   " instance will not be started")
6963

    
6964
    # check bridge existence
6965
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6966

    
6967
  def Exec(self, feedback_fn):
6968
    """Move an instance.
6969

6970
    The move is done by shutting it down on its present node, copying
6971
    the data over (slow) and starting it on the new node.
6972

6973
    """
6974
    instance = self.instance
6975

    
6976
    source_node = instance.primary_node
6977
    target_node = self.target_node
6978

    
6979
    self.LogInfo("Shutting down instance %s on source node %s",
6980
                 instance.name, source_node)
6981

    
6982
    result = self.rpc.call_instance_shutdown(source_node, instance,
6983
                                             self.op.shutdown_timeout)
6984
    msg = result.fail_msg
6985
    if msg:
6986
      if self.op.ignore_consistency:
6987
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6988
                             " Proceeding anyway. Please make sure node"
6989
                             " %s is down. Error details: %s",
6990
                             instance.name, source_node, source_node, msg)
6991
      else:
6992
        raise errors.OpExecError("Could not shutdown instance %s on"
6993
                                 " node %s: %s" %
6994
                                 (instance.name, source_node, msg))
6995

    
6996
    # create the target disks
6997
    try:
6998
      _CreateDisks(self, instance, target_node=target_node)
6999
    except errors.OpExecError:
7000
      self.LogWarning("Device creation failed, reverting...")
7001
      try:
7002
        _RemoveDisks(self, instance, target_node=target_node)
7003
      finally:
7004
        self.cfg.ReleaseDRBDMinors(instance.name)
7005
        raise
7006

    
7007
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7008

    
7009
    errs = []
7010
    # activate, get path, copy the data over
7011
    for idx, disk in enumerate(instance.disks):
7012
      self.LogInfo("Copying data for disk %d", idx)
7013
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7014
                                               instance.name, True, idx)
7015
      if result.fail_msg:
7016
        self.LogWarning("Can't assemble newly created disk %d: %s",
7017
                        idx, result.fail_msg)
7018
        errs.append(result.fail_msg)
7019
        break
7020
      dev_path = result.payload
7021
      result = self.rpc.call_blockdev_export(source_node, disk,
7022
                                             target_node, dev_path,
7023
                                             cluster_name)
7024
      if result.fail_msg:
7025
        self.LogWarning("Can't copy data over for disk %d: %s",
7026
                        idx, result.fail_msg)
7027
        errs.append(result.fail_msg)
7028
        break
7029

    
7030
    if errs:
7031
      self.LogWarning("Some disks failed to copy, aborting")
7032
      try:
7033
        _RemoveDisks(self, instance, target_node=target_node)
7034
      finally:
7035
        self.cfg.ReleaseDRBDMinors(instance.name)
7036
        raise errors.OpExecError("Errors during disk copy: %s" %
7037
                                 (",".join(errs),))
7038

    
7039
    instance.primary_node = target_node
7040
    self.cfg.Update(instance, feedback_fn)
7041

    
7042
    self.LogInfo("Removing the disks on the original node")
7043
    _RemoveDisks(self, instance, target_node=source_node)
7044

    
7045
    # Only start the instance if it's marked as up
7046
    if instance.admin_up:
7047
      self.LogInfo("Starting instance %s on node %s",
7048
                   instance.name, target_node)
7049

    
7050
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7051
                                           ignore_secondaries=True)
7052
      if not disks_ok:
7053
        _ShutdownInstanceDisks(self, instance)
7054
        raise errors.OpExecError("Can't activate the instance's disks")
7055

    
7056
      result = self.rpc.call_instance_start(target_node,
7057
                                            (instance, None, None), False)
7058
      msg = result.fail_msg
7059
      if msg:
7060
        _ShutdownInstanceDisks(self, instance)
7061
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7062
                                 (instance.name, target_node, msg))
7063

    
7064

    
7065
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migrating the instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
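

# Illustrative sketch of the "jobs" value built in LUNodeMigrate.Exec above
# (instance names are made up for the example, not part of the original
# source): for primary instances "inst1" and "inst2" on the node it would
# look roughly like
#
#   [
#     [opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2", ...)],
#   ]
#
# i.e. a list of single-opcode jobs, so each instance migration is submitted
# and tracked as its own job.
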
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout used for the
                          instance shutdown

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
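
  # Illustrative usage sketch (hypothetical, not taken from this module): an
  # owning LogicalUnit would typically create one such tasklet per instance
  # in its ExpandNames, for example:
  #
  #   self.tasklets = [TLMigrateInstance(self, "inst1.example.com",
  #                                      cleanup=False, failover=True)]
  #
  # "inst1.example.com" is a made-up instance name; the keyword arguments
  # correspond to the constructor parameters above.
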
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, self.instance.hypervisor)
    src_info = nodeinfo[source_node]
    dst_info = nodeinfo[target_node]

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
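

# Illustrative note on the structure returned by _GenerateDRBD8Branch above
# (a sketch, not part of the original code): each DRBD8 disk is a small tree
# with two LV children, roughly
#
#   LD_DRBD8 (size=<disk size>)
#     +-- LD_LV "<prefix>_data" (size=<disk size>, on vgnames[0])
#     +-- LD_LV "<prefix>_meta" (size=DRBD_META_SIZE, on vgnames[1])
#
# where the "_data"/"_meta" suffixes follow the naming used in
# _GenerateDiskTemplate below.
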
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
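

# Worked example for _CalcEta (illustrative numbers only): if 256 MiB of a
# 1024 MiB disk were written in 30 seconds, the average is 30/256 seconds per
# MiB, so the remaining 768 MiB are estimated at 768 * 30 / 256 = 90 seconds:
#
#   >>> _CalcEta(30.0, 256, 1024)
#   90.0
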
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)
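      # Illustrative example with assumed values (the real ones live in
      # constants.py): if MIN_WIPE_CHUNK_PERCENT were 10 and MAX_WIPE_CHUNK
      # were 1024 MiB, a 102400 MiB disk would give 10240 MiB from the
      # percentage rule, so the min() above would cap the chunk at 1024 MiB.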

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
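

# Worked example for _ComputeDiskSizePerVG (illustrative values; the vg names
# are made up): two DRBD8 disks of 10240 MiB on vg "xenvg" and 5120 MiB on vg
# "fastvg" yield {"xenvg": 10240 + DRBD_META_SIZE, "fastvg": 5120 +
# DRBD_META_SIZE}, i.e. each disk reserves its metadata overhead in its own
# volume group.
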
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
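

# Worked example for _ComputeDiskSize (illustrative values): two DRBD8 disks
# of 10240 MiB and 5120 MiB require (10240 + 128) + (5120 + 128) = 15616 MiB
# in total, while the same disks under DT_PLAIN need 10240 + 5120 = 15360 MiB.
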
def _FilterVmNodes(lu, nodenames):
8208
  """Filters out non-vm_capable nodes from a list.
8209

8210
  @type lu: L{LogicalUnit}
8211
  @param lu: the logical unit for which we check
8212
  @type nodenames: list
8213
  @param nodenames: the list of nodes on which we should check
8214
  @rtype: list
8215
  @return: the list of vm-capable nodes
8216

8217
  """
8218
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8219
  return [name for name in nodenames if name not in vm_nodes]
8220

    
8221

    
8222
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8223
  """Hypervisor parameter validation.
8224

8225
  This function abstract the hypervisor parameter validation to be
8226
  used in both instance create and instance modify.
8227

8228
  @type lu: L{LogicalUnit}
8229
  @param lu: the logical unit for which we check
8230
  @type nodenames: list
8231
  @param nodenames: the list of nodes on which we should check
8232
  @type hvname: string
8233
  @param hvname: the name of the hypervisor we should use
8234
  @type hvparams: dict
8235
  @param hvparams: the parameters which we need to check
8236
  @raise errors.OpPrereqError: if the parameters are not valid
8237

8238
  """
8239
  nodenames = _FilterVmNodes(lu, nodenames)
8240

    
8241
  cluster = lu.cfg.GetClusterInfo()
8242
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8243

    
8244
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8245
  for node in nodenames:
8246
    info = hvinfo[node]
8247
    if info.offline:
8248
      continue
8249
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8250

    
8251

    
8252
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8253
  """OS parameters validation.
8254

8255
  @type lu: L{LogicalUnit}
8256
  @param lu: the logical unit for which we check
8257
  @type required: boolean
8258
  @param required: whether the validation should fail if the OS is not
8259
      found
8260
  @type nodenames: list
8261
  @param nodenames: the list of nodes on which we should check
8262
  @type osname: string
8263
  @param osname: the name of the hypervisor we should use
8264
  @type osparams: dict
8265
  @param osparams: the parameters which we need to check
8266
  @raise errors.OpPrereqError: if the parameters are not valid
8267

8268
  """
8269
  nodenames = _FilterVmNodes(lu, nodenames)
8270
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8271
                                   [constants.OS_VALIDATE_PARAMETERS],
8272
                                   osparams)
8273
  for node, nres in result.items():
8274
    # we don't check for offline cases since this should be run only
8275
    # against the master node and/or an instance's nodes
8276
    nres.Raise("OS Parameters validation failed on node %s" % node)
8277
    if not nres.payload:
8278
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8279
                 osname, node)
8280

    
8281

    
8282
class LUInstanceCreate(LogicalUnit):
8283
  """Create an instance.
8284

8285
  """
8286
  HPATH = "instance-add"
8287
  HTYPE = constants.HTYPE_INSTANCE
8288
  REQ_BGL = False
8289

    
8290
  def CheckArguments(self):
8291
    """Check arguments.
8292

8293
    """
8294
    # do not require name_check to ease forward/backward compatibility
8295
    # for tools
8296
    if self.op.no_install and self.op.start:
8297
      self.LogInfo("No-installation mode selected, disabling startup")
8298
      self.op.start = False
8299
    # validate/normalize the instance name
8300
    self.op.instance_name = \
8301
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8302

    
8303
    if self.op.ip_check and not self.op.name_check:
8304
      # TODO: make the ip check more flexible and not depend on the name check
8305
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8306
                                 " check", errors.ECODE_INVAL)
8307

    
8308
    # check nics' parameter names
8309
    for nic in self.op.nics:
8310
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8311

    
8312
    # check disks. parameter names and consistent adopt/no-adopt strategy
8313
    has_adopt = has_no_adopt = False
8314
    for disk in self.op.disks:
8315
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8316
      if constants.IDISK_ADOPT in disk:
8317
        has_adopt = True
8318
      else:
8319
        has_no_adopt = True
8320
    if has_adopt and has_no_adopt:
8321
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8322
                                 errors.ECODE_INVAL)
8323
    if has_adopt:
8324
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8325
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8326
                                   " '%s' disk template" %
8327
                                   self.op.disk_template,
8328
                                   errors.ECODE_INVAL)
8329
      if self.op.iallocator is not None:
8330
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8331
                                   " iallocator script", errors.ECODE_INVAL)
8332
      if self.op.mode == constants.INSTANCE_IMPORT:
8333
        raise errors.OpPrereqError("Disk adoption not allowed for"
8334
                                   " instance import", errors.ECODE_INVAL)
8335
    else:
8336
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8337
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8338
                                   " but no 'adopt' parameter given" %
8339
                                   self.op.disk_template,
8340
                                   errors.ECODE_INVAL)
8341

    
8342
    self.adopt_disks = has_adopt
8343

    
8344
    # instance name verification
8345
    if self.op.name_check:
8346
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8347
      self.op.instance_name = self.hostname1.name
8348
      # used in CheckPrereq for ip ping check
8349
      self.check_ip = self.hostname1.ip
8350
    else:
8351
      self.check_ip = None
8352

    
8353
    # file storage checks
8354
    if (self.op.file_driver and
8355
        self.op.file_driver not in constants.FILE_DRIVER):
8356
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8357
                                 self.op.file_driver, errors.ECODE_INVAL)
8358

    
8359
    if self.op.disk_template == constants.DT_FILE:
8360
      opcodes.RequireFileStorage()
8361
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8362
      opcodes.RequireSharedFileStorage()
8363

    
8364
    ### Node/iallocator related checks
8365
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8366

    
8367
    if self.op.pnode is not None:
8368
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8369
        if self.op.snode is None:
8370
          raise errors.OpPrereqError("The networked disk templates need"
8371
                                     " a mirror node", errors.ECODE_INVAL)
8372
      elif self.op.snode:
8373
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8374
                        " template")
8375
        self.op.snode = None
8376

    
8377
    self._cds = _GetClusterDomainSecret()
8378

    
8379
    if self.op.mode == constants.INSTANCE_IMPORT:
8380
      # On import force_variant must be True, because if we forced it at
8381
      # initial install, our only chance when importing it back is that it
8382
      # works again!
8383
      self.op.force_variant = True
8384

    
8385
      if self.op.no_install:
8386
        self.LogInfo("No-installation mode has no effect during import")
8387

    
8388
    elif self.op.mode == constants.INSTANCE_CREATE:
8389
      if self.op.os_type is None:
8390
        raise errors.OpPrereqError("No guest OS specified",
8391
                                   errors.ECODE_INVAL)
8392
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8393
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8394
                                   " installation" % self.op.os_type,
8395
                                   errors.ECODE_STATE)
8396
      if self.op.disk_template is None:
8397
        raise errors.OpPrereqError("No disk template specified",
8398
                                   errors.ECODE_INVAL)
8399

    
8400
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8401
      # Check handshake to ensure both clusters have the same domain secret
8402
      src_handshake = self.op.source_handshake
8403
      if not src_handshake:
8404
        raise errors.OpPrereqError("Missing source handshake",
8405
                                   errors.ECODE_INVAL)
8406

    
8407
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8408
                                                           src_handshake)
8409
      if errmsg:
8410
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8411
                                   errors.ECODE_INVAL)
8412

    
8413
      # Load and check source CA
8414
      self.source_x509_ca_pem = self.op.source_x509_ca
8415
      if not self.source_x509_ca_pem:
8416
        raise errors.OpPrereqError("Missing source X509 CA",
8417
                                   errors.ECODE_INVAL)
8418

    
8419
      try:
8420
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8421
                                                    self._cds)
8422
      except OpenSSL.crypto.Error, err:
8423
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8424
                                   (err, ), errors.ECODE_INVAL)
8425

    
8426
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8427
      if errcode is not None:
8428
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8429
                                   errors.ECODE_INVAL)
8430

    
8431
      self.source_x509_ca = cert
8432

    
8433
      src_instance_name = self.op.source_instance_name
8434
      if not src_instance_name:
8435
        raise errors.OpPrereqError("Missing source instance name",
8436
                                   errors.ECODE_INVAL)
8437

    
8438
      self.source_instance_name = \
8439
          netutils.GetHostname(name=src_instance_name).name
8440

    
8441
    else:
8442
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8443
                                 self.op.mode, errors.ECODE_INVAL)
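
  # Illustrative sketch (not part of cmdlib.py): the adoption check above is
  # an all-or-nothing rule -- either every disk dict carries the 'adopt' key
  # or none of them does, e.g.:
  #
  #   flags = [constants.IDISK_ADOPT in d for d in self.op.disks]
  #   consistent = all(flags) or not any(flags)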
8444

    
8445
  def ExpandNames(self):
8446
    """ExpandNames for CreateInstance.
8447

8448
    Figure out the right locks for instance creation.
8449

8450
    """
8451
    self.needed_locks = {}
8452

    
8453
    instance_name = self.op.instance_name
8454
    # this is just a preventive check, but someone might still add this
8455
    # instance in the meantime, and creation will fail at lock-add time
8456
    if instance_name in self.cfg.GetInstanceList():
8457
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8458
                                 instance_name, errors.ECODE_EXISTS)
8459

    
8460
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8461

    
8462
    if self.op.iallocator:
8463
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8464
    else:
8465
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8466
      nodelist = [self.op.pnode]
8467
      if self.op.snode is not None:
8468
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8469
        nodelist.append(self.op.snode)
8470
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8471

    
8472
    # in case of import lock the source node too
8473
    if self.op.mode == constants.INSTANCE_IMPORT:
8474
      src_node = self.op.src_node
8475
      src_path = self.op.src_path
8476

    
8477
      if src_path is None:
8478
        self.op.src_path = src_path = self.op.instance_name
8479

    
8480
      if src_node is None:
8481
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8482
        self.op.src_node = None
8483
        if os.path.isabs(src_path):
8484
          raise errors.OpPrereqError("Importing an instance from a path"
8485
                                     " requires a source node option",
8486
                                     errors.ECODE_INVAL)
8487
      else:
8488
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8489
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8490
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8491
        if not os.path.isabs(src_path):
8492
          self.op.src_path = src_path = \
8493
            utils.PathJoin(constants.EXPORT_DIR, src_path)
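
  # Illustrative note (not part of cmdlib.py), summarising the lock choices
  # made above:
  #
  #   iallocator given        -> LEVEL_NODE = locking.ALL_SET (nodes unknown)
  #   pnode (and snode) given -> LEVEL_NODE = [pnode] (+ [snode])
  #   import without src_node -> LEVEL_NODE = locking.ALL_SET, so that
  #                              _ReadExportInfo can search every node for
  #                              the export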
8494

    
8495
  def _RunAllocator(self):
8496
    """Run the allocator based on input opcode.
8497

8498
    """
8499
    nics = [n.ToDict() for n in self.nics]
8500
    ial = IAllocator(self.cfg, self.rpc,
8501
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8502
                     name=self.op.instance_name,
8503
                     disk_template=self.op.disk_template,
8504
                     tags=self.op.tags,
8505
                     os=self.op.os_type,
8506
                     vcpus=self.be_full[constants.BE_VCPUS],
8507
                     memory=self.be_full[constants.BE_MEMORY],
8508
                     disks=self.disks,
8509
                     nics=nics,
8510
                     hypervisor=self.op.hypervisor,
8511
                     )
8512

    
8513
    ial.Run(self.op.iallocator)
8514

    
8515
    if not ial.success:
8516
      raise errors.OpPrereqError("Can't compute nodes using"
8517
                                 " iallocator '%s': %s" %
8518
                                 (self.op.iallocator, ial.info),
8519
                                 errors.ECODE_NORES)
8520
    if len(ial.result) != ial.required_nodes:
8521
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8522
                                 " of nodes (%s), required %s" %
8523
                                 (self.op.iallocator, len(ial.result),
8524
                                  ial.required_nodes), errors.ECODE_FAULT)
8525
    self.op.pnode = ial.result[0]
8526
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8527
                 self.op.instance_name, self.op.iallocator,
8528
                 utils.CommaJoin(ial.result))
8529
    if ial.required_nodes == 2:
8530
      self.op.snode = ial.result[1]
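
  # Illustrative sketch (not part of cmdlib.py): the allocator reply is only
  # trusted when it succeeded and returned exactly the number of nodes the
  # request needs -- two for internally mirrored disk templates (primary plus
  # secondary), one otherwise:
  #
  #   ok = ial.success and len(ial.result) == ial.required_nodes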
8531

    
8532
  def BuildHooksEnv(self):
8533
    """Build hooks env.
8534

8535
    This runs on master, primary and secondary nodes of the instance.
8536

8537
    """
8538
    env = {
8539
      "ADD_MODE": self.op.mode,
8540
      }
8541
    if self.op.mode == constants.INSTANCE_IMPORT:
8542
      env["SRC_NODE"] = self.op.src_node
8543
      env["SRC_PATH"] = self.op.src_path
8544
      env["SRC_IMAGES"] = self.src_images
8545

    
8546
    env.update(_BuildInstanceHookEnv(
8547
      name=self.op.instance_name,
8548
      primary_node=self.op.pnode,
8549
      secondary_nodes=self.secondaries,
8550
      status=self.op.start,
8551
      os_type=self.op.os_type,
8552
      memory=self.be_full[constants.BE_MEMORY],
8553
      vcpus=self.be_full[constants.BE_VCPUS],
8554
      nics=_NICListToTuple(self, self.nics),
8555
      disk_template=self.op.disk_template,
8556
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8557
             for d in self.disks],
8558
      bep=self.be_full,
8559
      hvp=self.hv_full,
8560
      hypervisor_name=self.op.hypervisor,
8561
      tags=self.op.tags,
8562
    ))
8563

    
8564
    return env
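
  # Illustrative sketch (not part of cmdlib.py): for an import, the dict
  # returned above contains ADD_MODE, SRC_NODE, SRC_PATH and SRC_IMAGES on
  # top of the values added by _BuildInstanceHookEnv, e.g. with hypothetical
  # values:
  #
  #   {"ADD_MODE": constants.INSTANCE_IMPORT,
  #    "SRC_NODE": "node3.example.com",
  #    "SRC_PATH": "/srv/ganeti/export/inst1.example.com",
  #    "SRC_IMAGES": [...], ...}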
8565

    
8566
  def BuildHooksNodes(self):
8567
    """Build hooks nodes.
8568

8569
    """
8570
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8571
    return nl, nl
8572

    
8573
  def _ReadExportInfo(self):
8574
    """Reads the export information from disk.
8575

8576
    It will override the opcode source node and path with the actual
8577
    information, if these two were not specified before.
8578

8579
    @return: the export information
8580

8581
    """
8582
    assert self.op.mode == constants.INSTANCE_IMPORT
8583

    
8584
    src_node = self.op.src_node
8585
    src_path = self.op.src_path
8586

    
8587
    if src_node is None:
8588
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8589
      exp_list = self.rpc.call_export_list(locked_nodes)
8590
      found = False
8591
      for node in exp_list:
8592
        if exp_list[node].fail_msg:
8593
          continue
8594
        if src_path in exp_list[node].payload:
8595
          found = True
8596
          self.op.src_node = src_node = node
8597
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8598
                                                       src_path)
8599
          break
8600
      if not found:
8601
        raise errors.OpPrereqError("No export found for relative path %s" %
8602
                                    src_path, errors.ECODE_INVAL)
8603

    
8604
    _CheckNodeOnline(self, src_node)
8605
    result = self.rpc.call_export_info(src_node, src_path)
8606
    result.Raise("No export or invalid export found in dir %s" % src_path)
8607

    
8608
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8609
    if not export_info.has_section(constants.INISECT_EXP):
8610
      raise errors.ProgrammerError("Corrupted export config",
8611
                                   errors.ECODE_ENVIRON)
8612

    
8613
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8614
    if int(ei_version) != constants.EXPORT_VERSION:
8615
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8616
                                 (ei_version, constants.EXPORT_VERSION),
8617
                                 errors.ECODE_ENVIRON)
8618
    return export_info
8619

    
8620
  def _ReadExportParams(self, einfo):
8621
    """Use export parameters as defaults.
8622

8623
    If the opcode doesn't specify (i.e. override) some instance
8624
    parameters, try to use them from the export information, if
8625
    it declares them.
8626

8627
    """
8628
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8629

    
8630
    if self.op.disk_template is None:
8631
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8632
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8633
                                          "disk_template")
8634
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8635
          raise errors.OpPrereqError("Disk template specified in configuration"
8636
                                     " file is not one of the allowed values:"
8637
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8638
      else:
8639
        raise errors.OpPrereqError("No disk template specified and the export"
8640
                                   " is missing the disk_template information",
8641
                                   errors.ECODE_INVAL)
8642

    
8643
    if not self.op.disks:
8644
      disks = []
8645
      # TODO: import the disk iv_name too
8646
      for idx in range(constants.MAX_DISKS):
8647
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8648
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8649
          disks.append({constants.IDISK_SIZE: disk_sz})
8650
      self.op.disks = disks
8651
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8652
        raise errors.OpPrereqError("No disk info specified and the export"
8653
                                   " is missing the disk information",
8654
                                   errors.ECODE_INVAL)
8655

    
8656
    if not self.op.nics:
8657
      nics = []
8658
      for idx in range(constants.MAX_NICS):
8659
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8660
          ndict = {}
8661
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8662
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8663
            ndict[name] = v
8664
          nics.append(ndict)
8665
        else:
8666
          break
8667
      self.op.nics = nics
8668

    
8669
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8670
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8671

    
8672
    if (self.op.hypervisor is None and
8673
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8674
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8675

    
8676
    if einfo.has_section(constants.INISECT_HYP):
8677
      # use the export parameters but do not override the ones
8678
      # specified by the user
8679
      for name, value in einfo.items(constants.INISECT_HYP):
8680
        if name not in self.op.hvparams:
8681
          self.op.hvparams[name] = value
8682

    
8683
    if einfo.has_section(constants.INISECT_BEP):
8684
      # use the parameters, without overriding
8685
      for name, value in einfo.items(constants.INISECT_BEP):
8686
        if name not in self.op.beparams:
8687
          self.op.beparams[name] = value
8688
    else:
8689
      # try to read the parameters old style, from the main section
8690
      for name in constants.BES_PARAMETERS:
8691
        if (name not in self.op.beparams and
8692
            einfo.has_option(constants.INISECT_INS, name)):
8693
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8694

    
8695
    if einfo.has_section(constants.INISECT_OSP):
8696
      # use the parameters, without overriding
8697
      for name, value in einfo.items(constants.INISECT_OSP):
8698
        if name not in self.op.osparams:
8699
          self.op.osparams[name] = value
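
  # Illustrative sketch (not part of cmdlib.py): every section above follows
  # the same "fill, never override" pattern -- a value from the export is
  # only used when the opcode did not supply one, which is equivalent to:
  #
  #   for name, value in einfo.items(section):
  #     self.op.hvparams.setdefault(name, value)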
8700

    
8701
  def _RevertToDefaults(self, cluster):
8702
    """Revert the instance parameters to the default values.
8703

8704
    """
8705
    # hvparams
8706
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8707
    for name in self.op.hvparams.keys():
8708
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8709
        del self.op.hvparams[name]
8710
    # beparams
8711
    be_defs = cluster.SimpleFillBE({})
8712
    for name in self.op.beparams.keys():
8713
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8714
        del self.op.beparams[name]
8715
    # nic params
8716
    nic_defs = cluster.SimpleFillNIC({})
8717
    for nic in self.op.nics:
8718
      for name in constants.NICS_PARAMETERS:
8719
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8720
          del nic[name]
8721
    # osparams
8722
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8723
    for name in self.op.osparams.keys():
8724
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8725
        del self.op.osparams[name]
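
  # Illustrative sketch (not part of cmdlib.py): the pruning above keeps only
  # the parameters that differ from the cluster defaults, so values equal to
  # the default are re-inherited instead of being frozen into the instance:
  #
  #   for name in list(params):
  #     if name in defaults and defaults[name] == params[name]:
  #       del params[name]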
8726

    
8727
  def _CalculateFileStorageDir(self):
8728
    """Calculate final instance file storage dir.
8729

8730
    """
8731
    # file storage dir calculation/check
8732
    self.instance_file_storage_dir = None
8733
    if self.op.disk_template in constants.DTS_FILEBASED:
8734
      # build the full file storage dir path
8735
      joinargs = []
8736

    
8737
      if self.op.disk_template == constants.DT_SHARED_FILE:
8738
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8739
      else:
8740
        get_fsd_fn = self.cfg.GetFileStorageDir
8741

    
8742
      cfg_storagedir = get_fsd_fn()
8743
      if not cfg_storagedir:
8744
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8745
      joinargs.append(cfg_storagedir)
8746

    
8747
      if self.op.file_storage_dir is not None:
8748
        joinargs.append(self.op.file_storage_dir)
8749

    
8750
      joinargs.append(self.op.instance_name)
8751

    
8752
      # pylint: disable=W0142
8753
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
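
  # Illustrative sketch (not part of cmdlib.py): for file-based templates the
  # resulting directory is
  #   <cluster file/shared-file storage dir>[/<op.file_storage_dir>]/<name>
  # e.g. with hypothetical values:
  #
  #   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
  #   # -> "/srv/ganeti/file-storage/web/inst1.example.com"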
8754

    
8755
  def CheckPrereq(self):
8756
    """Check prerequisites.
8757

8758
    """
8759
    self._CalculateFileStorageDir()
8760

    
8761
    if self.op.mode == constants.INSTANCE_IMPORT:
8762
      export_info = self._ReadExportInfo()
8763
      self._ReadExportParams(export_info)
8764

    
8765
    if (not self.cfg.GetVGName() and
8766
        self.op.disk_template not in constants.DTS_NOT_LVM):
8767
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8768
                                 " instances", errors.ECODE_STATE)
8769

    
8770
    if (self.op.hypervisor is None or
8771
        self.op.hypervisor == constants.VALUE_AUTO):
8772
      self.op.hypervisor = self.cfg.GetHypervisorType()
8773

    
8774
    cluster = self.cfg.GetClusterInfo()
8775
    enabled_hvs = cluster.enabled_hypervisors
8776
    if self.op.hypervisor not in enabled_hvs:
8777
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8778
                                 " cluster (%s)" % (self.op.hypervisor,
8779
                                  ",".join(enabled_hvs)),
8780
                                 errors.ECODE_STATE)
8781

    
8782
    # Check tag validity
8783
    for tag in self.op.tags:
8784
      objects.TaggableObject.ValidateTag(tag)
8785

    
8786
    # check hypervisor parameter syntax (locally)
8787
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8788
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8789
                                      self.op.hvparams)
8790
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8791
    hv_type.CheckParameterSyntax(filled_hvp)
8792
    self.hv_full = filled_hvp
8793
    # check that we don't specify global parameters on an instance
8794
    _CheckGlobalHvParams(self.op.hvparams)
8795

    
8796
    # fill and remember the beparams dict
8797
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8798
    for param, value in self.op.beparams.iteritems():
8799
      if value == constants.VALUE_AUTO:
8800
        self.op.beparams[param] = default_beparams[param]
8801
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8802
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8803

    
8804
    # build os parameters
8805
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8806

    
8807
    # now that hvp/bep are in final format, let's reset to defaults,
8808
    # if told to do so
8809
    if self.op.identify_defaults:
8810
      self._RevertToDefaults(cluster)
8811

    
8812
    # NIC buildup
8813
    self.nics = []
8814
    for idx, nic in enumerate(self.op.nics):
8815
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8816
      nic_mode = nic_mode_req
8817
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8818
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8819

    
8820
      # in routed mode, for the first nic, the default ip is 'auto'
8821
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8822
        default_ip_mode = constants.VALUE_AUTO
8823
      else:
8824
        default_ip_mode = constants.VALUE_NONE
8825

    
8826
      # ip validity checks
8827
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8828
      if ip is None or ip.lower() == constants.VALUE_NONE:
8829
        nic_ip = None
8830
      elif ip.lower() == constants.VALUE_AUTO:
8831
        if not self.op.name_check:
8832
          raise errors.OpPrereqError("IP address set to auto but name checks"
8833
                                     " have been skipped",
8834
                                     errors.ECODE_INVAL)
8835
        nic_ip = self.hostname1.ip
8836
      else:
8837
        if not netutils.IPAddress.IsValid(ip):
8838
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8839
                                     errors.ECODE_INVAL)
8840
        nic_ip = ip
8841

    
8842
      # TODO: check the ip address for uniqueness
8843
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8844
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8845
                                   errors.ECODE_INVAL)
8846

    
8847
      # MAC address verification
8848
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8849
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8850
        mac = utils.NormalizeAndValidateMac(mac)
8851

    
8852
        try:
8853
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8854
        except errors.ReservationError:
8855
          raise errors.OpPrereqError("MAC address %s already in use"
8856
                                     " in cluster" % mac,
8857
                                     errors.ECODE_NOTUNIQUE)
8858

    
8859
      #  Build nic parameters
8860
      link = nic.get(constants.INIC_LINK, None)
8861
      if link == constants.VALUE_AUTO:
8862
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8863
      nicparams = {}
8864
      if nic_mode_req:
8865
        nicparams[constants.NIC_MODE] = nic_mode
8866
      if link:
8867
        nicparams[constants.NIC_LINK] = link
8868

    
8869
      check_params = cluster.SimpleFillNIC(nicparams)
8870
      objects.NIC.CheckParameterSyntax(check_params)
8871
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8872

    
8873
    # disk checks/pre-build
8874
    default_vg = self.cfg.GetVGName()
8875
    self.disks = []
8876
    for disk in self.op.disks:
8877
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8878
      if mode not in constants.DISK_ACCESS_SET:
8879
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8880
                                   mode, errors.ECODE_INVAL)
8881
      size = disk.get(constants.IDISK_SIZE, None)
8882
      if size is None:
8883
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8884
      try:
8885
        size = int(size)
8886
      except (TypeError, ValueError):
8887
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8888
                                   errors.ECODE_INVAL)
8889

    
8890
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8891
      new_disk = {
8892
        constants.IDISK_SIZE: size,
8893
        constants.IDISK_MODE: mode,
8894
        constants.IDISK_VG: data_vg,
8895
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8896
        }
8897
      if constants.IDISK_ADOPT in disk:
8898
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8899
      self.disks.append(new_disk)
8900

    
8901
    if self.op.mode == constants.INSTANCE_IMPORT:
8902
      disk_images = []
8903
      for idx in range(len(self.disks)):
8904
        option = "disk%d_dump" % idx
8905
        if export_info.has_option(constants.INISECT_INS, option):
8906
          # FIXME: are the old os-es, disk sizes, etc. useful?
8907
          export_name = export_info.get(constants.INISECT_INS, option)
8908
          image = utils.PathJoin(self.op.src_path, export_name)
8909
          disk_images.append(image)
8910
        else:
8911
          disk_images.append(False)
8912

    
8913
      self.src_images = disk_images
8914

    
8915
      old_name = export_info.get(constants.INISECT_INS, "name")
8916
      if self.op.instance_name == old_name:
8917
        for idx, nic in enumerate(self.nics):
8918
          if nic.mac == constants.VALUE_AUTO:
8919
            nic_mac_ini = "nic%d_mac" % idx
8920
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8921

    
8922
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8923

    
8924
    # ip ping checks (we use the same ip that was resolved in CheckArguments)
8925
    if self.op.ip_check:
8926
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8927
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8928
                                   (self.check_ip, self.op.instance_name),
8929
                                   errors.ECODE_NOTUNIQUE)
8930

    
8931
    #### mac address generation
8932
    # By generating here the mac address both the allocator and the hooks get
8933
    # the real final mac address rather than the 'auto' or 'generate' value.
8934
    # There is a race condition between the generation and the instance object
8935
    # creation, which means that we know the mac is valid now, but we're not
8936
    # sure it will be when we actually add the instance. If things go bad
8937
    # adding the instance will abort because of a duplicate mac, and the
8938
    # creation job will fail.
8939
    for nic in self.nics:
8940
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8941
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8942

    
8943
    #### allocator run
8944

    
8945
    if self.op.iallocator is not None:
8946
      self._RunAllocator()
8947

    
8948
    #### node related checks
8949

    
8950
    # check primary node
8951
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8952
    assert self.pnode is not None, \
8953
      "Cannot retrieve locked node %s" % self.op.pnode
8954
    if pnode.offline:
8955
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8956
                                 pnode.name, errors.ECODE_STATE)
8957
    if pnode.drained:
8958
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8959
                                 pnode.name, errors.ECODE_STATE)
8960
    if not pnode.vm_capable:
8961
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8962
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8963

    
8964
    self.secondaries = []
8965

    
8966
    # mirror node verification
8967
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8968
      if self.op.snode == pnode.name:
8969
        raise errors.OpPrereqError("The secondary node cannot be the"
8970
                                   " primary node", errors.ECODE_INVAL)
8971
      _CheckNodeOnline(self, self.op.snode)
8972
      _CheckNodeNotDrained(self, self.op.snode)
8973
      _CheckNodeVmCapable(self, self.op.snode)
8974
      self.secondaries.append(self.op.snode)
8975

    
8976
    nodenames = [pnode.name] + self.secondaries
8977

    
8978
    if not self.adopt_disks:
8979
      # Check lv size requirements, if not adopting
8980
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8981
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8982

    
8983
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8984
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8985
                                disk[constants.IDISK_ADOPT])
8986
                     for disk in self.disks])
8987
      if len(all_lvs) != len(self.disks):
8988
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8989
                                   errors.ECODE_INVAL)
8990
      for lv_name in all_lvs:
8991
        try:
8992
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8993
          # to ReserveLV uses the same syntax
8994
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8995
        except errors.ReservationError:
8996
          raise errors.OpPrereqError("LV named %s used by another instance" %
8997
                                     lv_name, errors.ECODE_NOTUNIQUE)
8998

    
8999
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9000
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9001

    
9002
      node_lvs = self.rpc.call_lv_list([pnode.name],
9003
                                       vg_names.payload.keys())[pnode.name]
9004
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9005
      node_lvs = node_lvs.payload
9006

    
9007
      delta = all_lvs.difference(node_lvs.keys())
9008
      if delta:
9009
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9010
                                   utils.CommaJoin(delta),
9011
                                   errors.ECODE_INVAL)
9012
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9013
      if online_lvs:
9014
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9015
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9016
                                   errors.ECODE_STATE)
9017
      # update the size of disk based on what is found
9018
      for dsk in self.disks:
9019
        dsk[constants.IDISK_SIZE] = \
9020
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9021
                                        dsk[constants.IDISK_ADOPT])][0]))
9022

    
9023
    elif self.op.disk_template == constants.DT_BLOCK:
9024
      # Normalize and de-duplicate device paths
9025
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9026
                       for disk in self.disks])
9027
      if len(all_disks) != len(self.disks):
9028
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9029
                                   errors.ECODE_INVAL)
9030
      baddisks = [d for d in all_disks
9031
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9032
      if baddisks:
9033
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9034
                                   " cannot be adopted" %
9035
                                   (", ".join(baddisks),
9036
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9037
                                   errors.ECODE_INVAL)
9038

    
9039
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9040
                                            list(all_disks))[pnode.name]
9041
      node_disks.Raise("Cannot get block device information from node %s" %
9042
                       pnode.name)
9043
      node_disks = node_disks.payload
9044
      delta = all_disks.difference(node_disks.keys())
9045
      if delta:
9046
        raise errors.OpPrereqError("Missing block device(s): %s" %
9047
                                   utils.CommaJoin(delta),
9048
                                   errors.ECODE_INVAL)
9049
      for dsk in self.disks:
9050
        dsk[constants.IDISK_SIZE] = \
9051
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9052

    
9053
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9054

    
9055
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9056
    # check OS parameters (remotely)
9057
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9058

    
9059
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9060

    
9061
    # memory check on primary node
9062
    if self.op.start:
9063
      _CheckNodeFreeMemory(self, self.pnode.name,
9064
                           "creating instance %s" % self.op.instance_name,
9065
                           self.be_full[constants.BE_MEMORY],
9066
                           self.op.hypervisor)
9067

    
9068
    self.dry_run_result = list(nodenames)
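
  # Illustrative sketch (not part of cmdlib.py): the disk pre-build earlier
  # in this method normalises each user-supplied disk dict into roughly
  #   {IDISK_SIZE: <int, MiB>, IDISK_MODE: <access mode>,
  #    IDISK_VG: <data VG>, IDISK_METAVG: <metadata VG>}
  # so e.g. a request of {IDISK_SIZE: "10240"} with a default VG of "xenvg"
  # (a hypothetical name) ends up as
  #   {IDISK_SIZE: 10240, IDISK_MODE: constants.DISK_RDWR,
  #    IDISK_VG: "xenvg", IDISK_METAVG: "xenvg"}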
9069

    
9070
  def Exec(self, feedback_fn):
9071
    """Create and add the instance to the cluster.
9072

9073
    """
9074
    instance = self.op.instance_name
9075
    pnode_name = self.pnode.name
9076

    
9077
    ht_kind = self.op.hypervisor
9078
    if ht_kind in constants.HTS_REQ_PORT:
9079
      network_port = self.cfg.AllocatePort()
9080
    else:
9081
      network_port = None
9082

    
9083
    disks = _GenerateDiskTemplate(self,
9084
                                  self.op.disk_template,
9085
                                  instance, pnode_name,
9086
                                  self.secondaries,
9087
                                  self.disks,
9088
                                  self.instance_file_storage_dir,
9089
                                  self.op.file_driver,
9090
                                  0,
9091
                                  feedback_fn)
9092

    
9093
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9094
                            primary_node=pnode_name,
9095
                            nics=self.nics, disks=disks,
9096
                            disk_template=self.op.disk_template,
9097
                            admin_up=False,
9098
                            network_port=network_port,
9099
                            beparams=self.op.beparams,
9100
                            hvparams=self.op.hvparams,
9101
                            hypervisor=self.op.hypervisor,
9102
                            osparams=self.op.osparams,
9103
                            )
9104

    
9105
    if self.op.tags:
9106
      for tag in self.op.tags:
9107
        iobj.AddTag(tag)
9108

    
9109
    if self.adopt_disks:
9110
      if self.op.disk_template == constants.DT_PLAIN:
9111
        # rename LVs to the newly-generated names; we need to construct
9112
        # 'fake' LV disks with the old data, plus the new unique_id
9113
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9114
        rename_to = []
9115
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9116
          rename_to.append(t_dsk.logical_id)
9117
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9118
          self.cfg.SetDiskID(t_dsk, pnode_name)
9119
        result = self.rpc.call_blockdev_rename(pnode_name,
9120
                                               zip(tmp_disks, rename_to))
9121
        result.Raise("Failed to rename adoped LVs")
9122
    else:
9123
      feedback_fn("* creating instance disks...")
9124
      try:
9125
        _CreateDisks(self, iobj)
9126
      except errors.OpExecError:
9127
        self.LogWarning("Device creation failed, reverting...")
9128
        try:
9129
          _RemoveDisks(self, iobj)
9130
        finally:
9131
          self.cfg.ReleaseDRBDMinors(instance)
9132
          raise
9133

    
9134
    feedback_fn("adding instance %s to cluster config" % instance)
9135

    
9136
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9137

    
9138
    # Declare that we don't want to remove the instance lock anymore, as we've
9139
    # added the instance to the config
9140
    del self.remove_locks[locking.LEVEL_INSTANCE]
9141

    
9142
    if self.op.mode == constants.INSTANCE_IMPORT:
9143
      # Release unused nodes
9144
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9145
    else:
9146
      # Release all nodes
9147
      _ReleaseLocks(self, locking.LEVEL_NODE)
9148

    
9149
    disk_abort = False
9150
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9151
      feedback_fn("* wiping instance disks...")
9152
      try:
9153
        _WipeDisks(self, iobj)
9154
      except errors.OpExecError, err:
9155
        logging.exception("Wiping disks failed")
9156
        self.LogWarning("Wiping instance disks failed (%s)", err)
9157
        disk_abort = True
9158

    
9159
    if disk_abort:
9160
      # Something is already wrong with the disks, don't do anything else
9161
      pass
9162
    elif self.op.wait_for_sync:
9163
      disk_abort = not _WaitForSync(self, iobj)
9164
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9165
      # make sure the disks are not degraded (still sync-ing is ok)
9166
      feedback_fn("* checking mirrors status")
9167
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9168
    else:
9169
      disk_abort = False
9170

    
9171
    if disk_abort:
9172
      _RemoveDisks(self, iobj)
9173
      self.cfg.RemoveInstance(iobj.name)
9174
      # Make sure the instance lock gets removed
9175
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9176
      raise errors.OpExecError("There are some degraded disks for"
9177
                               " this instance")
9178

    
9179
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9180
      if self.op.mode == constants.INSTANCE_CREATE:
9181
        if not self.op.no_install:
9182
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9183
                        not self.op.wait_for_sync)
9184
          if pause_sync:
9185
            feedback_fn("* pausing disk sync to install instance OS")
9186
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9187
                                                              iobj.disks, True)
9188
            for idx, success in enumerate(result.payload):
9189
              if not success:
9190
                logging.warn("pause-sync of instance %s for disk %d failed",
9191
                             instance, idx)
9192

    
9193
          feedback_fn("* running the instance OS create scripts...")
9194
          # FIXME: pass debug option from opcode to backend
9195
          os_add_result = \
9196
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9197
                                          self.op.debug_level)
9198
          if pause_sync:
9199
            feedback_fn("* resuming disk sync")
9200
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9201
                                                              iobj.disks, False)
9202
            for idx, success in enumerate(result.payload):
9203
              if not success:
9204
                logging.warn("resume-sync of instance %s for disk %d failed",
9205
                             instance, idx)
9206

    
9207
          os_add_result.Raise("Could not add os for instance %s"
9208
                              " on node %s" % (instance, pnode_name))
9209

    
9210
      elif self.op.mode == constants.INSTANCE_IMPORT:
9211
        feedback_fn("* running the instance OS import scripts...")
9212

    
9213
        transfers = []
9214

    
9215
        for idx, image in enumerate(self.src_images):
9216
          if not image:
9217
            continue
9218

    
9219
          # FIXME: pass debug option from opcode to backend
9220
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9221
                                             constants.IEIO_FILE, (image, ),
9222
                                             constants.IEIO_SCRIPT,
9223
                                             (iobj.disks[idx], idx),
9224
                                             None)
9225
          transfers.append(dt)
9226

    
9227
        import_result = \
9228
          masterd.instance.TransferInstanceData(self, feedback_fn,
9229
                                                self.op.src_node, pnode_name,
9230
                                                self.pnode.secondary_ip,
9231
                                                iobj, transfers)
9232
        if not compat.all(import_result):
9233
          self.LogWarning("Some disks for instance %s on node %s were not"
9234
                          " imported successfully" % (instance, pnode_name))
9235

    
9236
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9237
        feedback_fn("* preparing remote import...")
9238
        # The source cluster will stop the instance before attempting to make a
9239
        # connection. In some cases stopping an instance can take a long time,
9240
        # hence the shutdown timeout is added to the connection timeout.
9241
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9242
                           self.op.source_shutdown_timeout)
9243
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9244

    
9245
        assert iobj.primary_node == self.pnode.name
9246
        disk_results = \
9247
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9248
                                        self.source_x509_ca,
9249
                                        self._cds, timeouts)
9250
        if not compat.all(disk_results):
9251
          # TODO: Should the instance still be started, even if some disks
9252
          # failed to import (valid for local imports, too)?
9253
          self.LogWarning("Some disks for instance %s on node %s were not"
9254
                          " imported successfully" % (instance, pnode_name))
9255

    
9256
        # Run rename script on newly imported instance
9257
        assert iobj.name == instance
9258
        feedback_fn("Running rename script for %s" % instance)
9259
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9260
                                                   self.source_instance_name,
9261
                                                   self.op.debug_level)
9262
        if result.fail_msg:
9263
          self.LogWarning("Failed to run rename script for %s on node"
9264
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9265

    
9266
      else:
9267
        # also checked in the prereq part
9268
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9269
                                     % self.op.mode)
9270

    
9271
    if self.op.start:
9272
      iobj.admin_up = True
9273
      self.cfg.Update(iobj, feedback_fn)
9274
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9275
      feedback_fn("* starting instance...")
9276
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9277
                                            False)
9278
      result.Raise("Could not start instance")
9279

    
9280
    return list(iobj.all_nodes)
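
  # Illustrative note (not part of cmdlib.py), summarising the Exec flow
  # above: generate the disk objects -> create or adopt the disks -> add the
  # instance to the config -> optionally wipe and wait for sync -> install
  # the OS / import the data -> optionally start the instance.  Sync is only
  # paused around OS installation for internally mirrored templates when
  # wait_for_sync was not requested:
  #
  #   pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
  #                 not self.op.wait_for_sync)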
9281

    
9282

    
9283
class LUInstanceConsole(NoHooksLU):
9284
  """Connect to an instance's console.
9285

9286
  This is somewhat special in that it returns the command line that
9287
  you need to run on the master node in order to connect to the
9288
  console.
9289

9290
  """
9291
  REQ_BGL = False
9292

    
9293
  def ExpandNames(self):
9294
    self._ExpandAndLockInstance()
9295

    
9296
  def CheckPrereq(self):
9297
    """Check prerequisites.
9298

9299
    This checks that the instance is in the cluster.
9300

9301
    """
9302
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9303
    assert self.instance is not None, \
9304
      "Cannot retrieve locked instance %s" % self.op.instance_name
9305
    _CheckNodeOnline(self, self.instance.primary_node)
9306

    
9307
  def Exec(self, feedback_fn):
9308
    """Connect to the console of an instance
9309

9310
    """
9311
    instance = self.instance
9312
    node = instance.primary_node
9313

    
9314
    node_insts = self.rpc.call_instance_list([node],
9315
                                             [instance.hypervisor])[node]
9316
    node_insts.Raise("Can't get node information from %s" % node)
9317

    
9318
    if instance.name not in node_insts.payload:
9319
      if instance.admin_up:
9320
        state = constants.INSTST_ERRORDOWN
9321
      else:
9322
        state = constants.INSTST_ADMINDOWN
9323
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9324
                               (instance.name, state))
9325

    
9326
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9327

    
9328
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9329

    
9330

    
9331
def _GetInstanceConsole(cluster, instance):
9332
  """Returns console information for an instance.
9333

9334
  @type cluster: L{objects.Cluster}
9335
  @type instance: L{objects.Instance}
9336
  @rtype: dict
9337

9338
  """
9339
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9340
  # beparams and hvparams are passed separately, to avoid editing the
9341
  # instance and then saving the defaults in the instance itself.
9342
  hvparams = cluster.FillHV(instance)
9343
  beparams = cluster.FillBE(instance)
9344
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9345

    
9346
  assert console.instance == instance.name
9347
  assert console.Validate()
9348

    
9349
  return console.ToDict()
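
# Illustrative note (not part of cmdlib.py): the console object is built from
# the *filled* parameter dicts, so cluster-level defaults apply without being
# written back into the instance; callers receive console.ToDict(), a plain
# dict (its keys depend on the hypervisor's console type) that can be passed
# back to the client, e.g. by LUInstanceConsole.Exec above.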
9350

    
9351

    
9352
class LUInstanceReplaceDisks(LogicalUnit):
9353
  """Replace the disks of an instance.
9354

9355
  """
9356
  HPATH = "mirrors-replace"
9357
  HTYPE = constants.HTYPE_INSTANCE
9358
  REQ_BGL = False
9359

    
9360
  def CheckArguments(self):
9361
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9362
                                  self.op.iallocator)
9363

    
9364
  def ExpandNames(self):
9365
    self._ExpandAndLockInstance()
9366

    
9367
    assert locking.LEVEL_NODE not in self.needed_locks
9368
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9369

    
9370
    assert self.op.iallocator is None or self.op.remote_node is None, \
9371
      "Conflicting options"
9372

    
9373
    if self.op.remote_node is not None:
9374
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9375

    
9376
      # Warning: do not remove the locking of the new secondary here
9377
      # unless DRBD8.AddChildren is changed to work in parallel;
9378
      # currently it doesn't since parallel invocations of
9379
      # FindUnusedMinor will conflict
9380
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9381
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9382
    else:
9383
      self.needed_locks[locking.LEVEL_NODE] = []
9384
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9385

    
9386
      if self.op.iallocator is not None:
9387
        # iallocator will select a new node in the same group
9388
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9389

    
9390
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9391
                                   self.op.iallocator, self.op.remote_node,
9392
                                   self.op.disks, False, self.op.early_release)
9393

    
9394
    self.tasklets = [self.replacer]
9395

    
9396
  def DeclareLocks(self, level):
9397
    if level == locking.LEVEL_NODEGROUP:
9398
      assert self.op.remote_node is None
9399
      assert self.op.iallocator is not None
9400
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9401

    
9402
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9403
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9404
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9405

    
9406
    elif level == locking.LEVEL_NODE:
9407
      if self.op.iallocator is not None:
9408
        assert self.op.remote_node is None
9409
        assert not self.needed_locks[locking.LEVEL_NODE]
9410

    
9411
        # Lock member nodes of all locked groups
9412
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9413
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9414
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9415
      else:
9416
        self._LockInstancesNodes()
9417

    
9418
  def BuildHooksEnv(self):
9419
    """Build hooks env.
9420

9421
    This runs on the master, the primary and all the secondaries.
9422

9423
    """
9424
    instance = self.replacer.instance
9425
    env = {
9426
      "MODE": self.op.mode,
9427
      "NEW_SECONDARY": self.op.remote_node,
9428
      "OLD_SECONDARY": instance.secondary_nodes[0],
9429
      }
9430
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9431
    return env
9432

    
9433
  def BuildHooksNodes(self):
9434
    """Build hooks nodes.
9435

9436
    """
9437
    instance = self.replacer.instance
9438
    nl = [
9439
      self.cfg.GetMasterNode(),
9440
      instance.primary_node,
9441
      ]
9442
    if self.op.remote_node is not None:
9443
      nl.append(self.op.remote_node)
9444
    return nl, nl
9445

    
9446
  def CheckPrereq(self):
9447
    """Check prerequisites.
9448

9449
    """
9450
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9451
            self.op.iallocator is None)
9452

    
9453
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9454
    if owned_groups:
9455
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9456

    
9457
    return LogicalUnit.CheckPrereq(self)
9458

    
9459

    
9460
class TLReplaceDisks(Tasklet):
9461
  """Replaces disks for an instance.
9462

9463
  Note: Locking is not within the scope of this class.
9464

9465
  """
9466
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9467
               disks, delay_iallocator, early_release):
9468
    """Initializes this class.
9469

9470
    """
9471
    Tasklet.__init__(self, lu)
9472

    
9473
    # Parameters
9474
    self.instance_name = instance_name
9475
    self.mode = mode
9476
    self.iallocator_name = iallocator_name
9477
    self.remote_node = remote_node
9478
    self.disks = disks
9479
    self.delay_iallocator = delay_iallocator
9480
    self.early_release = early_release
9481

    
9482
    # Runtime data
9483
    self.instance = None
9484
    self.new_node = None
9485
    self.target_node = None
9486
    self.other_node = None
9487
    self.remote_node_info = None
9488
    self.node_secondary_ip = None
9489

    
9490
  @staticmethod
9491
  def CheckArguments(mode, remote_node, iallocator):
9492
    """Helper function for users of this class.
9493

9494
    """
9495
    # check for valid parameter combination
9496
    if mode == constants.REPLACE_DISK_CHG:
9497
      if remote_node is None and iallocator is None:
9498
        raise errors.OpPrereqError("When changing the secondary either an"
9499
                                   " iallocator script must be used or the"
9500
                                   " new node given", errors.ECODE_INVAL)
9501

    
9502
      if remote_node is not None and iallocator is not None:
9503
        raise errors.OpPrereqError("Give either the iallocator or the new"
9504
                                   " secondary, not both", errors.ECODE_INVAL)
9505

    
9506
    elif remote_node is not None or iallocator is not None:
9507
      # Not replacing the secondary
9508
      raise errors.OpPrereqError("The iallocator and new node options can"
9509
                                 " only be used when changing the"
9510
                                 " secondary node", errors.ECODE_INVAL)
9511

    
9512
  @staticmethod
9513
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9514
    """Compute a new secondary node using an IAllocator.
9515

9516
    """
9517
    ial = IAllocator(lu.cfg, lu.rpc,
9518
                     mode=constants.IALLOCATOR_MODE_RELOC,
9519
                     name=instance_name,
9520
                     relocate_from=list(relocate_from))
9521

    
9522
    ial.Run(iallocator_name)
9523

    
9524
    if not ial.success:
9525
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9526
                                 " %s" % (iallocator_name, ial.info),
9527
                                 errors.ECODE_NORES)
9528

    
9529
    if len(ial.result) != ial.required_nodes:
9530
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9531
                                 " of nodes (%s), required %s" %
9532
                                 (iallocator_name,
9533
                                  len(ial.result), ial.required_nodes),
9534
                                 errors.ECODE_FAULT)
9535

    
9536
    remote_node_name = ial.result[0]
9537

    
9538
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9539
               instance_name, remote_node_name)
9540

    
9541
    return remote_node_name
9542

    
9543
  def _FindFaultyDisks(self, node_name):
9544
    """Wrapper for L{_FindFaultyInstanceDisks}.
9545

9546
    """
9547
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9548
                                    node_name, True)
9549

    
9550
  def _CheckDisksActivated(self, instance):
9551
    """Checks if the instance disks are activated.
9552

9553
    @param instance: The instance to check disks
9554
    @return: True if they are activated, False otherwise
9555

9556
    """
9557
    nodes = instance.all_nodes
9558

    
9559
    for idx, dev in enumerate(instance.disks):
9560
      for node in nodes:
9561
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9562
        self.cfg.SetDiskID(dev, node)
9563

    
9564
        result = self.rpc.call_blockdev_find(node, dev)
9565

    
9566
        if result.offline:
9567
          continue
9568
        elif result.fail_msg or not result.payload:
9569
          return False
9570

    
9571
    return True
9572

    
9573
  def CheckPrereq(self):
9574
    """Check prerequisites.
9575

9576
    This checks that the instance is in the cluster.
9577

9578
    """
9579
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9580
    assert instance is not None, \
9581
      "Cannot retrieve locked instance %s" % self.instance_name
9582

    
9583
    if instance.disk_template != constants.DT_DRBD8:
9584
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9585
                                 " instances", errors.ECODE_INVAL)
9586

    
9587
    if len(instance.secondary_nodes) != 1:
9588
      raise errors.OpPrereqError("The instance has a strange layout,"
9589
                                 " expected one secondary but found %d" %
9590
                                 len(instance.secondary_nodes),
9591
                                 errors.ECODE_FAULT)
9592

    
9593
    if not self.delay_iallocator:
9594
      self._CheckPrereq2()
9595

    
9596
  def _CheckPrereq2(self):
9597
    """Check prerequisites, second part.
9598

9599
    This function should always be part of CheckPrereq. It was separated and is
9600
    now called from Exec because during node evacuation iallocator was only
9601
    called with an unmodified cluster model, not taking planned changes into
9602
    account.
9603

9604
    """
    instance = self.instance
9606
    secondary_node = instance.secondary_nodes[0]
9607

    
9608
    if self.iallocator_name is None:
9609
      remote_node = self.remote_node
9610
    else:
9611
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9612
                                       instance.name, instance.secondary_nodes)
9613

    
9614
    if remote_node is None:
9615
      self.remote_node_info = None
9616
    else:
9617
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9618
             "Remote node '%s' is not locked" % remote_node
9619

    
9620
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9621
      assert self.remote_node_info is not None, \
9622
        "Cannot retrieve locked node %s" % remote_node
9623

    
9624
    if remote_node == self.instance.primary_node:
9625
      raise errors.OpPrereqError("The specified node is the primary node of"
9626
                                 " the instance", errors.ECODE_INVAL)
9627

    
9628
    if remote_node == secondary_node:
9629
      raise errors.OpPrereqError("The specified node is already the"
9630
                                 " secondary node of the instance",
9631
                                 errors.ECODE_INVAL)
9632

    
9633
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9634
                                    constants.REPLACE_DISK_CHG):
9635
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9636
                                 errors.ECODE_INVAL)
9637

    
9638
    if self.mode == constants.REPLACE_DISK_AUTO:
9639
      if not self._CheckDisksActivated(instance):
9640
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9641
                                   " first" % self.instance_name,
9642
                                   errors.ECODE_STATE)
9643
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9644
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9645

    
9646
      if faulty_primary and faulty_secondary:
9647
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9648
                                   " one node and can not be repaired"
9649
                                   " automatically" % self.instance_name,
9650
                                   errors.ECODE_STATE)
9651

    
9652
      if faulty_primary:
9653
        self.disks = faulty_primary
9654
        self.target_node = instance.primary_node
9655
        self.other_node = secondary_node
9656
        check_nodes = [self.target_node, self.other_node]
9657
      elif faulty_secondary:
9658
        self.disks = faulty_secondary
9659
        self.target_node = secondary_node
9660
        self.other_node = instance.primary_node
9661
        check_nodes = [self.target_node, self.other_node]
9662
      else:
9663
        self.disks = []
9664
        check_nodes = []
9665

    
9666
    else:
9667
      # Non-automatic modes
9668
      if self.mode == constants.REPLACE_DISK_PRI:
9669
        self.target_node = instance.primary_node
9670
        self.other_node = secondary_node
9671
        check_nodes = [self.target_node, self.other_node]
9672

    
9673
      elif self.mode == constants.REPLACE_DISK_SEC:
9674
        self.target_node = secondary_node
9675
        self.other_node = instance.primary_node
9676
        check_nodes = [self.target_node, self.other_node]
9677

    
9678
      elif self.mode == constants.REPLACE_DISK_CHG:
9679
        self.new_node = remote_node
9680
        self.other_node = instance.primary_node
9681
        self.target_node = secondary_node
9682
        check_nodes = [self.new_node, self.other_node]
9683

    
9684
        _CheckNodeNotDrained(self.lu, remote_node)
9685
        _CheckNodeVmCapable(self.lu, remote_node)
9686

    
9687
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9688
        assert old_node_info is not None
9689
        if old_node_info.offline and not self.early_release:
9690
          # doesn't make sense to delay the release
9691
          self.early_release = True
9692
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9693
                          " early-release mode", secondary_node)
9694

    
9695
      else:
9696
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9697
                                     self.mode)
9698

    
9699
      # If not specified all disks should be replaced
9700
      if not self.disks:
9701
        self.disks = range(len(self.instance.disks))
9702

    
9703
    for node in check_nodes:
9704
      _CheckNodeOnline(self.lu, node)
9705

    
9706
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9707
                                                          self.other_node,
9708
                                                          self.target_node]
9709
                              if node_name is not None)
9710

    
9711
    # Release unneeded node locks
9712
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9713

    
9714
    # Release any owned node group
9715
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9716
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9717

    
9718
    # Check whether disks are valid
9719
    for disk_idx in self.disks:
9720
      instance.FindDisk(disk_idx)
9721

    
9722
    # Get secondary node IP addresses
9723
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9724
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9725

    
9726
  def Exec(self, feedback_fn):
9727
    """Execute disk replacement.
9728

9729
    This dispatches the disk replacement to the appropriate handler.
9730

9731
    """
9732
    if self.delay_iallocator:
9733
      self._CheckPrereq2()
9734

    
9735
    if __debug__:
9736
      # Verify owned locks before starting operation
9737
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9738
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9739
          ("Incorrect node locks, owning %s, expected %s" %
9740
           (owned_nodes, self.node_secondary_ip.keys()))
9741

    
9742
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9743
      assert list(owned_instances) == [self.instance_name], \
9744
          "Instance '%s' not locked" % self.instance_name
9745

    
9746
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9747
          "Should not own any node group lock at this point"
9748

    
9749
    if not self.disks:
9750
      feedback_fn("No disks need replacement")
9751
      return
9752

    
9753
    feedback_fn("Replacing disk(s) %s for %s" %
9754
                (utils.CommaJoin(self.disks), self.instance.name))
9755

    
9756
    activate_disks = (not self.instance.admin_up)
9757

    
9758
    # Activate the instance disks if we're replacing them on a down instance
9759
    if activate_disks:
9760
      _StartInstanceDisks(self.lu, self.instance, True)
9761

    
9762
    try:
9763
      # Should we replace the secondary node?
9764
      if self.new_node is not None:
9765
        fn = self._ExecDrbd8Secondary
9766
      else:
9767
        fn = self._ExecDrbd8DiskOnly
9768

    
9769
      result = fn(feedback_fn)
9770
    finally:
9771
      # Deactivate the instance disks if we're replacing them on a
9772
      # down instance
9773
      if activate_disks:
9774
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9775

    
9776
    if __debug__:
9777
      # Verify owned locks
9778
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9779
      nodes = frozenset(self.node_secondary_ip)
9780
      assert ((self.early_release and not owned_nodes) or
9781
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9782
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9783
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9784

    
9785
    return result
9786

    
9787
  def _CheckVolumeGroup(self, nodes):
9788
    self.lu.LogInfo("Checking volume groups")
9789

    
9790
    vgname = self.cfg.GetVGName()
9791

    
9792
    # Make sure volume group exists on all involved nodes
9793
    results = self.rpc.call_vg_list(nodes)
9794
    if not results:
9795
      raise errors.OpExecError("Can't list volume groups on the nodes")
9796

    
9797
    for node in nodes:
9798
      res = results[node]
9799
      res.Raise("Error checking node %s" % node)
9800
      if vgname not in res.payload:
9801
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9802
                                 (vgname, node))
9803

    
9804
  def _CheckDisksExistence(self, nodes):
9805
    # Check disk existence
9806
    for idx, dev in enumerate(self.instance.disks):
9807
      if idx not in self.disks:
9808
        continue
9809

    
9810
      for node in nodes:
9811
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9812
        self.cfg.SetDiskID(dev, node)
9813

    
9814
        result = self.rpc.call_blockdev_find(node, dev)
9815

    
9816
        msg = result.fail_msg
9817
        if msg or not result.payload:
9818
          if not msg:
9819
            msg = "disk not found"
9820
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9821
                                   (idx, node, msg))
9822

    
9823
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9824
    for idx, dev in enumerate(self.instance.disks):
9825
      if idx not in self.disks:
9826
        continue
9827

    
9828
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9829
                      (idx, node_name))
9830

    
9831
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9832
                                   ldisk=ldisk):
9833
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9834
                                 " replace disks for instance %s" %
9835
                                 (node_name, self.instance.name))
9836

    
9837
  def _CreateNewStorage(self, node_name):
9838
    """Create new storage on the primary or secondary node.
9839

9840
    This is only used for same-node replaces, not for changing the
9841
    secondary node, hence we don't want to modify the existing disk.
9842

9843
    """
    iv_names = {}
9845

    
9846
    for idx, dev in enumerate(self.instance.disks):
9847
      if idx not in self.disks:
9848
        continue
9849

    
9850
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9851

    
9852
      self.cfg.SetDiskID(dev, node_name)
9853

    
9854
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9855
      names = _GenerateUniqueNames(self.lu, lv_names)
9856

    
9857
      vg_data = dev.children[0].logical_id[0]
9858
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9859
                             logical_id=(vg_data, names[0]))
9860
      vg_meta = dev.children[1].logical_id[0]
9861
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9862
                             logical_id=(vg_meta, names[1]))
9863

    
9864
      new_lvs = [lv_data, lv_meta]
9865
      old_lvs = [child.Copy() for child in dev.children]
9866
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9867

    
9868
      # we pass force_create=True to force the LVM creation
9869
      for new_lv in new_lvs:
9870
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9871
                        _GetInstanceInfoText(self.instance), False)
9872

    
9873
    return iv_names
9874

    
9875
  def _CheckDevices(self, node_name, iv_names):
9876
    for name, (dev, _, _) in iv_names.iteritems():
9877
      self.cfg.SetDiskID(dev, node_name)
9878

    
9879
      result = self.rpc.call_blockdev_find(node_name, dev)
9880

    
9881
      msg = result.fail_msg
9882
      if msg or not result.payload:
9883
        if not msg:
9884
          msg = "disk not found"
9885
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9886
                                 (name, msg))
9887

    
9888
      if result.payload.is_degraded:
9889
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9890

    
9891
  def _RemoveOldStorage(self, node_name, iv_names):
9892
    for name, (_, old_lvs, _) in iv_names.iteritems():
9893
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9894

    
9895
      for lv in old_lvs:
9896
        self.cfg.SetDiskID(lv, node_name)
9897

    
9898
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9899
        if msg:
9900
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9901
                             hint="remove unused LVs manually")
9902

    
9903
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9904
    """Replace a disk on the primary or secondary for DRBD 8.
9905

9906
    The algorithm for replace is quite complicated:
9907

9908
      1. for each disk to be replaced:
9909

9910
        1. create new LVs on the target node with unique names
9911
        1. detach old LVs from the drbd device
9912
        1. rename old LVs to name_replaced.<time_t>
9913
        1. rename new LVs to old LVs
9914
        1. attach the new LVs (with the old names now) to the drbd device
9915

9916
      1. wait for sync across all devices
9917

9918
      1. for each modified disk:
9919

9920
        1. remove old LVs (which have the name name_replaces.<time_t>)
9921

9922
    Failures are not very well handled.
9923

9924
    """
    steps_total = 6
9926

    
9927
    # Step: check device activation
9928
    self.lu.LogStep(1, steps_total, "Check device existence")
9929
    self._CheckDisksExistence([self.other_node, self.target_node])
9930
    self._CheckVolumeGroup([self.target_node, self.other_node])
9931

    
9932
    # Step: check other node consistency
9933
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9934
    self._CheckDisksConsistency(self.other_node,
9935
                                self.other_node == self.instance.primary_node,
9936
                                False)
9937

    
9938
    # Step: create new storage
9939
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9940
    iv_names = self._CreateNewStorage(self.target_node)
9941

    
9942
    # Step: for each lv, detach+rename*2+attach
9943
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9944
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9945
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9946

    
9947
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9948
                                                     old_lvs)
9949
      result.Raise("Can't detach drbd from local storage on node"
9950
                   " %s for device %s" % (self.target_node, dev.iv_name))
9951
      #dev.children = []
9952
      #cfg.Update(instance)
9953

    
9954
      # ok, we created the new LVs, so now we know we have the needed
9955
      # storage; as such, we proceed on the target node to rename
9956
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9957
      # using the assumption that logical_id == physical_id (which in
9958
      # turn is the unique_id on that node)
9959

    
9960
      # FIXME(iustin): use a better name for the replaced LVs
9961
      temp_suffix = int(time.time())
9962
      ren_fn = lambda d, suff: (d.physical_id[0],
9963
                                d.physical_id[1] + "_replaced-%s" % suff)
9964

    
9965
      # Build the rename list based on what LVs exist on the node
9966
      rename_old_to_new = []
9967
      for to_ren in old_lvs:
9968
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9969
        if not result.fail_msg and result.payload:
9970
          # device exists
9971
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9972

    
9973
      self.lu.LogInfo("Renaming the old LVs on the target node")
9974
      result = self.rpc.call_blockdev_rename(self.target_node,
9975
                                             rename_old_to_new)
9976
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9977

    
9978
      # Now we rename the new LVs to the old LVs
9979
      self.lu.LogInfo("Renaming the new LVs on the target node")
9980
      rename_new_to_old = [(new, old.physical_id)
9981
                           for old, new in zip(old_lvs, new_lvs)]
9982
      result = self.rpc.call_blockdev_rename(self.target_node,
9983
                                             rename_new_to_old)
9984
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9985

    
9986
      # Intermediate steps of in memory modifications
9987
      for old, new in zip(old_lvs, new_lvs):
9988
        new.logical_id = old.logical_id
9989
        self.cfg.SetDiskID(new, self.target_node)
9990

    
9991
      # We need to modify old_lvs so that removal later removes the
9992
      # right LVs, not the newly added ones; note that old_lvs is a
9993
      # copy here
9994
      for disk in old_lvs:
9995
        disk.logical_id = ren_fn(disk, temp_suffix)
9996
        self.cfg.SetDiskID(disk, self.target_node)
9997

    
9998
      # Now that the new lvs have the old name, we can add them to the device
9999
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10000
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10001
                                                  new_lvs)
10002
      msg = result.fail_msg
10003
      if msg:
10004
        for new_lv in new_lvs:
10005
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10006
                                               new_lv).fail_msg
10007
          if msg2:
10008
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10009
                               hint=("cleanup manually the unused logical"
10010
                                     "volumes"))
10011
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10012

    
10013
    cstep = 5
10014
    if self.early_release:
10015
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10016
      cstep += 1
10017
      self._RemoveOldStorage(self.target_node, iv_names)
10018
      # WARNING: we release both node locks here, do not do other RPCs
10019
      # than WaitForSync to the primary node
10020
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10021
                    names=[self.target_node, self.other_node])
10022

    
10023
    # Wait for sync
10024
    # This can fail as the old devices are degraded and _WaitForSync
10025
    # does a combined result over all disks, so we don't check its return value
10026
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10027
    cstep += 1
10028
    _WaitForSync(self.lu, self.instance)
10029

    
10030
    # Check all devices manually
10031
    self._CheckDevices(self.instance.primary_node, iv_names)
10032

    
10033
    # Step: remove old storage
10034
    if not self.early_release:
10035
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10036
      cstep += 1
10037
      self._RemoveOldStorage(self.target_node, iv_names)
10038

    
10039
  def _ExecDrbd8Secondary(self, feedback_fn):
10040
    """Replace the secondary node for DRBD 8.
10041

10042
    The algorithm for replace is quite complicated:
10043
      - for all disks of the instance:
10044
        - create new LVs on the new node with same names
10045
        - shutdown the drbd device on the old secondary
10046
        - disconnect the drbd network on the primary
10047
        - create the drbd device on the new secondary
10048
        - network attach the drbd on the primary, using an artifice:
10049
          the drbd code for Attach() will connect to the network if it
10050
          finds a device which is connected to the good local disks but
10051
          not network enabled
10052
      - wait for sync across all devices
10053
      - remove all disks from the old secondary
10054

10055
    Failures are not very well handled.
10056

10057
    """
10058
    steps_total = 6
10059

    
10060
    pnode = self.instance.primary_node
10061

    
10062
    # Step: check device activation
10063
    self.lu.LogStep(1, steps_total, "Check device existence")
10064
    self._CheckDisksExistence([self.instance.primary_node])
10065
    self._CheckVolumeGroup([self.instance.primary_node])
10066

    
10067
    # Step: check other node consistency
10068
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10069
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10070

    
10071
    # Step: create new storage
10072
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10073
    for idx, dev in enumerate(self.instance.disks):
10074
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10075
                      (self.new_node, idx))
10076
      # we pass force_create=True to force LVM creation
10077
      for new_lv in dev.children:
10078
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10079
                        _GetInstanceInfoText(self.instance), False)
10080

    
10081
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
10084
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10085
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10086
                                         for dev in self.instance.disks],
10087
                                        self.instance.name)
10088
    logging.debug("Allocated minors %r", minors)
10089

    
10090
    iv_names = {}
10091
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10092
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10093
                      (self.new_node, idx))
10094
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
10098
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10099
      if self.instance.primary_node == o_node1:
10100
        p_minor = o_minor1
10101
      else:
10102
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10103
        p_minor = o_minor2
10104

    
10105
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10106
                      p_minor, new_minor, o_secret)
10107
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10108
                    p_minor, new_minor, o_secret)
10109

    
10110
      iv_names[idx] = (dev, dev.children, new_net_id)
10111
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10112
                    new_net_id)
10113
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10114
                              logical_id=new_alone_id,
10115
                              children=dev.children,
10116
                              size=dev.size)
10117
      try:
10118
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10119
                              _GetInstanceInfoText(self.instance), False)
10120
      except errors.GenericError:
10121
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10122
        raise
10123

    
10124
    # We have new devices, shutdown the drbd on the old secondary
10125
    for idx, dev in enumerate(self.instance.disks):
10126
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10127
      self.cfg.SetDiskID(dev, self.target_node)
10128
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10129
      if msg:
10130
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10131
                           "node: %s" % (idx, msg),
10132
                           hint=("Please cleanup this device manually as"
10133
                                 " soon as possible"))
10134

    
10135
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10136
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10137
                                               self.instance.disks)[pnode]
10138

    
10139
    msg = result.fail_msg
10140
    if msg:
10141
      # detaches didn't succeed (unlikely)
10142
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10143
      raise errors.OpExecError("Can't detach the disks from the network on"
10144
                               " old node: %s" % (msg,))
10145

    
10146
    # if we managed to detach at least one, we update all the disks of
10147
    # the instance to point to the new secondary
10148
    self.lu.LogInfo("Updating instance configuration")
10149
    for dev, _, new_logical_id in iv_names.itervalues():
10150
      dev.logical_id = new_logical_id
10151
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10152

    
10153
    self.cfg.Update(self.instance, feedback_fn)
10154

    
10155
    # and now perform the drbd attach
10156
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10157
                    " (standalone => connected)")
10158
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10159
                                            self.new_node],
10160
                                           self.node_secondary_ip,
10161
                                           self.instance.disks,
10162
                                           self.instance.name,
10163
                                           False)
10164
    for to_node, to_result in result.items():
10165
      msg = to_result.fail_msg
10166
      if msg:
10167
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10168
                           to_node, msg,
10169
                           hint=("please do a gnt-instance info to see the"
10170
                                 " status of disks"))
10171
    cstep = 5
10172
    if self.early_release:
10173
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10174
      cstep += 1
10175
      self._RemoveOldStorage(self.target_node, iv_names)
10176
      # WARNING: we release all node locks here, do not do other RPCs
10177
      # than WaitForSync to the primary node
10178
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10179
                    names=[self.instance.primary_node,
10180
                           self.target_node,
10181
                           self.new_node])
10182

    
10183
    # Wait for sync
10184
    # This can fail as the old devices are degraded and _WaitForSync
10185
    # does a combined result over all disks, so we don't check its return value
10186
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10187
    cstep += 1
10188
    _WaitForSync(self.lu, self.instance)
10189

    
10190
    # Check all devices manually
10191
    self._CheckDevices(self.instance.primary_node, iv_names)
10192

    
10193
    # Step: remove old storage
10194
    if not self.early_release:
10195
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10196
      self._RemoveOldStorage(self.target_node, iv_names)
10197

    
10198

    
10199
class LURepairNodeStorage(NoHooksLU):
10200
  """Repairs the volume group on a node.
10201

10202
  """
10203
  REQ_BGL = False
10204

    
10205
  def CheckArguments(self):
10206
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10207

    
10208
    storage_type = self.op.storage_type
10209

    
10210
    if (constants.SO_FIX_CONSISTENCY not in
10211
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10212
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10213
                                 " repaired" % storage_type,
10214
                                 errors.ECODE_INVAL)
10215

    
10216
  def ExpandNames(self):
10217
    self.needed_locks = {
10218
      locking.LEVEL_NODE: [self.op.node_name],
10219
      }
10220

    
10221
  def _CheckFaultyDisks(self, instance, node_name):
10222
    """Ensure faulty disks abort the opcode or at least warn."""
10223
    try:
10224
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10225
                                  node_name, True):
10226
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10227
                                   " node '%s'" % (instance.name, node_name),
10228
                                   errors.ECODE_STATE)
10229
    except errors.OpPrereqError, err:
10230
      if self.op.ignore_consistency:
10231
        self.proc.LogWarning(str(err.args[0]))
10232
      else:
10233
        raise
10234

    
10235
  def CheckPrereq(self):
10236
    """Check prerequisites.
10237

10238
    """
10239
    # Check whether any instance on this node has faulty disks
10240
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10241
      if not inst.admin_up:
10242
        continue
10243
      check_nodes = set(inst.all_nodes)
10244
      check_nodes.discard(self.op.node_name)
10245
      for inst_node_name in check_nodes:
10246
        self._CheckFaultyDisks(inst, inst_node_name)
10247

    
10248
  def Exec(self, feedback_fn):
10249
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10250
                (self.op.name, self.op.node_name))
10251

    
10252
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10253
    result = self.rpc.call_storage_execute(self.op.node_name,
10254
                                           self.op.storage_type, st_args,
10255
                                           self.op.name,
10256
                                           constants.SO_FIX_CONSISTENCY)
10257
    result.Raise("Failed to repair storage unit '%s' on %s" %
10258
                 (self.op.name, self.op.node_name))
10259

    
10260

    
10261
class LUNodeEvacuate(NoHooksLU):
10262
  """Evacuates instances off a list of nodes.
10263

10264
  """
10265
  REQ_BGL = False
10266

    
10267
  def CheckArguments(self):
10268
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10269

    
10270
  def ExpandNames(self):
10271
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10272

    
10273
    if self.op.remote_node is not None:
10274
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10275
      assert self.op.remote_node
10276

    
10277
      if self.op.remote_node == self.op.node_name:
10278
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10279
                                   " secondary node", errors.ECODE_INVAL)
10280

    
10281
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10282
        raise errors.OpPrereqError("Without the use of an iallocator only"
10283
                                   " secondary instances can be evacuated",
10284
                                   errors.ECODE_INVAL)
10285

    
10286
    # Declare locks
10287
    self.share_locks = _ShareAll()
10288
    self.needed_locks = {
10289
      locking.LEVEL_INSTANCE: [],
10290
      locking.LEVEL_NODEGROUP: [],
10291
      locking.LEVEL_NODE: [],
10292
      }
10293

    
10294
    if self.op.remote_node is None:
10295
      # Iallocator will choose any node(s) in the same group
10296
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10297
    else:
10298
      group_nodes = frozenset([self.op.remote_node])
10299

    
10300
    # Determine nodes to be locked
10301
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10302

    
10303
  def _DetermineInstances(self):
10304
    """Builds list of instances to operate on.
10305

10306
    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10308

    
10309
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10310
      # Primary instances only
10311
      inst_fn = _GetNodePrimaryInstances
10312
      assert self.op.remote_node is None, \
10313
        "Evacuating primary instances requires iallocator"
10314
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10315
      # Secondary instances only
10316
      inst_fn = _GetNodeSecondaryInstances
10317
    else:
10318
      # All instances
10319
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10320
      inst_fn = _GetNodeInstances
10321

    
10322
    return inst_fn(self.cfg, self.op.node_name)
10323

    
10324
  def DeclareLocks(self, level):
10325
    if level == locking.LEVEL_INSTANCE:
10326
      # Lock instances optimistically, needs verification once node and group
10327
      # locks have been acquired
10328
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10329
        set(i.name for i in self._DetermineInstances())
10330

    
10331
    elif level == locking.LEVEL_NODEGROUP:
10332
      # Lock node groups optimistically, needs verification once nodes have
10333
      # been acquired
10334
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10335
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10336

    
10337
    elif level == locking.LEVEL_NODE:
10338
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10339

    
10340
  def CheckPrereq(self):
10341
    # Verify locks
10342
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10343
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10344
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10345

    
10346
    assert owned_nodes == self.lock_nodes
10347

    
10348
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10349
    if owned_groups != wanted_groups:
10350
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10351
                               " current groups are '%s', used to be '%s'" %
10352
                               (utils.CommaJoin(wanted_groups),
10353
                                utils.CommaJoin(owned_groups)))
10354

    
10355
    # Determine affected instances
10356
    self.instances = self._DetermineInstances()
10357
    self.instance_names = [i.name for i in self.instances]
10358

    
10359
    if set(self.instance_names) != owned_instances:
10360
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10361
                               " were acquired, current instances are '%s',"
10362
                               " used to be '%s'" %
10363
                               (self.op.node_name,
10364
                                utils.CommaJoin(self.instance_names),
10365
                                utils.CommaJoin(owned_instances)))
10366

    
10367
    if self.instance_names:
10368
      self.LogInfo("Evacuating instances from node '%s': %s",
10369
                   self.op.node_name,
10370
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10371
    else:
10372
      self.LogInfo("No instances to evacuate from node '%s'",
10373
                   self.op.node_name)
10374

    
10375
    if self.op.remote_node is not None:
10376
      for i in self.instances:
10377
        if i.primary_node == self.op.remote_node:
10378
          raise errors.OpPrereqError("Node %s is the primary node of"
10379
                                     " instance %s, cannot use it as"
10380
                                     " secondary" %
10381
                                     (self.op.remote_node, i.name),
10382
                                     errors.ECODE_INVAL)
10383

    
10384
  def Exec(self, feedback_fn):
10385
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10386

    
10387
    if not self.instance_names:
10388
      # No instances to evacuate
10389
      jobs = []
10390

    
10391
    elif self.op.iallocator is not None:
10392
      # TODO: Implement relocation to other group
10393
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10394
                       evac_mode=self.op.mode,
10395
                       instances=list(self.instance_names))
10396

    
10397
      ial.Run(self.op.iallocator)
10398

    
10399
      if not ial.success:
10400
        raise errors.OpPrereqError("Can't compute node evacuation using"
10401
                                   " iallocator '%s': %s" %
10402
                                   (self.op.iallocator, ial.info),
10403
                                   errors.ECODE_NORES)
10404

    
10405
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10406

    
10407
    elif self.op.remote_node is not None:
10408
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10409
      jobs = [
10410
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10411
                                        remote_node=self.op.remote_node,
10412
                                        disks=[],
10413
                                        mode=constants.REPLACE_DISK_CHG,
10414
                                        early_release=self.op.early_release)]
10415
        for instance_name in self.instance_names
10416
        ]
10417

    
10418
    else:
10419
      raise errors.ProgrammerError("No iallocator or remote node")
10420

    
10421
    return ResultWithJobs(jobs)
10422

    
10423

    
10424
def _SetOpEarlyRelease(early_release, op):
10425
  """Sets C{early_release} flag on opcodes if available.
10426

10427
  """
10428
  try:
10429
    op.early_release = early_release
10430
  except AttributeError:
10431
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10432

    
10433
  return op
10434

    
10435

    
10436
def _NodeEvacDest(use_nodes, group, nodes):
10437
  """Returns group or nodes depending on caller's choice.
10438

10439
  """
10440
  if use_nodes:
10441
    return utils.CommaJoin(nodes)
10442
  else:
10443
    return group
10444

    
10445

    
10446
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10447
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10448

10449
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10450
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10451

10452
  @type lu: L{LogicalUnit}
10453
  @param lu: Logical unit instance
10454
  @type alloc_result: tuple/list
10455
  @param alloc_result: Result from iallocator
10456
  @type early_release: bool
10457
  @param early_release: Whether to release locks early if possible
10458
  @type use_nodes: bool
10459
  @param use_nodes: Whether to display node names instead of groups
10460

10461
  """
  (moved, failed, jobs) = alloc_result
10463

    
10464
  if failed:
10465
    lu.LogWarning("Unable to evacuate instances %s",
10466
                  utils.CommaJoin("%s (%s)" % (name, reason)
10467
                                  for (name, reason) in failed))
10468

    
10469
  if moved:
10470
    lu.LogInfo("Instances to be moved: %s",
10471
               utils.CommaJoin("%s (to %s)" %
10472
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10473
                               for (name, group, nodes) in moved))
10474

    
10475
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10476
              map(opcodes.OpCode.LoadOpCode, ops))
10477
          for ops in jobs]
10478

    
10479

    
10480
class LUInstanceGrowDisk(LogicalUnit):
10481
  """Grow a disk of an instance.
10482

10483
  """
10484
  HPATH = "disk-grow"
10485
  HTYPE = constants.HTYPE_INSTANCE
10486
  REQ_BGL = False
10487

    
10488
  def ExpandNames(self):
10489
    self._ExpandAndLockInstance()
10490
    self.needed_locks[locking.LEVEL_NODE] = []
10491
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10492

    
10493
  def DeclareLocks(self, level):
10494
    if level == locking.LEVEL_NODE:
10495
      self._LockInstancesNodes()
10496

    
10497
  def BuildHooksEnv(self):
10498
    """Build hooks env.
10499

10500
    This runs on the master, the primary and all the secondaries.
10501

10502
    """
10503
    env = {
10504
      "DISK": self.op.disk,
10505
      "AMOUNT": self.op.amount,
10506
      }
10507
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10508
    return env
10509

    
10510
  def BuildHooksNodes(self):
10511
    """Build hooks nodes.
10512

10513
    """
10514
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10515
    return (nl, nl)
10516

    
10517
  def CheckPrereq(self):
10518
    """Check prerequisites.
10519

10520
    This checks that the instance is in the cluster.
10521

10522
    """
10523
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10524
    assert instance is not None, \
10525
      "Cannot retrieve locked instance %s" % self.op.instance_name
10526
    nodenames = list(instance.all_nodes)
10527
    for node in nodenames:
10528
      _CheckNodeOnline(self, node)
10529

    
10530
    self.instance = instance
10531

    
10532
    if instance.disk_template not in constants.DTS_GROWABLE:
10533
      raise errors.OpPrereqError("Instance's disk layout does not support"
10534
                                 " growing", errors.ECODE_INVAL)
10535

    
10536
    self.disk = instance.FindDisk(self.op.disk)
10537

    
10538
    if instance.disk_template not in (constants.DT_FILE,
10539
                                      constants.DT_SHARED_FILE):
10540
      # TODO: check the free disk space for file, when that feature will be
10541
      # supported
10542
      _CheckNodesFreeDiskPerVG(self, nodenames,
10543
                               self.disk.ComputeGrowth(self.op.amount))
10544

    
10545
  def Exec(self, feedback_fn):
10546
    """Execute disk grow.
10547

10548
    """
    instance = self.instance
10550
    disk = self.disk
10551

    
10552
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10553
    if not disks_ok:
10554
      raise errors.OpExecError("Cannot activate block device to grow")
10555

    
10556
    # First run all grow ops in dry-run mode
10557
    for node in instance.all_nodes:
10558
      self.cfg.SetDiskID(disk, node)
10559
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10560
      result.Raise("Grow request failed to node %s" % node)
10561

    
10562
    # We know that (as far as we can test) operations across different
10563
    # nodes will succeed, time to run it for real
10564
    for node in instance.all_nodes:
10565
      self.cfg.SetDiskID(disk, node)
10566
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10567
      result.Raise("Grow request failed to node %s" % node)
10568

    
10569
      # TODO: Rewrite code to work properly
10570
      # DRBD goes into sync mode for a short amount of time after executing the
10571
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10572
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10573
      # time is a work-around.
10574
      time.sleep(5)
10575

    
10576
    disk.RecordGrow(self.op.amount)
10577
    self.cfg.Update(instance, feedback_fn)
10578
    if self.op.wait_for_sync:
10579
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10580
      if disk_abort:
10581
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10582
                             " status; please check the instance")
10583
      if not instance.admin_up:
10584
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10585
    elif not instance.admin_up:
10586
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10587
                           " not supposed to be running because no wait for"
10588
                           " sync mode was requested")
10589

    
10590

    
10591
class LUInstanceQueryData(NoHooksLU):
10592
  """Query runtime instance data.
10593

10594
  """
10595
  REQ_BGL = False
10596

    
10597
  def ExpandNames(self):
10598
    self.needed_locks = {}
10599

    
10600
    # Use locking if requested or when non-static information is wanted
10601
    if not (self.op.static or self.op.use_locking):
10602
      self.LogWarning("Non-static data requested, locks need to be acquired")
10603
      self.op.use_locking = True
10604

    
10605
    if self.op.instances or not self.op.use_locking:
10606
      # Expand instance names right here
10607
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10608
    else:
10609
      # Will use acquired locks
10610
      self.wanted_names = None
10611

    
10612
    if self.op.use_locking:
10613
      self.share_locks = _ShareAll()
10614

    
10615
      if self.wanted_names is None:
10616
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10617
      else:
10618
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10619

    
10620
      self.needed_locks[locking.LEVEL_NODE] = []
10621
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10622

    
10623
  def DeclareLocks(self, level):
10624
    if self.op.use_locking and level == locking.LEVEL_NODE:
10625
      self._LockInstancesNodes()
10626

    
10627
  def CheckPrereq(self):
10628
    """Check prerequisites.
10629

10630
    This only checks the optional instance list against the existing names.
10631

10632
    """
10633
    if self.wanted_names is None:
10634
      assert self.op.use_locking, "Locking was not used"
10635
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10636

    
10637
    self.wanted_instances = \
10638
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10639

    
10640
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10641
    """Returns the status of a block device
10642

10643
    """
    if self.op.static or not node:
10645
      return None
10646

    
10647
    self.cfg.SetDiskID(dev, node)
10648

    
10649
    result = self.rpc.call_blockdev_find(node, dev)
10650
    if result.offline:
10651
      return None
10652

    
10653
    result.Raise("Can't compute disk status for %s" % instance_name)
10654

    
10655
    status = result.payload
10656
    if status is None:
10657
      return None
10658

    
10659
    return (status.dev_path, status.major, status.minor,
10660
            status.sync_percent, status.estimated_time,
10661
            status.is_degraded, status.ldisk_status)
10662

    
10663
  def _ComputeDiskStatus(self, instance, snode, dev):
10664
    """Compute block device status.
10665

10666
    """
10667
    if dev.dev_type in constants.LDS_DRBD:
10668
      # we change the snode then (otherwise we use the one passed in)
10669
      if dev.logical_id[0] == instance.primary_node:
10670
        snode = dev.logical_id[1]
10671
      else:
10672
        snode = dev.logical_id[0]
10673

    
10674
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10675
                                              instance.name, dev)
10676
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10677

    
10678
    if dev.children:
10679
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10680
                                        instance, snode),
10681
                         dev.children)
10682
    else:
10683
      dev_children = []
10684

    
10685
    return {
10686
      "iv_name": dev.iv_name,
10687
      "dev_type": dev.dev_type,
10688
      "logical_id": dev.logical_id,
10689
      "physical_id": dev.physical_id,
10690
      "pstatus": dev_pstatus,
10691
      "sstatus": dev_sstatus,
10692
      "children": dev_children,
10693
      "mode": dev.mode,
10694
      "size": dev.size,
10695
      }
10696

    
10697
  def Exec(self, feedback_fn):
10698
    """Gather and return data"""
10699
    result = {}
10700

    
10701
    cluster = self.cfg.GetClusterInfo()
10702

    
10703
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10704
                                          for i in self.wanted_instances)
10705
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10706
      if self.op.static or pnode.offline:
10707
        remote_state = None
10708
        if pnode.offline:
10709
          self.LogWarning("Primary node %s is marked offline, returning static"
10710
                          " information only for instance %s" %
10711
                          (pnode.name, instance.name))
10712
      else:
10713
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10714
                                                  instance.name,
10715
                                                  instance.hypervisor)
10716
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10717
        remote_info = remote_info.payload
10718
        if remote_info and "state" in remote_info:
10719
          remote_state = "up"
10720
        else:
10721
          remote_state = "down"
10722

    
10723
      if instance.admin_up:
10724
        config_state = "up"
10725
      else:
10726
        config_state = "down"
10727

    
10728
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10729
                  instance.disks)
10730

    
10731
      result[instance.name] = {
10732
        "name": instance.name,
10733
        "config_state": config_state,
10734
        "run_state": remote_state,
10735
        "pnode": instance.primary_node,
10736
        "snodes": instance.secondary_nodes,
10737
        "os": instance.os,
10738
        # this happens to be the same format used for hooks
10739
        "nics": _NICListToTuple(self, instance.nics),
10740
        "disk_template": instance.disk_template,
10741
        "disks": disks,
10742
        "hypervisor": instance.hypervisor,
10743
        "network_port": instance.network_port,
10744
        "hv_instance": instance.hvparams,
10745
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10746
        "be_instance": instance.beparams,
10747
        "be_actual": cluster.FillBE(instance),
10748
        "os_instance": instance.osparams,
10749
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10750
        "serial_no": instance.serial_no,
10751
        "mtime": instance.mtime,
10752
        "ctime": instance.ctime,
10753
        "uuid": instance.uuid,
10754
        }
10755

    
10756
    return result
10757

    
10758

    
10759
class LUInstanceSetParams(LogicalUnit):
10760
  """Modifies an instances's parameters.
10761

10762
  """
10763
  HPATH = "instance-modify"
10764
  HTYPE = constants.HTYPE_INSTANCE
10765
  REQ_BGL = False
10766

    
10767
  def CheckArguments(self):
10768
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10769
            self.op.hvparams or self.op.beparams or self.op.os_name):
10770
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10771

    
10772
    if self.op.hvparams:
10773
      _CheckGlobalHvParams(self.op.hvparams)
10774

    
10775
    # Disk validation
10776
    disk_addremove = 0
10777
    for disk_op, disk_dict in self.op.disks:
10778
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10779
      if disk_op == constants.DDM_REMOVE:
10780
        disk_addremove += 1
10781
        continue
10782
      elif disk_op == constants.DDM_ADD:
10783
        disk_addremove += 1
10784
      else:
10785
        if not isinstance(disk_op, int):
10786
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10787
        if not isinstance(disk_dict, dict):
10788
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10789
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10790

    
10791
      if disk_op == constants.DDM_ADD:
10792
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10793
        if mode not in constants.DISK_ACCESS_SET:
10794
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10795
                                     errors.ECODE_INVAL)
10796
        size = disk_dict.get(constants.IDISK_SIZE, None)
10797
        if size is None:
10798
          raise errors.OpPrereqError("Required disk parameter size missing",
10799
                                     errors.ECODE_INVAL)
10800
        try:
10801
          size = int(size)
10802
        except (TypeError, ValueError), err:
10803
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10804
                                     str(err), errors.ECODE_INVAL)
10805
        disk_dict[constants.IDISK_SIZE] = size
10806
      else:
10807
        # modification of disk
10808
        if constants.IDISK_SIZE in disk_dict:
10809
          raise errors.OpPrereqError("Disk size change not possible, use"
10810
                                     " grow-disk", errors.ECODE_INVAL)
10811

    
10812
    if disk_addremove > 1:
10813
      raise errors.OpPrereqError("Only one disk add or remove operation"
10814
                                 " supported at a time", errors.ECODE_INVAL)
10815

    
10816
    if self.op.disks and self.op.disk_template is not None:
10817
      raise errors.OpPrereqError("Disk template conversion and other disk"
10818
                                 " changes not supported at the same time",
10819
                                 errors.ECODE_INVAL)
10820

    
10821
    if (self.op.disk_template and
10822
        self.op.disk_template in constants.DTS_INT_MIRROR and
10823
        self.op.remote_node is None):
10824
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10825
                                 " one requires specifying a secondary node",
10826
                                 errors.ECODE_INVAL)
10827

    
10828
    # NIC validation
10829
    nic_addremove = 0
10830
    for nic_op, nic_dict in self.op.nics:
10831
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10832
      if nic_op == constants.DDM_REMOVE:
10833
        nic_addremove += 1
10834
        continue
10835
      elif nic_op == constants.DDM_ADD:
10836
        nic_addremove += 1
10837
      else:
10838
        if not isinstance(nic_op, int):
10839
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10840
        if not isinstance(nic_dict, dict):
10841
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10842
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10843

    
10844
      # nic_dict should be a dict
10845
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10846
      if nic_ip is not None:
10847
        if nic_ip.lower() == constants.VALUE_NONE:
10848
          nic_dict[constants.INIC_IP] = None
10849
        else:
10850
          if not netutils.IPAddress.IsValid(nic_ip):
10851
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10852
                                       errors.ECODE_INVAL)
10853

    
10854
      nic_bridge = nic_dict.get("bridge", None)
10855
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10856
      if nic_bridge and nic_link:
10857
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10858
                                   " at the same time", errors.ECODE_INVAL)
10859
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10860
        nic_dict["bridge"] = None
10861
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10862
        nic_dict[constants.INIC_LINK] = None
10863

    
10864
      if nic_op == constants.DDM_ADD:
10865
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10866
        if nic_mac is None:
10867
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10868

    
10869
      if constants.INIC_MAC in nic_dict:
10870
        nic_mac = nic_dict[constants.INIC_MAC]
10871
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10872
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10873

    
10874
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10875
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10876
                                     " modifying an existing nic",
10877
                                     errors.ECODE_INVAL)
10878

    
10879
    if nic_addremove > 1:
10880
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10881
                                 " supported at a time", errors.ECODE_INVAL)
10882
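    # Illustrative note (added annotation, not upstream code): the validated
    # values are lists of (operation, parameters) pairs, where the operation
    # is a DDM_* constant or the integer index of an existing device, e.g.
    #   self.op.disks = [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]
    #   self.op.nics = [(0, {constants.INIC_IP: "none"})]
    # Only one add or remove per device class is accepted per opcode, as
    # enforced by the disk_addremove/nic_addremove counters above.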

    
10883
  def ExpandNames(self):
10884
    self._ExpandAndLockInstance()
10885
    self.needed_locks[locking.LEVEL_NODE] = []
10886
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10887

    
10888
  def DeclareLocks(self, level):
10889
    if level == locking.LEVEL_NODE:
10890
      self._LockInstancesNodes()
10891
      if self.op.disk_template and self.op.remote_node:
10892
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10893
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10894

    
10895
  def BuildHooksEnv(self):
10896
    """Build hooks env.
10897

10898
    This runs on the master, primary and secondaries.
10899

10900
    """
10901
    args = dict()
10902
    if constants.BE_MEMORY in self.be_new:
10903
      args["memory"] = self.be_new[constants.BE_MEMORY]
10904
    if constants.BE_VCPUS in self.be_new:
10905
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10906
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10907
    # information at all.
10908
    if self.op.nics:
10909
      args["nics"] = []
10910
      nic_override = dict(self.op.nics)
10911
      for idx, nic in enumerate(self.instance.nics):
10912
        if idx in nic_override:
10913
          this_nic_override = nic_override[idx]
10914
        else:
10915
          this_nic_override = {}
10916
        if constants.INIC_IP in this_nic_override:
10917
          ip = this_nic_override[constants.INIC_IP]
10918
        else:
10919
          ip = nic.ip
10920
        if constants.INIC_MAC in this_nic_override:
10921
          mac = this_nic_override[constants.INIC_MAC]
10922
        else:
10923
          mac = nic.mac
10924
        if idx in self.nic_pnew:
10925
          nicparams = self.nic_pnew[idx]
10926
        else:
10927
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10928
        mode = nicparams[constants.NIC_MODE]
10929
        link = nicparams[constants.NIC_LINK]
10930
        args["nics"].append((ip, mac, mode, link))
10931
      if constants.DDM_ADD in nic_override:
10932
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10933
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10934
        nicparams = self.nic_pnew[constants.DDM_ADD]
10935
        mode = nicparams[constants.NIC_MODE]
10936
        link = nicparams[constants.NIC_LINK]
10937
        args["nics"].append((ip, mac, mode, link))
10938
      elif constants.DDM_REMOVE in nic_override:
10939
        del args["nics"][-1]
10940

    
10941
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10942
    if self.op.disk_template:
10943
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10944

    
10945
    return env
10946

    
10947
  def BuildHooksNodes(self):
10948
    """Build hooks nodes.
10949

10950
    """
10951
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10952
    return (nl, nl)
10953

    
10954
  def CheckPrereq(self):
10955
    """Check prerequisites.
10956

10957
    This checks the requested changes against the instance and cluster state.
10958

10959
    """
10960
    # checking the new params on the primary/secondary nodes
10961

    
10962
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10963
    cluster = self.cluster = self.cfg.GetClusterInfo()
10964
    assert self.instance is not None, \
10965
      "Cannot retrieve locked instance %s" % self.op.instance_name
10966
    pnode = instance.primary_node
10967
    nodelist = list(instance.all_nodes)
10968

    
10969
    # OS change
10970
    if self.op.os_name and not self.op.force:
10971
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10972
                      self.op.force_variant)
10973
      instance_os = self.op.os_name
10974
    else:
10975
      instance_os = instance.os
10976

    
10977
    if self.op.disk_template:
10978
      if instance.disk_template == self.op.disk_template:
10979
        raise errors.OpPrereqError("Instance already has disk template %s" %
10980
                                   instance.disk_template, errors.ECODE_INVAL)
10981

    
10982
      if (instance.disk_template,
10983
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10984
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10985
                                   " %s to %s" % (instance.disk_template,
10986
                                                  self.op.disk_template),
10987
                                   errors.ECODE_INVAL)
10988
      _CheckInstanceDown(self, instance, "cannot change disk template")
10989
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10990
        if self.op.remote_node == pnode:
10991
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10992
                                     " as the primary node of the instance" %
10993
                                     self.op.remote_node, errors.ECODE_STATE)
10994
        _CheckNodeOnline(self, self.op.remote_node)
10995
        _CheckNodeNotDrained(self, self.op.remote_node)
10996
        # FIXME: here we assume that the old instance type is DT_PLAIN
10997
        assert instance.disk_template == constants.DT_PLAIN
10998
        disks = [{constants.IDISK_SIZE: d.size,
10999
                  constants.IDISK_VG: d.logical_id[0]}
11000
                 for d in instance.disks]
11001
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11002
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11003

    
11004
    # hvparams processing
11005
    if self.op.hvparams:
11006
      hv_type = instance.hypervisor
11007
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11008
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11009
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11010

    
11011
      # local check
11012
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11013
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11014
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11015
      self.hv_inst = i_hvdict # the new dict (without defaults)
11016
    else:
11017
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11018
                                              instance.hvparams)
11019
      self.hv_new = self.hv_inst = {}
11020

    
11021
    # beparams processing
11022
    if self.op.beparams:
11023
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11024
                                   use_none=True)
11025
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11026
      be_new = cluster.SimpleFillBE(i_bedict)
11027
      self.be_proposed = self.be_new = be_new # the new actual values
11028
      self.be_inst = i_bedict # the new dict (without defaults)
11029
    else:
11030
      self.be_new = self.be_inst = {}
11031
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11032
    be_old = cluster.FillBE(instance)
11033

    
11034
    # CPU param validation -- checking every time a parameter is
11035
    # changed to cover all cases where either CPU mask or vcpus have
11036
    # changed
11037
    if (constants.BE_VCPUS in self.be_proposed and
11038
        constants.HV_CPU_MASK in self.hv_proposed):
11039
      cpu_list = \
11040
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11041
      # Verify mask is consistent with number of vCPUs. Can skip this
11042
      # test if only 1 entry in the CPU mask, which means same mask
11043
      # is applied to all vCPUs.
11044
      if (len(cpu_list) > 1 and
11045
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11046
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11047
                                   " CPU mask [%s]" %
11048
                                   (self.be_proposed[constants.BE_VCPUS],
11049
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11050
                                   errors.ECODE_INVAL)
11051

    
11052
      # Only perform this test if a new CPU mask is given
11053
      if constants.HV_CPU_MASK in self.hv_new:
11054
        # Calculate the largest CPU number requested
11055
        max_requested_cpu = max(map(max, cpu_list))
11056
        # Check that all of the instance's nodes have enough physical CPUs to
11057
        # satisfy the requested CPU mask
11058
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11059
                                max_requested_cpu + 1, instance.hypervisor)
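      # Hedged example (added annotation): with a hypothetical mask such as
      # "1:2:3", where colons separate per-vCPU entries, the parsed list has
      # three entries, so BE_VCPUS must be 3 and every node needs at least
      # 4 physical CPUs (highest requested CPU index plus one).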
11060

    
11061
    # osparams processing
11062
    if self.op.osparams:
11063
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11064
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11065
      self.os_inst = i_osdict # the new dict (without defaults)
11066
    else:
11067
      self.os_inst = {}
11068

    
11069
    self.warn = []
11070

    
11071
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11072
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11073
      mem_check_list = [pnode]
11074
      if be_new[constants.BE_AUTO_BALANCE]:
11075
        # either we changed auto_balance to yes or it was from before
11076
        mem_check_list.extend(instance.secondary_nodes)
11077
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11078
                                                  instance.hypervisor)
11079
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11080
                                         instance.hypervisor)
11081
      pninfo = nodeinfo[pnode]
11082
      msg = pninfo.fail_msg
11083
      if msg:
11084
        # Assume the primary node is unreachable and go ahead
11085
        self.warn.append("Can't get info from primary node %s: %s" %
11086
                         (pnode, msg))
11087
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11088
        self.warn.append("Node data from primary node %s doesn't contain"
11089
                         " free memory information" % pnode)
11090
      elif instance_info.fail_msg:
11091
        self.warn.append("Can't get instance runtime information: %s" %
11092
                        instance_info.fail_msg)
11093
      else:
11094
        if instance_info.payload:
11095
          current_mem = int(instance_info.payload["memory"])
11096
        else:
11097
          # Assume instance not running
11098
          # (there is a slight race condition here, but it's not very probable,
11099
          # and we have no other way to check)
11100
          current_mem = 0
11101
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11102
                    pninfo.payload["memory_free"])
11103
        if miss_mem > 0:
11104
          raise errors.OpPrereqError("This change will prevent the instance"
11105
                                     " from starting, due to %d MB of memory"
11106
                                     " missing on its primary node" % miss_mem,
11107
                                     errors.ECODE_NORES)
11108

    
11109
      if be_new[constants.BE_AUTO_BALANCE]:
11110
        for node, nres in nodeinfo.items():
11111
          if node not in instance.secondary_nodes:
11112
            continue
11113
          nres.Raise("Can't get info from secondary node %s" % node,
11114
                     prereq=True, ecode=errors.ECODE_STATE)
11115
          if not isinstance(nres.payload.get("memory_free", None), int):
11116
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11117
                                       " memory information" % node,
11118
                                       errors.ECODE_STATE)
11119
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11120
            raise errors.OpPrereqError("This change will prevent the instance"
11121
                                       " from failover to its secondary node"
11122
                                       " %s, due to not enough memory" % node,
11123
                                       errors.ECODE_STATE)
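    # Worked example (added annotation): raising BE_MEMORY to 2048 MB while
    # the hypervisor reports 512 MB in use by the instance and 1024 MB free
    # on the primary node gives miss_mem = 2048 - 512 - 1024 = 512 > 0, so
    # the check above fails with ECODE_NORES; secondary nodes are only
    # checked when auto_balance is enabled.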
11124

    
11125
    # NIC processing
11126
    self.nic_pnew = {}
11127
    self.nic_pinst = {}
11128
    for nic_op, nic_dict in self.op.nics:
11129
      if nic_op == constants.DDM_REMOVE:
11130
        if not instance.nics:
11131
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11132
                                     errors.ECODE_INVAL)
11133
        continue
11134
      if nic_op != constants.DDM_ADD:
11135
        # an existing nic
11136
        if not instance.nics:
11137
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11138
                                     " no NICs" % nic_op,
11139
                                     errors.ECODE_INVAL)
11140
        if nic_op < 0 or nic_op >= len(instance.nics):
11141
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11142
                                     " are 0 to %d" %
11143
                                     (nic_op, len(instance.nics) - 1),
11144
                                     errors.ECODE_INVAL)
11145
        old_nic_params = instance.nics[nic_op].nicparams
11146
        old_nic_ip = instance.nics[nic_op].ip
11147
      else:
11148
        old_nic_params = {}
11149
        old_nic_ip = None
11150

    
11151
      update_params_dict = dict([(key, nic_dict[key])
11152
                                 for key in constants.NICS_PARAMETERS
11153
                                 if key in nic_dict])
11154

    
11155
      if "bridge" in nic_dict:
11156
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11157

    
11158
      new_nic_params = _GetUpdatedParams(old_nic_params,
11159
                                         update_params_dict)
11160
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11161
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11162
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11163
      self.nic_pinst[nic_op] = new_nic_params
11164
      self.nic_pnew[nic_op] = new_filled_nic_params
11165
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11166

    
11167
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11168
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11169
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11170
        if msg:
11171
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11172
          if self.op.force:
11173
            self.warn.append(msg)
11174
          else:
11175
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11176
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11177
        if constants.INIC_IP in nic_dict:
11178
          nic_ip = nic_dict[constants.INIC_IP]
11179
        else:
11180
          nic_ip = old_nic_ip
11181
        if nic_ip is None:
11182
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11183
                                     " on a routed nic", errors.ECODE_INVAL)
11184
      if constants.INIC_MAC in nic_dict:
11185
        nic_mac = nic_dict[constants.INIC_MAC]
11186
        if nic_mac is None:
11187
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11188
                                     errors.ECODE_INVAL)
11189
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11190
          # otherwise generate the mac
11191
          nic_dict[constants.INIC_MAC] = \
11192
            self.cfg.GenerateMAC(self.proc.GetECId())
11193
        else:
11194
          # or validate/reserve the current one
11195
          try:
11196
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11197
          except errors.ReservationError:
11198
            raise errors.OpPrereqError("MAC address %s already in use"
11199
                                       " in cluster" % nic_mac,
11200
                                       errors.ECODE_NOTUNIQUE)
11201

    
11202
    # DISK processing
11203
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11204
      raise errors.OpPrereqError("Disk operations not supported for"
11205
                                 " diskless instances",
11206
                                 errors.ECODE_INVAL)
11207
    for disk_op, _ in self.op.disks:
11208
      if disk_op == constants.DDM_REMOVE:
11209
        if len(instance.disks) == 1:
11210
          raise errors.OpPrereqError("Cannot remove the last disk of"
11211
                                     " an instance", errors.ECODE_INVAL)
11212
        _CheckInstanceDown(self, instance, "cannot remove disks")
11213

    
11214
      if (disk_op == constants.DDM_ADD and
11215
          len(instance.disks) >= constants.MAX_DISKS):
11216
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11217
                                   " add more" % constants.MAX_DISKS,
11218
                                   errors.ECODE_STATE)
11219
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11220
        # an existing disk
11221
        if disk_op < 0 or disk_op >= len(instance.disks):
11222
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11223
                                     " are 0 to %d" %
11224
                                     (disk_op, len(instance.disks) - 1),
11225
                                     errors.ECODE_INVAL)
11226

    
11227
    return
11228

    
11229
  def _ConvertPlainToDrbd(self, feedback_fn):
11230
    """Converts an instance from plain to drbd.
11231

11232
    """
11233
    feedback_fn("Converting template to drbd")
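    # Outline (descriptive comment, not upstream): every generated DRBD8 disk
    # has two LV children, [data, meta]. The meta LVs (plus both LVs on the
    # new secondary) are created from scratch, the existing plain LVs are
    # renamed to become the data children on the primary, and only then are
    # the DRBD devices themselves assembled and left to sync.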
11234
    instance = self.instance
11235
    pnode = instance.primary_node
11236
    snode = self.op.remote_node
11237

    
11238
    # create a fake disk info for _GenerateDiskTemplate
11239
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11240
                  constants.IDISK_VG: d.logical_id[0]}
11241
                 for d in instance.disks]
11242
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11243
                                      instance.name, pnode, [snode],
11244
                                      disk_info, None, None, 0, feedback_fn)
11245
    info = _GetInstanceInfoText(instance)
11246
    feedback_fn("Creating additional volumes...")
11247
    # first, create the missing data and meta devices
11248
    for disk in new_disks:
11249
      # unfortunately this is... not too nice
11250
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11251
                            info, True)
11252
      for child in disk.children:
11253
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11254
    # at this stage, all new LVs have been created, we can rename the
11255
    # old ones
11256
    feedback_fn("Renaming original volumes...")
11257
    rename_list = [(o, n.children[0].logical_id)
11258
                   for (o, n) in zip(instance.disks, new_disks)]
11259
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11260
    result.Raise("Failed to rename original LVs")
11261

    
11262
    feedback_fn("Initializing DRBD devices...")
11263
    # all child devices are in place, we can now create the DRBD devices
11264
    for disk in new_disks:
11265
      for node in [pnode, snode]:
11266
        f_create = node == pnode
11267
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11268

    
11269
    # at this point, the instance has been modified
11270
    instance.disk_template = constants.DT_DRBD8
11271
    instance.disks = new_disks
11272
    self.cfg.Update(instance, feedback_fn)
11273

    
11274
    # disks are created, waiting for sync
11275
    disk_abort = not _WaitForSync(self, instance,
11276
                                  oneshot=not self.op.wait_for_sync)
11277
    if disk_abort:
11278
      raise errors.OpExecError("There are some degraded disks for"
11279
                               " this instance, please clean up manually")
11280

    
11281
  def _ConvertDrbdToPlain(self, feedback_fn):
11282
    """Converts an instance from drbd to plain.
11283

11284
    """
11285
    instance = self.instance
11286
    assert len(instance.secondary_nodes) == 1
11287
    pnode = instance.primary_node
11288
    snode = instance.secondary_nodes[0]
11289
    feedback_fn("Converting template to plain")
11290

    
11291
    old_disks = instance.disks
11292
    new_disks = [d.children[0] for d in old_disks]
11293

    
11294
    # copy over size and mode
11295
    for parent, child in zip(old_disks, new_disks):
11296
      child.size = parent.size
11297
      child.mode = parent.mode
11298

    
11299
    # update instance structure
11300
    instance.disks = new_disks
11301
    instance.disk_template = constants.DT_PLAIN
11302
    self.cfg.Update(instance, feedback_fn)
11303

    
11304
    feedback_fn("Removing volumes on the secondary node...")
11305
    for disk in old_disks:
11306
      self.cfg.SetDiskID(disk, snode)
11307
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11308
      if msg:
11309
        self.LogWarning("Could not remove block device %s on node %s,"
11310
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11311

    
11312
    feedback_fn("Removing unneeded volumes on the primary node...")
11313
    for idx, disk in enumerate(old_disks):
11314
      meta = disk.children[1]
11315
      self.cfg.SetDiskID(meta, pnode)
11316
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11317
      if msg:
11318
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11319
                        " continuing anyway: %s", idx, pnode, msg)
11320

    
11321
  def Exec(self, feedback_fn):
11322
    """Modifies an instance.
11323

11324
    All parameters take effect only at the next restart of the instance.
11325

11326
    """
11327
    # Process here the warnings from CheckPrereq, as we don't have a
11328
    # feedback_fn there.
11329
    for warn in self.warn:
11330
      feedback_fn("WARNING: %s" % warn)
11331

    
11332
    result = []
11333
    instance = self.instance
11334
    # disk changes
11335
    for disk_op, disk_dict in self.op.disks:
11336
      if disk_op == constants.DDM_REMOVE:
11337
        # remove the last disk
11338
        device = instance.disks.pop()
11339
        device_idx = len(instance.disks)
11340
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11341
          self.cfg.SetDiskID(disk, node)
11342
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11343
          if msg:
11344
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11345
                            " continuing anyway", device_idx, node, msg)
11346
        result.append(("disk/%d" % device_idx, "remove"))
11347
      elif disk_op == constants.DDM_ADD:
11348
        # add a new disk
11349
        if instance.disk_template in (constants.DT_FILE,
11350
                                        constants.DT_SHARED_FILE):
11351
          file_driver, file_path = instance.disks[0].logical_id
11352
          file_path = os.path.dirname(file_path)
11353
        else:
11354
          file_driver = file_path = None
11355
        disk_idx_base = len(instance.disks)
11356
        new_disk = _GenerateDiskTemplate(self,
11357
                                         instance.disk_template,
11358
                                         instance.name, instance.primary_node,
11359
                                         instance.secondary_nodes,
11360
                                         [disk_dict],
11361
                                         file_path,
11362
                                         file_driver,
11363
                                         disk_idx_base, feedback_fn)[0]
11364
        instance.disks.append(new_disk)
11365
        info = _GetInstanceInfoText(instance)
11366

    
11367
        logging.info("Creating volume %s for instance %s",
11368
                     new_disk.iv_name, instance.name)
11369
        # Note: this needs to be kept in sync with _CreateDisks
11370
        # HARDCODE
11371
        for node in instance.all_nodes:
11372
          f_create = node == instance.primary_node
11373
          try:
11374
            _CreateBlockDev(self, node, instance, new_disk,
11375
                            f_create, info, f_create)
11376
          except errors.OpExecError, err:
11377
            self.LogWarning("Failed to create volume %s (%s) on"
11378
                            " node %s: %s",
11379
                            new_disk.iv_name, new_disk, node, err)
11380
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11381
                       (new_disk.size, new_disk.mode)))
11382
      else:
11383
        # change a given disk
11384
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11385
        result.append(("disk.mode/%d" % disk_op,
11386
                       disk_dict[constants.IDISK_MODE]))
11387

    
11388
    if self.op.disk_template:
11389
      r_shut = _ShutdownInstanceDisks(self, instance)
11390
      if not r_shut:
11391
        raise errors.OpExecError("Cannot shut down instance disks, unable to"
11392
                                 " proceed with disk template conversion")
11393
      mode = (instance.disk_template, self.op.disk_template)
11394
      try:
11395
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11396
      except:
11397
        self.cfg.ReleaseDRBDMinors(instance.name)
11398
        raise
11399
      result.append(("disk_template", self.op.disk_template))
11400

    
11401
    # NIC changes
11402
    for nic_op, nic_dict in self.op.nics:
11403
      if nic_op == constants.DDM_REMOVE:
11404
        # remove the last nic
11405
        del instance.nics[-1]
11406
        result.append(("nic.%d" % len(instance.nics), "remove"))
11407
      elif nic_op == constants.DDM_ADD:
11408
        # mac and bridge should be set, by now
11409
        mac = nic_dict[constants.INIC_MAC]
11410
        ip = nic_dict.get(constants.INIC_IP, None)
11411
        nicparams = self.nic_pinst[constants.DDM_ADD]
11412
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11413
        instance.nics.append(new_nic)
11414
        result.append(("nic.%d" % (len(instance.nics) - 1),
11415
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11416
                       (new_nic.mac, new_nic.ip,
11417
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11418
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11419
                       )))
11420
      else:
11421
        for key in (constants.INIC_MAC, constants.INIC_IP):
11422
          if key in nic_dict:
11423
            setattr(instance.nics[nic_op], key, nic_dict[key])
11424
        if nic_op in self.nic_pinst:
11425
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11426
        for key, val in nic_dict.iteritems():
11427
          result.append(("nic.%s/%d" % (key, nic_op), val))
11428

    
11429
    # hvparams changes
11430
    if self.op.hvparams:
11431
      instance.hvparams = self.hv_inst
11432
      for key, val in self.op.hvparams.iteritems():
11433
        result.append(("hv/%s" % key, val))
11434

    
11435
    # beparams changes
11436
    if self.op.beparams:
11437
      instance.beparams = self.be_inst
11438
      for key, val in self.op.beparams.iteritems():
11439
        result.append(("be/%s" % key, val))
11440

    
11441
    # OS change
11442
    if self.op.os_name:
11443
      instance.os = self.op.os_name
11444

    
11445
    # osparams changes
11446
    if self.op.osparams:
11447
      instance.osparams = self.os_inst
11448
      for key, val in self.op.osparams.iteritems():
11449
        result.append(("os/%s" % key, val))
11450

    
11451
    self.cfg.Update(instance, feedback_fn)
11452

    
11453
    return result
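    # Shape of the return value (illustrative annotation): a list of
    # (parameter, change) feedback pairs, for example
    #   [("disk/1", "add:size=1024,mode=rw"), ("be/memory", 2048)]
    # describing what was actually modified.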
11454

    
11455
  _DISK_CONVERSIONS = {
11456
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11457
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11458
    }
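  # Added note: Exec() builds the key (current template, requested template)
  # and dispatches through this table, e.g. the plain->drbd pair above maps
  # to _ConvertPlainToDrbd(self, feedback_fn); unsupported pairs are rejected
  # earlier in CheckPrereq.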
11459

    
11460

    
11461
class LUInstanceChangeGroup(LogicalUnit):
11462
  HPATH = "instance-change-group"
11463
  HTYPE = constants.HTYPE_INSTANCE
11464
  REQ_BGL = False
11465

    
11466
  def ExpandNames(self):
11467
    self.share_locks = _ShareAll()
11468
    self.needed_locks = {
11469
      locking.LEVEL_NODEGROUP: [],
11470
      locking.LEVEL_NODE: [],
11471
      }
11472

    
11473
    self._ExpandAndLockInstance()
11474

    
11475
    if self.op.target_groups:
11476
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11477
                                  self.op.target_groups)
11478
    else:
11479
      self.req_target_uuids = None
11480

    
11481
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11482

    
11483
  def DeclareLocks(self, level):
11484
    if level == locking.LEVEL_NODEGROUP:
11485
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11486

    
11487
      if self.req_target_uuids:
11488
        lock_groups = set(self.req_target_uuids)
11489

    
11490
        # Lock all groups used by instance optimistically; this requires going
11491
        # via the node before it's locked, requiring verification later on
11492
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11493
        lock_groups.update(instance_groups)
11494
      else:
11495
        # No target groups, need to lock all of them
11496
        lock_groups = locking.ALL_SET
11497

    
11498
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11499

    
11500
    elif level == locking.LEVEL_NODE:
11501
      if self.req_target_uuids:
11502
        # Lock all nodes used by instances
11503
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11504
        self._LockInstancesNodes()
11505

    
11506
        # Lock all nodes in all potential target groups
11507
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11508
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11509
        member_nodes = [node_name
11510
                        for group in lock_groups
11511
                        for node_name in self.cfg.GetNodeGroup(group).members]
11512
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11513
      else:
11514
        # Lock all nodes as all groups are potential targets
11515
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
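    # Added annotation: the groups above are gathered before the node locks
    # are actually held (optimistic locking); CheckPrereq() re-derives the
    # instance's groups and asserts that its nodes did not change in the
    # meantime before trusting this information.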
11516

    
11517
  def CheckPrereq(self):
11518
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11519
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11520
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11521

    
11522
    assert (self.req_target_uuids is None or
11523
            owned_groups.issuperset(self.req_target_uuids))
11524
    assert owned_instances == set([self.op.instance_name])
11525

    
11526
    # Get instance information
11527
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11528

    
11529
    # Check if node groups for locked instance are still correct
11530
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11531
      ("Instance %s's nodes changed while we kept the lock" %
11532
       self.op.instance_name)
11533

    
11534
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11535
                                           owned_groups)
11536

    
11537
    if self.req_target_uuids:
11538
      # User requested specific target groups
11539
      self.target_uuids = self.req_target_uuids
11540
    else:
11541
      # All groups except those used by the instance are potential targets
11542
      self.target_uuids = owned_groups - inst_groups
11543

    
11544
    conflicting_groups = self.target_uuids & inst_groups
11545
    if conflicting_groups:
11546
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11547
                                 " used by the instance '%s'" %
11548
                                 (utils.CommaJoin(conflicting_groups),
11549
                                  self.op.instance_name),
11550
                                 errors.ECODE_INVAL)
11551

    
11552
    if not self.target_uuids:
11553
      raise errors.OpPrereqError("There are no possible target groups",
11554
                                 errors.ECODE_INVAL)
11555

    
11556
  def BuildHooksEnv(self):
11557
    """Build hooks env.
11558

11559
    """
11560
    assert self.target_uuids
11561

    
11562
    env = {
11563
      "TARGET_GROUPS": " ".join(self.target_uuids),
11564
      }
11565

    
11566
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11567

    
11568
    return env
11569

    
11570
  def BuildHooksNodes(self):
11571
    """Build hooks nodes.
11572

11573
    """
11574
    mn = self.cfg.GetMasterNode()
11575
    return ([mn], [mn])
11576

    
11577
  def Exec(self, feedback_fn):
11578
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11579

    
11580
    assert instances == [self.op.instance_name], "Instance not locked"
11581

    
11582
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11583
                     instances=instances, target_groups=list(self.target_uuids))
11584

    
11585
    ial.Run(self.op.iallocator)
11586

    
11587
    if not ial.success:
11588
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11589
                                 " instance '%s' using iallocator '%s': %s" %
11590
                                 (self.op.instance_name, self.op.iallocator,
11591
                                  ial.info),
11592
                                 errors.ECODE_NORES)
11593

    
11594
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11595

    
11596
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11597
                 " instance '%s'", len(jobs), self.op.instance_name)
11598

    
11599
    return ResultWithJobs(jobs)
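    # Added note: 'jobs' is a list of opcode lists (one inner list per job)
    # built from the iallocator result by _LoadNodeEvacResult; returning it
    # wrapped in ResultWithJobs lets the caller submit them as separate jobs.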
11600

    
11601

    
11602
class LUBackupQuery(NoHooksLU):
11603
  """Query the exports list
11604

11605
  """
11606
  REQ_BGL = False
11607

    
11608
  def ExpandNames(self):
11609
    self.needed_locks = {}
11610
    self.share_locks[locking.LEVEL_NODE] = 1
11611
    if not self.op.nodes:
11612
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11613
    else:
11614
      self.needed_locks[locking.LEVEL_NODE] = \
11615
        _GetWantedNodes(self, self.op.nodes)
11616

    
11617
  def Exec(self, feedback_fn):
11618
    """Compute the list of all the exported system images.
11619

11620
    @rtype: dict
11621
    @return: a dictionary with the structure node->(export-list)
11622
        where export-list is a list of the instances exported on
11623
        that node.
11624

11625
    """
11626
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11627
    rpcresult = self.rpc.call_export_list(self.nodes)
11628
    result = {}
11629
    for node in rpcresult:
11630
      if rpcresult[node].fail_msg:
11631
        result[node] = False
11632
      else:
11633
        result[node] = rpcresult[node].payload
11634

    
11635
    return result
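    # Example of the returned mapping (annotation only; names are made up):
    # a reachable node maps to its export list, a failing node to False, e.g.
    #   {"node1": ["inst1"], "node2": False}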
11636

    
11637

    
11638
class LUBackupPrepare(NoHooksLU):
11639
  """Prepares an instance for an export and returns useful information.
11640

11641
  """
11642
  REQ_BGL = False
11643

    
11644
  def ExpandNames(self):
11645
    self._ExpandAndLockInstance()
11646

    
11647
  def CheckPrereq(self):
11648
    """Check prerequisites.
11649

11650
    """
11651
    instance_name = self.op.instance_name
11652

    
11653
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11654
    assert self.instance is not None, \
11655
          "Cannot retrieve locked instance %s" % self.op.instance_name
11656
    _CheckNodeOnline(self, self.instance.primary_node)
11657

    
11658
    self._cds = _GetClusterDomainSecret()
11659

    
11660
  def Exec(self, feedback_fn):
11661
    """Prepares an instance for an export.
11662

11663
    """
11664
    instance = self.instance
11665

    
11666
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11667
      salt = utils.GenerateSecret(8)
11668

    
11669
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11670
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11671
                                              constants.RIE_CERT_VALIDITY)
11672
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11673

    
11674
      (name, cert_pem) = result.payload
11675

    
11676
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11677
                                             cert_pem)
11678

    
11679
      return {
11680
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11681
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11682
                          salt),
11683
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11684
        }
11685

    
11686
    return None
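    # Added cross-reference: for remote mode the "x509_key_name" tuple is
    # (name, HMAC(cluster domain secret, name, salt), salt); LUBackupExport
    # unpacks it in CheckPrereq and verifies the HMAC against the same
    # cluster domain secret before trusting the key name.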
11687

    
11688

    
11689
class LUBackupExport(LogicalUnit):
11690
  """Export an instance to an image in the cluster.
11691

11692
  """
11693
  HPATH = "instance-export"
11694
  HTYPE = constants.HTYPE_INSTANCE
11695
  REQ_BGL = False
11696

    
11697
  def CheckArguments(self):
11698
    """Check the arguments.
11699

11700
    """
11701
    self.x509_key_name = self.op.x509_key_name
11702
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11703

    
11704
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11705
      if not self.x509_key_name:
11706
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11707
                                   errors.ECODE_INVAL)
11708

    
11709
      if not self.dest_x509_ca_pem:
11710
        raise errors.OpPrereqError("Missing destination X509 CA",
11711
                                   errors.ECODE_INVAL)
11712

    
11713
  def ExpandNames(self):
11714
    self._ExpandAndLockInstance()
11715

    
11716
    # Lock all nodes for local exports
11717
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11718
      # FIXME: lock only instance primary and destination node
11719
      #
11720
      # Sad but true, for now we have to lock all nodes, as we don't know where
11721
      # the previous export might be, and in this LU we search for it and
11722
      # remove it from its current node. In the future we could fix this by:
11723
      #  - making a tasklet to search (share-lock all), then create the
11724
      #    new one, then one to remove, after
11725
      #  - removing the removal operation altogether
11726
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11727

    
11728
  def DeclareLocks(self, level):
11729
    """Last minute lock declaration."""
11730
    # All nodes are locked anyway, so nothing to do here.
11731

    
11732
  def BuildHooksEnv(self):
11733
    """Build hooks env.
11734

11735
    This will run on the master, primary node and target node.
11736

11737
    """
11738
    env = {
11739
      "EXPORT_MODE": self.op.mode,
11740
      "EXPORT_NODE": self.op.target_node,
11741
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11742
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11743
      # TODO: Generic function for boolean env variables
11744
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11745
      }
11746

    
11747
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11748

    
11749
    return env
11750

    
11751
  def BuildHooksNodes(self):
11752
    """Build hooks nodes.
11753

11754
    """
11755
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11756

    
11757
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11758
      nl.append(self.op.target_node)
11759

    
11760
    return (nl, nl)
11761

    
11762
  def CheckPrereq(self):
11763
    """Check prerequisites.
11764

11765
    This checks that the instance and node names are valid.
11766

11767
    """
11768
    instance_name = self.op.instance_name
11769

    
11770
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11771
    assert self.instance is not None, \
11772
          "Cannot retrieve locked instance %s" % self.op.instance_name
11773
    _CheckNodeOnline(self, self.instance.primary_node)
11774

    
11775
    if (self.op.remove_instance and self.instance.admin_up and
11776
        not self.op.shutdown):
11777
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
11778
                                 " down first", errors.ECODE_STATE)
11779

    
11780
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11781
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11782
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11783
      assert self.dst_node is not None
11784

    
11785
      _CheckNodeOnline(self, self.dst_node.name)
11786
      _CheckNodeNotDrained(self, self.dst_node.name)
11787

    
11788
      self._cds = None
11789
      self.dest_disk_info = None
11790
      self.dest_x509_ca = None
11791

    
11792
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11793
      self.dst_node = None
11794

    
11795
      if len(self.op.target_node) != len(self.instance.disks):
11796
        raise errors.OpPrereqError(("Received destination information for %s"
11797
                                    " disks, but instance %s has %s disks") %
11798
                                   (len(self.op.target_node), instance_name,
11799
                                    len(self.instance.disks)),
11800
                                   errors.ECODE_INVAL)
11801

    
11802
      cds = _GetClusterDomainSecret()
11803

    
11804
      # Check X509 key name
11805
      try:
11806
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11807
      except (TypeError, ValueError), err:
11808
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11809

    
11810
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11811
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11812
                                   errors.ECODE_INVAL)
11813

    
11814
      # Load and verify CA
11815
      try:
11816
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11817
      except OpenSSL.crypto.Error, err:
11818
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11819
                                   (err, ), errors.ECODE_INVAL)
11820

    
11821
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11822
      if errcode is not None:
11823
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11824
                                   (msg, ), errors.ECODE_INVAL)
11825

    
11826
      self.dest_x509_ca = cert
11827

    
11828
      # Verify target information
11829
      disk_info = []
11830
      for idx, disk_data in enumerate(self.op.target_node):
11831
        try:
11832
          (host, port, magic) = \
11833
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11834
        except errors.GenericError, err:
11835
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11836
                                     (idx, err), errors.ECODE_INVAL)
11837

    
11838
        disk_info.append((host, port, magic))
11839

    
11840
      assert len(disk_info) == len(self.op.target_node)
11841
      self.dest_disk_info = disk_info
11842

    
11843
    else:
11844
      raise errors.ProgrammerError("Unhandled export mode %r" %
11845
                                   self.op.mode)
11846

    
11847
    # instance disk type verification
11848
    # TODO: Implement export support for file-based disks
11849
    for disk in self.instance.disks:
11850
      if disk.dev_type == constants.LD_FILE:
11851
        raise errors.OpPrereqError("Export not supported for instances with"
11852
                                   " file-based disks", errors.ECODE_INVAL)
11853

    
11854
  def _CleanupExports(self, feedback_fn):
11855
    """Removes exports of current instance from all other nodes.
11856

11857
    If an instance in a cluster with nodes A..D was exported to node C, its
11858
    exports will be removed from the nodes A, B and D.
11859

11860
    """
11861
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11862

    
11863
    nodelist = self.cfg.GetNodeList()
11864
    nodelist.remove(self.dst_node.name)
11865

    
11866
    # on one-node clusters nodelist will be empty after the removal
11867
    # if we proceeded, the backup would be removed because OpBackupQuery
11868
    # substitutes an empty list with the full cluster node list.
11869
    iname = self.instance.name
11870
    if nodelist:
11871
      feedback_fn("Removing old exports for instance %s" % iname)
11872
      exportlist = self.rpc.call_export_list(nodelist)
11873
      for node in exportlist:
11874
        if exportlist[node].fail_msg:
11875
          continue
11876
        if iname in exportlist[node].payload:
11877
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11878
          if msg:
11879
            self.LogWarning("Could not remove older export for instance %s"
11880
                            " on node %s: %s", iname, node, msg)
11881

    
11882
  def Exec(self, feedback_fn):
11883
    """Export an instance to an image in the cluster.
11884

11885
    """
11886
    assert self.op.mode in constants.EXPORT_MODES
11887

    
11888
    instance = self.instance
11889
    src_node = instance.primary_node
11890

    
11891
    if self.op.shutdown:
11892
      # shutdown the instance, but not the disks
11893
      feedback_fn("Shutting down instance %s" % instance.name)
11894
      result = self.rpc.call_instance_shutdown(src_node, instance,
11895
                                               self.op.shutdown_timeout)
11896
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11897
      result.Raise("Could not shut down instance %s on"
11898
                   " node %s" % (instance.name, src_node))
11899

    
11900
    # set the disks ID correctly since call_instance_start needs the
11901
    # correct drbd minor to create the symlinks
11902
    for disk in instance.disks:
11903
      self.cfg.SetDiskID(disk, src_node)
11904

    
11905
    activate_disks = (not instance.admin_up)
11906

    
11907
    if activate_disks:
11908
      # Activate the instance disks if we're exporting a stopped instance
11909
      feedback_fn("Activating disks for %s" % instance.name)
11910
      _StartInstanceDisks(self, instance, None)
11911

    
11912
    try:
11913
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11914
                                                     instance)
11915

    
11916
      helper.CreateSnapshots()
11917
      try:
11918
        if (self.op.shutdown and instance.admin_up and
11919
            not self.op.remove_instance):
11920
          assert not activate_disks
11921
          feedback_fn("Starting instance %s" % instance.name)
11922
          result = self.rpc.call_instance_start(src_node,
11923
                                                (instance, None, None), False)
11924
          msg = result.fail_msg
11925
          if msg:
11926
            feedback_fn("Failed to start instance: %s" % msg)
11927
            _ShutdownInstanceDisks(self, instance)
11928
            raise errors.OpExecError("Could not start instance: %s" % msg)
11929

    
11930
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11931
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11932
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11933
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11934
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11935

    
11936
          (key_name, _, _) = self.x509_key_name
11937

    
11938
          dest_ca_pem = \
11939
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11940
                                            self.dest_x509_ca)
11941

    
11942
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11943
                                                     key_name, dest_ca_pem,
11944
                                                     timeouts)
11945
      finally:
11946
        helper.Cleanup()
11947

    
11948
      # Check for backwards compatibility
11949
      assert len(dresults) == len(instance.disks)
11950
      assert compat.all(isinstance(i, bool) for i in dresults), \
11951
             "Not all results are boolean: %r" % dresults
11952

    
11953
    finally:
11954
      if activate_disks:
11955
        feedback_fn("Deactivating disks for %s" % instance.name)
11956
        _ShutdownInstanceDisks(self, instance)
11957

    
11958
    if not (compat.all(dresults) and fin_resu):
11959
      failures = []
11960
      if not fin_resu:
11961
        failures.append("export finalization")
11962
      if not compat.all(dresults):
11963
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11964
                               if not dsk)
11965
        failures.append("disk export: disk(s) %s" % fdsk)
11966

    
11967
      raise errors.OpExecError("Export failed, errors in %s" %
11968
                               utils.CommaJoin(failures))
11969

    
11970
    # At this point, the export was successful, we can cleanup/finish
11971

    
11972
    # Remove instance if requested
11973
    if self.op.remove_instance:
11974
      feedback_fn("Removing instance %s" % instance.name)
11975
      _RemoveInstance(self, feedback_fn, instance,
11976
                      self.op.ignore_remove_failures)
11977

    
11978
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11979
      self._CleanupExports(feedback_fn)
11980

    
11981
    return fin_resu, dresults
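    # Added note on the return value: fin_resu is the overall finalization
    # boolean and dresults holds one boolean per instance disk (see the
    # assertions above), e.g. (True, [True, True]) for a clean two-disk
    # export.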
11982

    
11983

    
11984
class LUBackupRemove(NoHooksLU):
11985
  """Remove exports related to the named instance.
11986

11987
  """
11988
  REQ_BGL = False
11989

    
11990
  def ExpandNames(self):
11991
    self.needed_locks = {}
11992
    # We need all nodes to be locked in order for RemoveExport to work, but we
11993
    # don't need to lock the instance itself, as nothing will happen to it (and
11994
    # we can remove exports also for a removed instance)
11995
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11996

    
11997
  def Exec(self, feedback_fn):
11998
    """Remove any export.
11999

12000
    """
12001
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12002
    # If the instance was not found we'll try with the name that was passed in.
12003
    # This will only work if it was an FQDN, though.
12004
    fqdn_warn = False
12005
    if not instance_name:
12006
      fqdn_warn = True
12007
      instance_name = self.op.instance_name
12008

    
12009
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12010
    exportlist = self.rpc.call_export_list(locked_nodes)
12011
    found = False
12012
    for node in exportlist:
12013
      msg = exportlist[node].fail_msg
12014
      if msg:
12015
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12016
        continue
12017
      if instance_name in exportlist[node].payload:
12018
        found = True
12019
        result = self.rpc.call_export_remove(node, instance_name)
12020
        msg = result.fail_msg
12021
        if msg:
12022
          logging.error("Could not remove export for instance %s"
12023
                        " on node %s: %s", instance_name, node, msg)
12024

    
12025
    if fqdn_warn and not found:
12026
      feedback_fn("Export not found. If trying to remove an export belonging"
12027
                  " to a deleted instance please use its Fully Qualified"
12028
                  " Domain Name.")
12029

    
12030

    
12031
class LUGroupAdd(LogicalUnit):
12032
  """Logical unit for creating node groups.
12033

12034
  """
12035
  HPATH = "group-add"
12036
  HTYPE = constants.HTYPE_GROUP
12037
  REQ_BGL = False
12038

    
12039
  def ExpandNames(self):
12040
    # We need the new group's UUID here so that we can create and acquire the
12041
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12042
    # that it should not check whether the UUID exists in the configuration.
12043
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12044
    self.needed_locks = {}
12045
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12046

    
12047
  def CheckPrereq(self):
12048
    """Check prerequisites.
12049

12050
    This checks that the given group name is not an existing node group
12051
    already.
12052

12053
    """
12054
    try:
12055
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12056
    except errors.OpPrereqError:
12057
      pass
12058
    else:
12059
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12060
                                 " node group (UUID: %s)" %
12061
                                 (self.op.group_name, existing_uuid),
12062
                                 errors.ECODE_EXISTS)
12063

    
12064
    if self.op.ndparams:
12065
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
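      # Example of the optimistic pattern used here (node names and UUIDs are
      # illustrative): if self.op.nodes is ["node1", "node2"] and both
      # currently sit in group "g-old", the lock set becomes {target group,
      # "g-old"}.  Should a node change groups before the locks are actually
      # acquired, CheckPrereq below detects the mismatch and aborts.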

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and become split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
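    # For instance (purely illustrative data), with requested groups
    # "uuid1"/"uuid2" the block below could produce
    #   group_to_nodes = {"uuid1": ["node1", "node2"], "uuid2": ["node3"]}
    #   group_to_instances = {"uuid1": ["inst1"], "uuid2": []}
    # where instances are attributed to the group of their primary node.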
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
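      # Sketch of the resulting lock set (UUIDs are hypothetical): evacuating
      # group "g1" into explicit targets ["g2"], with an instance that also
      # has a disk on a node in "g3", yields lock_groups == {"g1", "g2", "g3"};
      # with no explicit targets it is simply locking.ALL_SET.  CheckPrereq
      # re-checks the instance/group relationship under the acquired locks.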

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
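    # "jobs" is a list of jobs, where each job is itself a list of opcodes
    # (the same shape that the _JOB_LIST check in IAllocator accepts).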

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has several sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes
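  # Typical usage, as seen in the LUs above (e.g. LUGroupEvacuate.Exec):
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     ...report ial.info...
  # The keyword arguments accepted depend on the mode; see _MODE_DATA below.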

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
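  # For orientation (example data only, not produced by this module): a
  # node-evacuate/change-group result matching _NEVAC_RESULT could look like
  #   [[["inst1", "group-uuid", ["node3", "node4"]]],   # moved
  #    [["inst2", "disk template not supported"]],      # failed
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]       # jobs
  # i.e. a list of moved instances, a list of failures, and a list of jobs.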
13409

    
13410
  _MODE_DATA = {
13411
    constants.IALLOCATOR_MODE_ALLOC:
13412
      (_AddNewInstance,
13413
       [
13414
        ("name", ht.TString),
13415
        ("memory", ht.TInt),
13416
        ("disks", ht.TListOf(ht.TDict)),
13417
        ("disk_template", ht.TString),
13418
        ("os", ht.TString),
13419
        ("tags", _STRING_LIST),
13420
        ("nics", ht.TListOf(ht.TDict)),
13421
        ("vcpus", ht.TInt),
13422
        ("hypervisor", ht.TString),
13423
        ], ht.TList),
13424
    constants.IALLOCATOR_MODE_RELOC:
13425
      (_AddRelocateInstance,
13426
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13427
       ht.TList),
13428
     constants.IALLOCATOR_MODE_NODE_EVAC:
13429
      (_AddNodeEvacuate, [
13430
        ("instances", _STRING_LIST),
13431
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13432
        ], _NEVAC_RESULT),
13433
     constants.IALLOCATOR_MODE_CHG_GROUP:
13434
      (_AddChangeGroup, [
13435
        ("instances", _STRING_LIST),
13436
        ("target_groups", _STRING_LIST),
13437
        ], _NEVAC_RESULT),
13438
    }
13439

    
13440
  def Run(self, name, validate=True, call_fn=None):
13441
    """Run an instance allocator and return the results.
13442

13443
    """
13444
    if call_fn is None:
13445
      call_fn = self.rpc.call_iallocator_runner
13446

    
13447
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13448
    result.Raise("Failure while running the iallocator script")
13449

    
13450
    self.out_text = result.payload
13451
    if validate:
13452
      self._ValidateResult()
13453

    
13454
  def _ValidateResult(self):
13455
    """Process the allocator results.
13456

13457
    This will process and if successful save the result in
13458
    self.out_data and the other parameters.
13459

13460
    """
13461
    try:
13462
      rdict = serializer.Load(self.out_text)
13463
    except Exception, err:
13464
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13465

    
13466
    if not isinstance(rdict, dict):
13467
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13468

    
13469
    # TODO: remove backwards compatiblity in later versions
13470
    if "nodes" in rdict and "result" not in rdict:
13471
      rdict["result"] = rdict["nodes"]
13472
      del rdict["nodes"]
13473

    
13474
    for key in "success", "info", "result":
13475
      if key not in rdict:
13476
        raise errors.OpExecError("Can't parse iallocator results:"
13477
                                 " missing key '%s'" % key)
13478
      setattr(self, key, rdict[key])
13479

    
13480
    if not self._result_check(self.result):
13481
      raise errors.OpExecError("Iallocator returned invalid result,"
13482
                               " expected %s, got %s" %
13483
                               (self._result_check, self.result),
13484
                               errors.ECODE_INVAL)
13485

    
13486
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13487
      assert self.relocate_from is not None
13488
      assert self.required_nodes == 1
13489

    
13490
      node2group = dict((name, ndata["group"])
13491
                        for (name, ndata) in self.in_data["nodes"].items())
13492

    
13493
      fn = compat.partial(self._NodesToGroups, node2group,
13494
                          self.in_data["nodegroups"])
13495

    
13496
      instance = self.cfg.GetInstanceInfo(self.name)
13497
      request_groups = fn(self.relocate_from + [instance.primary_node])
13498
      result_groups = fn(rdict["result"] + [instance.primary_node])
13499

    
13500
      if self.success and not set(result_groups).issubset(request_groups):
13501
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13502
                                 " differ from original groups (%s)" %
13503
                                 (utils.CommaJoin(result_groups),
13504
                                  utils.CommaJoin(request_groups)))
13505

    
13506
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13507
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13508

    
13509
    self.out_data = rdict
13510

    
13511
  @staticmethod
13512
  def _NodesToGroups(node2group, groups, nodes):
13513
    """Returns a list of unique group names for a list of nodes.
13514

13515
    @type node2group: dict
13516
    @param node2group: Map from node name to group UUID
13517
    @type groups: dict
13518
    @param groups: Group information
13519
    @type nodes: list
13520
    @param nodes: Node names
13521

13522
    """
13523
    result = set()
13524

    
13525
    for node in nodes:
13526
      try:
13527
        group_uuid = node2group[node]
13528
      except KeyError:
13529
        # Ignore unknown node
13530
        pass
13531
      else:
13532
        try:
13533
          group = groups[group_uuid]
13534
        except KeyError:
13535
          # Can't find group, let's use UUID
13536
          group_name = group_uuid
13537
        else:
13538
          group_name = group["name"]
13539

    
13540
        result.add(group_name)
13541

    
13542
    return sorted(result)
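  # Worked example (hypothetical names, for documentation only):
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "group-a"}, "uuid-b": {"name": "group-b"}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "nodeX"])
  # returns ["group-a", "group-b"]: the unknown "nodeX" is ignored and the
  # group names come back sorted.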


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
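      # Illustrative only: a self.op.disks value passing the disk checks above
      # would be a list such as
      #   [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: mode}]
      # with an integer size (1024 is an arbitrary example) and a mode taken
      # from constants.DISK_ACCESS_SET.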
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }
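
# The assert below verifies at load time that _QUERY_IMPL covers exactly the
# set of resources that can be queried through opcodes (constants.QR_VIA_OP).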
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
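
# Example (documentation only, using only names defined above):
#   _GetQueryImplementation(constants.QR_NODE) returns _NodeQuery, while an
#   unknown resource name raises OpPrereqError with errors.ECODE_INVAL.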