1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
#: Size of DRBD meta block device
67
DRBD_META_SIZE = 128
68

    
69

    
70
class ResultWithJobs:
71
  """Data container for LU results with jobs.
72

73
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
74
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
75
  contained in the C{jobs} attribute and include the job IDs in the opcode
76
  result.
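
  Example (illustrative sketch; C{op_first} and C{op_second} stand for
  arbitrary L{opcodes.OpCode} instances)::

    # Submit two follow-up jobs, one opcode each, and return an extra
    # "info" value alongside the resulting job IDs
    return ResultWithJobs([[op_first], [op_second]], info="some value")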
77

78
  """
79
  def __init__(self, jobs, **kwargs):
80
    """Initializes this class.
81

82
    Additional return values can be specified as keyword arguments.
83

84
    @type jobs: list of lists of L{opcodes.OpCode}
85
    @param jobs: A list of lists of opcode objects
86

87
    """
88
    self.jobs = jobs
89
    self.other = kwargs
90

    
91

    
92
class LogicalUnit(object):
93
  """Logical Unit base class.
94

95
  Subclasses must follow these rules:
96
    - implement ExpandNames
97
    - implement CheckPrereq (except when tasklets are used)
98
    - implement Exec (except when tasklets are used)
99
    - implement BuildHooksEnv
100
    - implement BuildHooksNodes
101
    - redefine HPATH and HTYPE
102
    - optionally redefine their run requirements:
103
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
104

105
  Note that all commands require root permissions.
106

107
  @ivar dry_run_result: the value (if any) that will be returned to the caller
108
      in dry-run mode (signalled by opcode dry_run parameter)
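
  Example skeleton of a subclass (an illustrative sketch only, modelled on
  the simple LUs defined later in this module)::

    class LUExample(LogicalUnit):
      HPATH = "example"
      HTYPE = constants.HTYPE_CLUSTER
      REQ_BGL = False

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])

      def Exec(self, feedback_fn):
        feedback_fn("nothing to do")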
109

110
  """
111
  HPATH = None
112
  HTYPE = None
113
  REQ_BGL = True
114

    
115
  def __init__(self, processor, op, context, rpc):
116
    """Constructor for LogicalUnit.
117

118
    This needs to be overridden in derived classes in order to check op
119
    validity.
120

121
    """
122
    self.proc = processor
123
    self.op = op
124
    self.cfg = context.cfg
125
    self.glm = context.glm
126
    # readability alias
127
    self.owned_locks = context.glm.list_owned
128
    self.context = context
129
    self.rpc = rpc
130
    # Dicts used to declare locking needs to mcpu
131
    self.needed_locks = None
132
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
133
    self.add_locks = {}
134
    self.remove_locks = {}
135
    # Used to force good behavior when calling helper functions
136
    self.recalculate_locks = {}
137
    # logging
138
    self.Log = processor.Log # pylint: disable=C0103
139
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
140
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
141
    self.LogStep = processor.LogStep # pylint: disable=C0103
142
    # support for dry-run
143
    self.dry_run_result = None
144
    # support for generic debug attribute
145
    if (not hasattr(self.op, "debug_level") or
146
        not isinstance(self.op.debug_level, int)):
147
      self.op.debug_level = 0
148

    
149
    # Tasklets
150
    self.tasklets = None
151

    
152
    # Validate opcode parameters and set defaults
153
    self.op.Validate(True)
154

    
155
    self.CheckArguments()
156

    
157
  def CheckArguments(self):
158
    """Check syntactic validity for the opcode arguments.
159

160
    This method is for doing a simple syntactic check and ensure
161
    validity of opcode parameters, without any cluster-related
162
    checks. While the same can be accomplished in ExpandNames and/or
163
    CheckPrereq, doing these separate is better because:
164

165
      - ExpandNames is left as a purely lock-related function
166
      - CheckPrereq is run after we have acquired locks (and possibly
167
        waited for them)
168

169
    The function is allowed to change the self.op attribute so that
170
    later methods need not worry about missing parameters.
171

172
    """
173
    pass
174

    
175
  def ExpandNames(self):
176
    """Expand names for this LU.
177

178
    This method is called before starting to execute the opcode, and it should
179
    update all the parameters of the opcode to their canonical form (e.g. a
180
    short node name must be fully expanded after this method has successfully
181
    completed). This way locking, hooks, logging, etc. can work correctly.
182

183
    LUs which implement this method must also populate the self.needed_locks
184
    member, as a dict with lock levels as keys, and a list of needed lock names
185
    as values. Rules:
186

187
      - use an empty dict if you don't need any lock
188
      - if you don't need any lock at a particular level omit that level
189
      - don't put anything for the BGL level
190
      - if you want all locks at a level use locking.ALL_SET as a value
191

192
    If you need to share locks (rather than acquire them exclusively) at one
193
    level you can modify self.share_locks, setting a true value (usually 1) for
194
    that level. By default locks are not shared.
195

196
    This function can also define a list of tasklets, which then will be
197
    executed in order instead of the usual LU-level CheckPrereq and Exec
198
    functions, if those are not defined by the LU.
199

200
    Examples::
201

202
      # Acquire all nodes and one instance
203
      self.needed_locks = {
204
        locking.LEVEL_NODE: locking.ALL_SET,
205
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
206
      }
207
      # Acquire just two nodes
208
      self.needed_locks = {
209
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
210
      }
211
      # Acquire no locks
212
      self.needed_locks = {} # No, you can't leave it to the default value None
213

214
    """
215
    # The implementation of this method is mandatory only if the new LU is
216
    # concurrent, so that old LUs don't need to be changed all at the same
217
    # time.
218
    if self.REQ_BGL:
219
      self.needed_locks = {} # Exclusive LUs don't need locks.
220
    else:
221
      raise NotImplementedError
222

    
223
  def DeclareLocks(self, level):
224
    """Declare LU locking needs for a level
225

226
    While most LUs can just declare their locking needs at ExpandNames time,
227
    sometimes there's the need to calculate some locks after having acquired
228
    the ones before. This function is called just before acquiring locks at a
229
    particular level, but after acquiring the ones at lower levels, and permits
230
    such calculations. It can be used to modify self.needed_locks, and by
231
    default it does nothing.
232

233
    This function is only called if you have something already set in
234
    self.needed_locks for the level.
235

236
    @param level: Locking level which is going to be locked
237
    @type level: member of ganeti.locking.LEVELS
238

239
    """
240

    
241
  def CheckPrereq(self):
242
    """Check prerequisites for this LU.
243

244
    This method should check that the prerequisites for the execution
245
    of this LU are fulfilled. It can do internode communication, but
246
    it should be idempotent - no cluster or system changes are
247
    allowed.
248

249
    The method should raise errors.OpPrereqError in case something is
250
    not fulfilled. Its return value is ignored.
251

252
    This method should also update all the parameters of the opcode to
253
    their canonical form if it hasn't been done by ExpandNames before.
254

255
    """
256
    if self.tasklets is not None:
257
      for (idx, tl) in enumerate(self.tasklets):
258
        logging.debug("Checking prerequisites for tasklet %s/%s",
259
                      idx + 1, len(self.tasklets))
260
        tl.CheckPrereq()
261
    else:
262
      pass
263

    
264
  def Exec(self, feedback_fn):
265
    """Execute the LU.
266

267
    This method should implement the actual work. It should raise
268
    errors.OpExecError for failures that are somewhat dealt with in
269
    code, or expected.
270

271
    """
272
    if self.tasklets is not None:
273
      for (idx, tl) in enumerate(self.tasklets):
274
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
275
        tl.Exec(feedback_fn)
276
    else:
277
      raise NotImplementedError
278

    
279
  def BuildHooksEnv(self):
280
    """Build hooks environment for this LU.
281

282
    @rtype: dict
283
    @return: Dictionary containing the environment that will be used for
284
      running the hooks for this LU. The keys of the dict must not be prefixed
285
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
286
      will extend the environment with additional variables. If no environment
287
      should be defined, an empty dictionary should be returned (not C{None}).
288
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
289
      will not be called.
290

291
    """
292
    raise NotImplementedError
293

    
294
  def BuildHooksNodes(self):
295
    """Build list of nodes to run LU's hooks.
296

297
    @rtype: tuple; (list, list)
298
    @return: Tuple containing a list of node names on which the hook
299
      should run before the execution and a list of node names on which the
300
      hook should run after the execution. No nodes should be returned as an
301
      empty list (and not None).
302
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
303
      will not be called.
304

305
    """
306
    raise NotImplementedError
307

    
308
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
309
    """Notify the LU about the results of its hooks.
310

311
    This method is called every time a hooks phase is executed, and notifies
312
    the Logical Unit about the hooks' result. The LU can then use it to alter
313
    its result based on the hooks.  By default the method does nothing and the
314
    previous result is passed back unchanged but any LU can define it if it
315
    wants to use the local cluster hook-scripts somehow.
316

317
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
318
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
319
    @param hook_results: the results of the multi-node hooks rpc call
320
    @param feedback_fn: function used to send feedback back to the caller
321
    @param lu_result: the previous Exec result this LU had, or None
322
        in the PRE phase
323
    @return: the new Exec result, based on the previous result
324
        and hook results
325

326
    """
327
    # The API must be kept, hence we ignore the "unused argument" and
    # "method could be a function" pylint warnings
329
    # pylint: disable=W0613,R0201
330
    return lu_result
331

    
332
  def _ExpandAndLockInstance(self):
333
    """Helper function to expand and lock an instance.
334

335
    Many LUs that work on an instance take its name in self.op.instance_name
336
    and need to expand it and then declare the expanded name for locking. This
337
    function does it, and then updates self.op.instance_name to the expanded
338
    name. It also initializes needed_locks as a dict, if this hasn't been done
339
    before.
340

341
    """
342
    if self.needed_locks is None:
343
      self.needed_locks = {}
344
    else:
345
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
346
        "_ExpandAndLockInstance called with instance-level locks set"
347
    self.op.instance_name = _ExpandInstanceName(self.cfg,
348
                                                self.op.instance_name)
349
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
350

    
351
  def _LockInstancesNodes(self, primary_only=False):
352
    """Helper function to declare instances' nodes for locking.
353

354
    This function should be called after locking one or more instances to lock
355
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
356
    with all primary or secondary nodes for instances already locked and
357
    present in self.needed_locks[locking.LEVEL_INSTANCE].
358

359
    It should be called from DeclareLocks, and for safety only works if
360
    self.recalculate_locks[locking.LEVEL_NODE] is set.
361

362
    In the future it may grow parameters to just lock some instance's nodes, or
363
    to just lock primaries or secondary nodes, if needed.
364

365
    It should be called in DeclareLocks in a way similar to::
366

367
      if level == locking.LEVEL_NODE:
368
        self._LockInstancesNodes()
369

370
    @type primary_only: boolean
371
    @param primary_only: only lock primary nodes of locked instances
372

373
    """
374
    assert locking.LEVEL_NODE in self.recalculate_locks, \
375
      "_LockInstancesNodes helper function called with no nodes to recalculate"
376

    
377
    # TODO: check if we have really been called with the instance locks held
378

    
379
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
380
    # future we might want to have different behaviors depending on the value
381
    # of self.recalculate_locks[locking.LEVEL_NODE]
382
    wanted_nodes = []
383
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
384
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
385
      wanted_nodes.append(instance.primary_node)
386
      if not primary_only:
387
        wanted_nodes.extend(instance.secondary_nodes)
388

    
389
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
390
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
391
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
392
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
393

    
394
    del self.recalculate_locks[locking.LEVEL_NODE]
395

    
396

    
397
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
398
  """Simple LU which runs no hooks.
399

400
  This LU is intended as a parent for other LogicalUnits which will
401
  run no hooks, in order to reduce duplicate code.
402

403
  """
404
  HPATH = None
405
  HTYPE = None
406

    
407
  def BuildHooksEnv(self):
408
    """Empty BuildHooksEnv for NoHooksLu.
409

410
    This just raises an error.
411

412
    """
413
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
414

    
415
  def BuildHooksNodes(self):
416
    """Empty BuildHooksNodes for NoHooksLU.
417

418
    """
419
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
420

    
421

    
422
class Tasklet:
423
  """Tasklet base class.
424

425
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
426
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
427
  tasklets know nothing about locks.
428

429
  Subclasses must follow these rules:
430
    - Implement CheckPrereq
431
    - Implement Exec
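
  Example (an illustrative sketch, not a tasklet from this module)::

    class _EchoTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("running from a tasklet")

    # ... and in the owning LU's ExpandNames:
    #   self.tasklets = [_EchoTasklet(self)]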
432

433
  """
434
  def __init__(self, lu):
435
    self.lu = lu
436

    
437
    # Shortcuts
438
    self.cfg = lu.cfg
439
    self.rpc = lu.rpc
440

    
441
  def CheckPrereq(self):
442
    """Check prerequisites for this tasklets.
443

444
    This method should check whether the prerequisites for the execution of
445
    this tasklet are fulfilled. It can do internode communication, but it
446
    should be idempotent - no cluster or system changes are allowed.
447

448
    The method should raise errors.OpPrereqError in case something is not
449
    fulfilled. Its return value is ignored.
450

451
    This method should also update all parameters to their canonical form if it
452
    hasn't been done before.
453

454
    """
455
    pass
456

    
457
  def Exec(self, feedback_fn):
458
    """Execute the tasklet.
459

460
    This method should implement the actual work. It should raise
461
    errors.OpExecError for failures that are somewhat dealt with in code, or
462
    expected.
463

464
    """
465
    raise NotImplementedError
466

    
467

    
468
class _QueryBase:
469
  """Base for query utility classes.
470

471
  """
472
  #: Attribute holding field definitions
473
  FIELDS = None
474

    
475
  def __init__(self, qfilter, fields, use_locking):
476
    """Initializes this class.
477

478
    """
479
    self.use_locking = use_locking
480

    
481
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
482
                             namefield="name")
483
    self.requested_data = self.query.RequestedData()
484
    self.names = self.query.RequestedNames()
485

    
486
    # Sort only if no names were requested
487
    self.sort_by_name = not self.names
488

    
489
    self.do_locking = None
490
    self.wanted = None
491

    
492
  def _GetNames(self, lu, all_names, lock_level):
493
    """Helper function to determine names asked for in the query.
494

495
    """
496
    if self.do_locking:
497
      names = lu.owned_locks(lock_level)
498
    else:
499
      names = all_names
500

    
501
    if self.wanted == locking.ALL_SET:
502
      assert not self.names
503
      # caller didn't specify names, so ordering is not important
504
      return utils.NiceSort(names)
505

    
506
    # caller specified names and we must keep the same order
507
    assert self.names
508
    assert not self.do_locking or lu.glm.is_owned(lock_level)
509

    
510
    missing = set(self.wanted).difference(names)
511
    if missing:
512
      raise errors.OpExecError("Some items were removed before retrieving"
513
                               " their data: %s" % missing)
514

    
515
    # Return expanded names
516
    return self.wanted
517

    
518
  def ExpandNames(self, lu):
519
    """Expand names for this query.
520

521
    See L{LogicalUnit.ExpandNames}.
522

523
    """
524
    raise NotImplementedError()
525

    
526
  def DeclareLocks(self, lu, level):
527
    """Declare locks for this query.
528

529
    See L{LogicalUnit.DeclareLocks}.
530

531
    """
532
    raise NotImplementedError()
533

    
534
  def _GetQueryData(self, lu):
535
    """Collects all data for this query.
536

537
    @return: Query data object
538

539
    """
540
    raise NotImplementedError()
541

    
542
  def NewStyleQuery(self, lu):
543
    """Collect data and execute query.
544

545
    """
546
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
547
                                  sort_by_name=self.sort_by_name)
548

    
549
  def OldStyleQuery(self, lu):
550
    """Collect data and execute query.
551

552
    """
553
    return self.query.OldStyleQuery(self._GetQueryData(lu),
554
                                    sort_by_name=self.sort_by_name)
555

    
556

    
557
def _ShareAll():
558
  """Returns a dict declaring all lock levels shared.
559

560
  """
561
  return dict.fromkeys(locking.LEVELS, 1)
562

    
563

    
564
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
565
  """Checks if the owned node groups are still correct for an instance.
566

567
  @type cfg: L{config.ConfigWriter}
568
  @param cfg: The cluster configuration
569
  @type instance_name: string
570
  @param instance_name: Instance name
571
  @type owned_groups: set or frozenset
572
  @param owned_groups: List of currently owned node groups
573

574
  """
575
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
576

    
577
  if not owned_groups.issuperset(inst_groups):
578
    raise errors.OpPrereqError("Instance %s's node groups changed since"
579
                               " locks were acquired, current groups are"
580
                               " are '%s', owning groups '%s'; retry the"
581
                               " operation" %
582
                               (instance_name,
583
                                utils.CommaJoin(inst_groups),
584
                                utils.CommaJoin(owned_groups)),
585
                               errors.ECODE_STATE)
586

    
587
  return inst_groups
588

    
589

    
590
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
591
  """Checks if the instances in a node group are still correct.
592

593
  @type cfg: L{config.ConfigWriter}
594
  @param cfg: The cluster configuration
595
  @type group_uuid: string
596
  @param group_uuid: Node group UUID
597
  @type owned_instances: set or frozenset
598
  @param owned_instances: List of currently owned instances
599

600
  """
601
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
602
  if owned_instances != wanted_instances:
603
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
604
                               " locks were acquired, wanted '%s', have '%s';"
605
                               " retry the operation" %
606
                               (group_uuid,
607
                                utils.CommaJoin(wanted_instances),
608
                                utils.CommaJoin(owned_instances)),
609
                               errors.ECODE_STATE)
610

    
611
  return wanted_instances
612

    
613

    
614
def _SupportsOob(cfg, node):
615
  """Tells if node supports OOB.
616

617
  @type cfg: L{config.ConfigWriter}
618
  @param cfg: The cluster configuration
619
  @type node: L{objects.Node}
620
  @param node: The node
621
  @return: The OOB script if supported or an empty string otherwise
622

623
  """
624
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
625

    
626

    
627
def _GetWantedNodes(lu, nodes):
628
  """Returns list of checked and expanded node names.
629

630
  @type lu: L{LogicalUnit}
631
  @param lu: the logical unit on whose behalf we execute
632
  @type nodes: list
633
  @param nodes: list of node names or None for all nodes
634
  @rtype: list
635
  @return: the list of nodes, sorted
636
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
637

638
  """
639
  if nodes:
640
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
641

    
642
  return utils.NiceSort(lu.cfg.GetNodeList())
643

    
644

    
645
def _GetWantedInstances(lu, instances):
646
  """Returns list of checked and expanded instance names.
647

648
  @type lu: L{LogicalUnit}
649
  @param lu: the logical unit on whose behalf we execute
650
  @type instances: list
651
  @param instances: list of instance names or None for all instances
652
  @rtype: list
653
  @return: the list of instances, sorted
654
  @raise errors.OpPrereqError: if the instances parameter is wrong type
655
  @raise errors.OpPrereqError: if any of the passed instances is not found
656

657
  """
658
  if instances:
659
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
660
  else:
661
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
662
  return wanted
663

    
664

    
665
def _GetUpdatedParams(old_params, update_dict,
666
                      use_default=True, use_none=False):
667
  """Return the new version of a parameter dictionary.
668

669
  @type old_params: dict
670
  @param old_params: old parameters
671
  @type update_dict: dict
672
  @param update_dict: dict containing new parameter values, or
673
      constants.VALUE_DEFAULT to reset the parameter to its default
674
      value
675
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
681
  @rtype: dict
682
  @return: the new parameter dictionary
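
  Example (illustrative)::

    # "b" is reset to its default value (i.e. removed), "c" is added
    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # => {"a": 1, "c": 3}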
683

684
  """
685
  params_copy = copy.deepcopy(old_params)
686
  for key, val in update_dict.iteritems():
687
    if ((use_default and val == constants.VALUE_DEFAULT) or
688
        (use_none and val is None)):
689
      try:
690
        del params_copy[key]
691
      except KeyError:
692
        pass
693
    else:
694
      params_copy[key] = val
695
  return params_copy
696

    
697

    
698
def _ReleaseLocks(lu, level, names=None, keep=None):
699
  """Releases locks owned by an LU.
700

701
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf locks are released
  @type level: member of ganeti.locking.LEVELS
  @param level: Lock level
703
  @type names: list or None
704
  @param names: Names of locks to release
705
  @type keep: list or None
706
  @param keep: Names of locks to retain
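
  Example (illustrative)::

    # Release all node locks except the one on the instance's primary node
    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])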
707

708
  """
709
  assert not (keep is not None and names is not None), \
710
         "Only one of the 'names' and the 'keep' parameters can be given"
711

    
712
  if names is not None:
713
    should_release = names.__contains__
714
  elif keep:
715
    should_release = lambda name: name not in keep
716
  else:
717
    should_release = None
718

    
719
  if should_release:
720
    retain = []
721
    release = []
722

    
723
    # Determine which locks to release
724
    for name in lu.owned_locks(level):
725
      if should_release(name):
726
        release.append(name)
727
      else:
728
        retain.append(name)
729

    
730
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
731

    
732
    # Release just some locks
733
    lu.glm.release(level, names=release)
734

    
735
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
736
  else:
737
    # Release everything
738
    lu.glm.release(level)
739

    
740
    assert not lu.glm.is_owned(level), "No locks should be owned"
741

    
742

    
743
def _MapInstanceDisksToNodes(instances):
744
  """Creates a map from (node, volume) to instance name.
745

746
  @type instances: list of L{objects.Instance}
747
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
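
  Example of the returned mapping (illustrative names only)::

    {("node1.example.com", "xenvg/disk0"): "instance1.example.com",
     ("node2.example.com", "xenvg/disk0"): "instance1.example.com"}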
748

749
  """
750
  return dict(((node, vol), inst.name)
751
              for inst in instances
752
              for (node, vols) in inst.MapLVsByNode().items()
753
              for vol in vols)
754

    
755

    
756
def _RunPostHook(lu, node_name):
757
  """Runs the post-hook for an opcode on a single node.
758

759
  """
760
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
761
  try:
762
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
763
  except:
764
    # pylint: disable=W0702
765
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
766

    
767

    
768
def _CheckOutputFields(static, dynamic, selected):
769
  """Checks whether all selected fields are valid.
770

771
  @type static: L{utils.FieldSet}
772
  @param static: static fields set
773
  @type dynamic: L{utils.FieldSet}
774
  @param dynamic: dynamic fields set
775

776
  """
777
  f = utils.FieldSet()
778
  f.Extend(static)
779
  f.Extend(dynamic)
780

    
781
  delta = f.NonMatching(selected)
782
  if delta:
783
    raise errors.OpPrereqError("Unknown output fields selected: %s"
784
                               % ",".join(delta), errors.ECODE_INVAL)
785

    
786

    
787
def _CheckGlobalHvParams(params):
788
  """Validates that given hypervisor params are not global ones.
789

790
  This will ensure that instances don't get customised versions of
791
  global params.
792

793
  """
794
  used_globals = constants.HVC_GLOBALS.intersection(params)
795
  if used_globals:
796
    msg = ("The following hypervisor parameters are global and cannot"
797
           " be customized at instance level, please modify them at"
798
           " cluster level: %s" % utils.CommaJoin(used_globals))
799
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
800

    
801

    
802
def _CheckNodeOnline(lu, node, msg=None):
803
  """Ensure that a given node is online.
804

805
  @param lu: the LU on behalf of which we make the check
806
  @param node: the node to check
807
  @param msg: if passed, should be a message to replace the default one
808
  @raise errors.OpPrereqError: if the node is offline
809

810
  """
811
  if msg is None:
812
    msg = "Can't use offline node"
813
  if lu.cfg.GetNodeInfo(node).offline:
814
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
815

    
816

    
817
def _CheckNodeNotDrained(lu, node):
818
  """Ensure that a given node is not drained.
819

820
  @param lu: the LU on behalf of which we make the check
821
  @param node: the node to check
822
  @raise errors.OpPrereqError: if the node is drained
823

824
  """
825
  if lu.cfg.GetNodeInfo(node).drained:
826
    raise errors.OpPrereqError("Can't use drained node %s" % node,
827
                               errors.ECODE_STATE)
828

    
829

    
830
def _CheckNodeVmCapable(lu, node):
831
  """Ensure that a given node is vm capable.
832

833
  @param lu: the LU on behalf of which we make the check
834
  @param node: the node to check
835
  @raise errors.OpPrereqError: if the node is not vm capable
836

837
  """
838
  if not lu.cfg.GetNodeInfo(node).vm_capable:
839
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
840
                               errors.ECODE_STATE)
841

    
842

    
843
def _CheckNodeHasOS(lu, node, os_name, force_variant):
844
  """Ensure that a node supports a given OS.
845

846
  @param lu: the LU on behalf of which we make the check
847
  @param node: the node to check
848
  @param os_name: the OS to query about
849
  @param force_variant: whether to ignore variant errors
850
  @raise errors.OpPrereqError: if the node is not supporting the OS
851

852
  """
853
  result = lu.rpc.call_os_get(node, os_name)
854
  result.Raise("OS '%s' not in supported OS list for node %s" %
855
               (os_name, node),
856
               prereq=True, ecode=errors.ECODE_INVAL)
857
  if not force_variant:
858
    _CheckOSVariant(result.payload, os_name)
859

    
860

    
861
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
862
  """Ensure that a node has the given secondary ip.
863

864
  @type lu: L{LogicalUnit}
865
  @param lu: the LU on behalf of which we make the check
866
  @type node: string
867
  @param node: the node to check
868
  @type secondary_ip: string
869
  @param secondary_ip: the ip to check
870
  @type prereq: boolean
871
  @param prereq: whether to throw a prerequisite or an execute error
872
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
873
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
874

875
  """
876
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
877
  result.Raise("Failure checking secondary ip on node %s" % node,
878
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
879
  if not result.payload:
880
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
881
           " please fix and re-run this command" % secondary_ip)
882
    if prereq:
883
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
884
    else:
885
      raise errors.OpExecError(msg)
886

    
887

    
888
def _GetClusterDomainSecret():
889
  """Reads the cluster domain secret.
890

891
  """
892
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
893
                               strict=True)
894

    
895

    
896
def _CheckInstanceDown(lu, instance, reason):
897
  """Ensure that an instance is not running."""
898
  if instance.admin_up:
899
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
900
                               (instance.name, reason), errors.ECODE_STATE)
901

    
902
  pnode = instance.primary_node
903
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
904
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
905
              prereq=True, ecode=errors.ECODE_ENVIRON)
906

    
907
  if instance.name in ins_l.payload:
908
    raise errors.OpPrereqError("Instance %s is running, %s" %
909
                               (instance.name, reason), errors.ECODE_STATE)
910

    
911

    
912
def _ExpandItemName(fn, name, kind):
913
  """Expand an item name.
914

915
  @param fn: the function to use for expansion
916
  @param name: requested item name
917
  @param kind: text description ('Node' or 'Instance')
918
  @return: the resolved (full) name
919
  @raise errors.OpPrereqError: if the item is not found
920

921
  """
922
  full_name = fn(name)
923
  if full_name is None:
924
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925
                               errors.ECODE_NOENT)
926
  return full_name
927

    
928

    
929
def _ExpandNodeName(cfg, name):
930
  """Wrapper over L{_ExpandItemName} for nodes."""
931
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
932

    
933

    
934
def _ExpandInstanceName(cfg, name):
935
  """Wrapper over L{_ExpandItemName} for instance."""
936
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
937

    
938

    
939
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
940
                          memory, vcpus, nics, disk_template, disks,
941
                          bep, hvp, hypervisor_name, tags):
942
  """Builds instance related env variables for hooks
943

944
  This builds the hook environment from individual variables.
945

946
  @type name: string
947
  @param name: the name of the instance
948
  @type primary_node: string
949
  @param primary_node: the name of the instance's primary node
950
  @type secondary_nodes: list
951
  @param secondary_nodes: list of secondary nodes as strings
952
  @type os_type: string
953
  @param os_type: the name of the instance's OS
954
  @type status: boolean
955
  @param status: the should_run status of the instance
956
  @type memory: string
957
  @param memory: the memory size of the instance
958
  @type vcpus: string
959
  @param vcpus: the count of VCPUs the instance has
960
  @type nics: list
961
  @param nics: list of tuples (ip, mac, mode, link) representing
962
      the NICs the instance has
963
  @type disk_template: string
964
  @param disk_template: the disk template of the instance
965
  @type disks: list
966
  @param disks: the list of (size, mode) pairs
967
  @type bep: dict
968
  @param bep: the backend parameters for the instance
969
  @type hvp: dict
970
  @param hvp: the hypervisor parameters for the instance
971
  @type hypervisor_name: string
972
  @param hypervisor_name: the hypervisor for the instance
973
  @type tags: list
974
  @param tags: list of instance tags as strings
975
  @rtype: dict
976
  @return: the hook environment for this instance
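
  Example of part of the resulting environment (illustrative values)::

    {
      "OP_TARGET": "instance1.example.com",
      "INSTANCE_NAME": "instance1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_STATUS": "up",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_NIC0_MAC": "aa:00:00:12:34:56",
      ...
    }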
977

978
  """
979
  if status:
980
    str_status = "up"
981
  else:
982
    str_status = "down"
983
  env = {
984
    "OP_TARGET": name,
985
    "INSTANCE_NAME": name,
986
    "INSTANCE_PRIMARY": primary_node,
987
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
988
    "INSTANCE_OS_TYPE": os_type,
989
    "INSTANCE_STATUS": str_status,
990
    "INSTANCE_MEMORY": memory,
991
    "INSTANCE_VCPUS": vcpus,
992
    "INSTANCE_DISK_TEMPLATE": disk_template,
993
    "INSTANCE_HYPERVISOR": hypervisor_name,
994
  }
995

    
996
  if nics:
997
    nic_count = len(nics)
998
    for idx, (ip, mac, mode, link) in enumerate(nics):
999
      if ip is None:
1000
        ip = ""
1001
      env["INSTANCE_NIC%d_IP" % idx] = ip
1002
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1003
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1004
      env["INSTANCE_NIC%d_LINK" % idx] = link
1005
      if mode == constants.NIC_MODE_BRIDGED:
1006
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1007
  else:
1008
    nic_count = 0
1009

    
1010
  env["INSTANCE_NIC_COUNT"] = nic_count
1011

    
1012
  if disks:
1013
    disk_count = len(disks)
1014
    for idx, (size, mode) in enumerate(disks):
1015
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1016
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1017
  else:
1018
    disk_count = 0
1019

    
1020
  env["INSTANCE_DISK_COUNT"] = disk_count
1021

    
1022
  if not tags:
1023
    tags = []
1024

    
1025
  env["INSTANCE_TAGS"] = " ".join(tags)
1026

    
1027
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1028
    for key, value in source.items():
1029
      env["INSTANCE_%s_%s" % (kind, key)] = value
1030

    
1031
  return env
1032

    
1033

    
1034
def _NICListToTuple(lu, nics):
1035
  """Build a list of nic information tuples.
1036

1037
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1038
  value in LUInstanceQueryData.
1039

1040
  @type lu:  L{LogicalUnit}
1041
  @param lu: the logical unit on whose behalf we execute
1042
  @type nics: list of L{objects.NIC}
1043
  @param nics: list of nics to convert to hooks tuples
1044

1045
  """
1046
  hooks_nics = []
1047
  cluster = lu.cfg.GetClusterInfo()
1048
  for nic in nics:
1049
    ip = nic.ip
1050
    mac = nic.mac
1051
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1052
    mode = filled_params[constants.NIC_MODE]
1053
    link = filled_params[constants.NIC_LINK]
1054
    hooks_nics.append((ip, mac, mode, link))
1055
  return hooks_nics
1056

    
1057

    
1058
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1059
  """Builds instance related env variables for hooks from an object.
1060

1061
  @type lu: L{LogicalUnit}
1062
  @param lu: the logical unit on whose behalf we execute
1063
  @type instance: L{objects.Instance}
1064
  @param instance: the instance for which we should build the
1065
      environment
1066
  @type override: dict
1067
  @param override: dictionary with key/values that will override
1068
      our values
1069
  @rtype: dict
1070
  @return: the hook environment dictionary
1071

1072
  """
1073
  cluster = lu.cfg.GetClusterInfo()
1074
  bep = cluster.FillBE(instance)
1075
  hvp = cluster.FillHV(instance)
1076
  args = {
1077
    "name": instance.name,
1078
    "primary_node": instance.primary_node,
1079
    "secondary_nodes": instance.secondary_nodes,
1080
    "os_type": instance.os,
1081
    "status": instance.admin_up,
1082
    "memory": bep[constants.BE_MEMORY],
1083
    "vcpus": bep[constants.BE_VCPUS],
1084
    "nics": _NICListToTuple(lu, instance.nics),
1085
    "disk_template": instance.disk_template,
1086
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1087
    "bep": bep,
1088
    "hvp": hvp,
1089
    "hypervisor_name": instance.hypervisor,
1090
    "tags": instance.tags,
1091
  }
1092
  if override:
1093
    args.update(override)
1094
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1095

    
1096

    
1097
def _AdjustCandidatePool(lu, exceptions):
1098
  """Adjust the candidate pool after node operations.
1099

1100
  """
1101
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1102
  if mod_list:
1103
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1104
               utils.CommaJoin(node.name for node in mod_list))
1105
    for name in mod_list:
1106
      lu.context.ReaddNode(name)
1107
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1108
  if mc_now > mc_max:
1109
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1110
               (mc_now, mc_max))
1111

    
1112

    
1113
def _DecideSelfPromotion(lu, exceptions=None):
1114
  """Decide whether I should promote myself as a master candidate.
1115

1116
  """
1117
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1118
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1119
  # the new node will increase mc_max with one, so:
1120
  mc_should = min(mc_should + 1, cp_size)
1121
  return mc_now < mc_should
1122

    
1123

    
1124
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1125
  """Check that the brigdes needed by a list of nics exist.
1126

1127
  """
1128
  cluster = lu.cfg.GetClusterInfo()
1129
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1130
  brlist = [params[constants.NIC_LINK] for params in paramslist
1131
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1132
  if brlist:
1133
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1134
    result.Raise("Error checking bridges on destination node '%s'" %
1135
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1136

    
1137

    
1138
def _CheckInstanceBridgesExist(lu, instance, node=None):
1139
  """Check that the brigdes needed by an instance exist.
1140

1141
  """
1142
  if node is None:
1143
    node = instance.primary_node
1144
  _CheckNicsBridgesExist(lu, instance.nics, node)
1145

    
1146

    
1147
def _CheckOSVariant(os_obj, name):
1148
  """Check whether an OS name conforms to the os variants specification.
1149

1150
  @type os_obj: L{objects.OS}
1151
  @param os_obj: OS object to check
1152
  @type name: string
1153
  @param name: OS name passed by the user, to check for validity
1154

1155
  """
1156
  variant = objects.OS.GetVariant(name)
1157
  if not os_obj.supported_variants:
1158
    if variant:
1159
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1160
                                 " passed)" % (os_obj.name, variant),
1161
                                 errors.ECODE_INVAL)
1162
    return
1163
  if not variant:
1164
    raise errors.OpPrereqError("OS name must include a variant",
1165
                               errors.ECODE_INVAL)
1166

    
1167
  if variant not in os_obj.supported_variants:
1168
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1169

    
1170

    
1171
def _GetNodeInstancesInner(cfg, fn):
1172
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1173

    
1174

    
1175
def _GetNodeInstances(cfg, node_name):
1176
  """Returns a list of all primary and secondary instances on a node.
1177

1178
  """
1179

    
1180
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1181

    
1182

    
1183
def _GetNodePrimaryInstances(cfg, node_name):
1184
  """Returns primary instances on a node.
1185

1186
  """
1187
  return _GetNodeInstancesInner(cfg,
1188
                                lambda inst: node_name == inst.primary_node)
1189

    
1190

    
1191
def _GetNodeSecondaryInstances(cfg, node_name):
1192
  """Returns secondary instances on a node.
1193

1194
  """
1195
  return _GetNodeInstancesInner(cfg,
1196
                                lambda inst: node_name in inst.secondary_nodes)
1197

    
1198

    
1199
def _GetStorageTypeArgs(cfg, storage_type):
1200
  """Returns the arguments for a storage type.
1201

1202
  """
1203
  # Special case for file storage
1204
  if storage_type == constants.ST_FILE:
1205
    # storage.FileStorage wants a list of storage directories
1206
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207

    
1208
  return []
1209

    
1210

    
1211
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1212
  faulty = []
1213

    
1214
  for dev in instance.disks:
1215
    cfg.SetDiskID(dev, node_name)
1216

    
1217
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1218
  result.Raise("Failed to get disk status from node %s" % node_name,
1219
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1220

    
1221
  for idx, bdev_status in enumerate(result.payload):
1222
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1223
      faulty.append(idx)
1224

    
1225
  return faulty
1226

    
1227

    
1228
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1229
  """Check the sanity of iallocator and node arguments and use the
1230
  cluster-wide iallocator if appropriate.
1231

1232
  Check that at most one of (iallocator, node) is specified. If none is
1233
  specified, then the LU's opcode's iallocator slot is filled with the
1234
  cluster-wide default iallocator.
1235

1236
  @type iallocator_slot: string
1237
  @param iallocator_slot: the name of the opcode iallocator slot
1238
  @type node_slot: string
1239
  @param node_slot: the name of the opcode target node slot
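
  Example (illustrative; the slot names depend on the calling LU's opcode)::

    _CheckIAllocatorOrNode(self, "iallocator", "node")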
1240

1241
  """
1242
  node = getattr(lu.op, node_slot, None)
1243
  iallocator = getattr(lu.op, iallocator_slot, None)
1244

    
1245
  if node is not None and iallocator is not None:
1246
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1247
                               errors.ECODE_INVAL)
1248
  elif node is None and iallocator is None:
1249
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1250
    if default_iallocator:
1251
      setattr(lu.op, iallocator_slot, default_iallocator)
1252
    else:
1253
      raise errors.OpPrereqError("No iallocator or node given and no"
1254
                                 " cluster-wide default iallocator found;"
1255
                                 " please specify either an iallocator or a"
1256
                                 " node, or set a cluster-wide default"
1257
                                 " iallocator")
1258

    
1259

    
1260
def _GetDefaultIAllocator(cfg, iallocator):
1261
  """Decides on which iallocator to use.
1262

1263
  @type cfg: L{config.ConfigWriter}
1264
  @param cfg: Cluster configuration object
1265
  @type iallocator: string or None
1266
  @param iallocator: Iallocator specified in opcode
1267
  @rtype: string
1268
  @return: Iallocator name
1269

1270
  """
1271
  if not iallocator:
1272
    # Use default iallocator
1273
    iallocator = cfg.GetDefaultIAllocator()
1274

    
1275
  if not iallocator:
1276
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1277
                               " opcode nor as a cluster-wide default",
1278
                               errors.ECODE_INVAL)
1279

    
1280
  return iallocator
1281

    
1282

    
1283
class LUClusterPostInit(LogicalUnit):
1284
  """Logical unit for running hooks after cluster initialization.
1285

1286
  """
1287
  HPATH = "cluster-init"
1288
  HTYPE = constants.HTYPE_CLUSTER
1289

    
1290
  def BuildHooksEnv(self):
1291
    """Build hooks env.
1292

1293
    """
1294
    return {
1295
      "OP_TARGET": self.cfg.GetClusterName(),
1296
      }
1297

    
1298
  def BuildHooksNodes(self):
1299
    """Build hooks nodes.
1300

1301
    """
1302
    return ([], [self.cfg.GetMasterNode()])
1303

    
1304
  def Exec(self, feedback_fn):
1305
    """Nothing to do.
1306

1307
    """
1308
    return True
1309

    
1310

    
1311
class LUClusterDestroy(LogicalUnit):
1312
  """Logical unit for destroying the cluster.
1313

1314
  """
1315
  HPATH = "cluster-destroy"
1316
  HTYPE = constants.HTYPE_CLUSTER
1317

    
1318
  def BuildHooksEnv(self):
1319
    """Build hooks env.
1320

1321
    """
1322
    return {
1323
      "OP_TARGET": self.cfg.GetClusterName(),
1324
      }
1325

    
1326
  def BuildHooksNodes(self):
1327
    """Build hooks nodes.
1328

1329
    """
1330
    return ([], [])
1331

    
1332
  def CheckPrereq(self):
1333
    """Check prerequisites.
1334

1335
    This checks whether the cluster is empty.
1336

1337
    Any errors are signaled by raising errors.OpPrereqError.
1338

1339
    """
1340
    master = self.cfg.GetMasterNode()
1341

    
1342
    nodelist = self.cfg.GetNodeList()
1343
    if len(nodelist) != 1 or nodelist[0] != master:
1344
      raise errors.OpPrereqError("There are still %d node(s) in"
1345
                                 " this cluster." % (len(nodelist) - 1),
1346
                                 errors.ECODE_INVAL)
1347
    instancelist = self.cfg.GetInstanceList()
1348
    if instancelist:
1349
      raise errors.OpPrereqError("There are still %d instance(s) in"
1350
                                 " this cluster." % len(instancelist),
1351
                                 errors.ECODE_INVAL)
1352

    
1353
  def Exec(self, feedback_fn):
1354
    """Destroys the cluster.
1355

1356
    """
1357
    master = self.cfg.GetMasterNode()
1358

    
1359
    # Run post hooks on master node before it's removed
1360
    _RunPostHook(self, master)
1361

    
1362
    result = self.rpc.call_node_deactivate_master_ip(master)
1363
    result.Raise("Could not disable the master role")
1364

    
1365
    return master
1366

    
1367

    
1368
def _VerifyCertificate(filename):
1369
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1370

1371
  @type filename: string
1372
  @param filename: Path to PEM file
1373

1374
  """
1375
  try:
1376
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1377
                                           utils.ReadFile(filename))
1378
  except Exception, err: # pylint: disable=W0703
1379
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1380
            "Failed to load X509 certificate %s: %s" % (filename, err))
1381

    
1382
  (errcode, msg) = \
1383
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1384
                                constants.SSL_CERT_EXPIRATION_ERROR)
1385

    
1386
  if msg:
1387
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1388
  else:
1389
    fnamemsg = None
1390

    
1391
  if errcode is None:
1392
    return (None, fnamemsg)
1393
  elif errcode == utils.CERT_WARNING:
1394
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1395
  elif errcode == utils.CERT_ERROR:
1396
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1397

    
1398
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1399

    
1400

    
1401
def _GetAllHypervisorParameters(cluster, instances):
1402
  """Compute the set of all hypervisor parameters.
1403

1404
  @type cluster: L{objects.Cluster}
1405
  @param cluster: the cluster object
1406
  @param instances: list of L{objects.Instance}
1407
  @param instances: additional instances from which to obtain parameters
1408
  @rtype: list of (origin, hypervisor, parameters)
1409
  @return: a list with all parameters found, indicating the hypervisor they
1410
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
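
  Example of a returned entry (illustrative values)::

    ("os debian-installer", "xen-pvm", {"kernel_path": "/boot/vmlinuz", ...})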
1411

1412
  """
1413
  hvp_data = []
1414

    
1415
  for hv_name in cluster.enabled_hypervisors:
1416
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1417

    
1418
  for os_name, os_hvp in cluster.os_hvp.items():
1419
    for hv_name, hv_params in os_hvp.items():
1420
      if hv_params:
1421
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1422
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1423

    
1424
  # TODO: collapse identical parameter values in a single one
1425
  for instance in instances:
1426
    if instance.hvparams:
1427
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1428
                       cluster.FillHV(instance)))
1429

    
1430
  return hvp_data
1431

    
1432

    
1433
class _VerifyErrors(object):
1434
  """Mix-in for cluster/group verify LUs.
1435

1436
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1437
  self.op and self._feedback_fn to be available.)
1438

1439
  """
1440

    
1441
  ETYPE_FIELD = "code"
1442
  ETYPE_ERROR = "ERROR"
1443
  ETYPE_WARNING = "WARNING"
1444

    
1445
  def _Error(self, ecode, item, msg, *args, **kwargs):
1446
    """Format an error message.
1447

1448
    Based on the opcode's error_codes parameter, either format a
1449
    parseable error code, or a simpler error string.
1450

1451
    This must be called only from Exec and functions called from Exec.
1452

1453
    """
1454
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1455
    itype, etxt, _ = ecode
1456
    # first complete the msg
1457
    if args:
1458
      msg = msg % args
1459
    # then format the whole message
1460
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1461
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1462
    else:
1463
      if item:
1464
        item = " " + item
1465
      else:
1466
        item = ""
1467
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1468
    # and finally report it via the feedback_fn
1469
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1470

    
1471
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1472
    """Log an error message if the passed condition is True.
1473

1474
    """
1475
    cond = (bool(cond)
1476
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1477

    
1478
    # If the error code is in the list of ignored errors, demote the error to a
1479
    # warning
1480
    (_, etxt, _) = ecode
1481
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1482
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1483

    
1484
    if cond:
1485
      self._Error(ecode, *args, **kwargs)
1486

    
1487
    # do not mark the operation as failed for WARN cases only
1488
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1489
      self.bad = self.bad or cond
1490

    
1491

    
1492
class LUClusterVerify(NoHooksLU):
1493
  """Submits all jobs necessary to verify the cluster.
1494

1495
  """
1496
  REQ_BGL = False
1497

    
1498
  def ExpandNames(self):
1499
    self.needed_locks = {}
1500

    
1501
  def Exec(self, feedback_fn):
1502
    jobs = []
1503

    
1504
    if self.op.group_name:
1505
      groups = [self.op.group_name]
1506
      depends_fn = lambda: None
1507
    else:
1508
      groups = self.cfg.GetNodeGroupList()
1509

    
1510
      # Verify global configuration
1511
      jobs.append([
1512
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1513
        ])
1514

    
1515
      # Always depend on global verification
1516
      depends_fn = lambda: [(-len(jobs), [])]
1517

    
1518
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1519
                                            ignore_errors=self.op.ignore_errors,
1520
                                            depends=depends_fn())]
1521
                for group in groups)
1522

    
1523
    # Fix up all parameters
1524
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1525
      op.debug_simulate_errors = self.op.debug_simulate_errors
1526
      op.verbose = self.op.verbose
1527
      op.error_codes = self.op.error_codes
1528
      try:
1529
        op.skip_checks = self.op.skip_checks
1530
      except AttributeError:
1531
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1532

    
1533
    return ResultWithJobs(jobs)
1534

    
1535

    
1536
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1537
  """Verifies the cluster config.
1538

1539
  """
1540
  REQ_BGL = True
1541

    
1542
  def _VerifyHVP(self, hvp_data):
1543
    """Verifies locally the syntax of the hypervisor parameters.
1544

1545
    """
1546
    for item, hv_name, hv_params in hvp_data:
1547
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1548
             (item, hv_name))
1549
      try:
1550
        hv_class = hypervisor.GetHypervisor(hv_name)
1551
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1552
        hv_class.CheckParameterSyntax(hv_params)
1553
      except errors.GenericError, err:
1554
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1555

    
1556
  def ExpandNames(self):
1557
    # Information can be safely retrieved as the BGL is acquired in exclusive
1558
    # mode
1559
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1560
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1561
    self.all_node_info = self.cfg.GetAllNodesInfo()
1562
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1563
    self.needed_locks = {}
1564

    
1565
  def Exec(self, feedback_fn):
1566
    """Verify integrity of cluster, performing various test on nodes.
1567

1568
    """
1569
    self.bad = False
1570
    self._feedback_fn = feedback_fn
1571

    
1572
    feedback_fn("* Verifying cluster config")
1573

    
1574
    for msg in self.cfg.VerifyConfig():
1575
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1576

    
1577
    feedback_fn("* Verifying cluster certificate files")
1578

    
1579
    for cert_filename in constants.ALL_CERT_FILES:
1580
      (errcode, msg) = _VerifyCertificate(cert_filename)
1581
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1582

    
1583
    feedback_fn("* Verifying hypervisor parameters")
1584

    
1585
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1586
                                                self.all_inst_info.values()))
1587

    
1588
    feedback_fn("* Verifying all nodes belong to an existing group")
1589

    
1590
    # We do this verification here because, should this bogus circumstance
1591
    # occur, it would never be caught by VerifyGroup, which only acts on
1592
    # nodes/instances reachable from existing node groups.
1593

    
1594
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1595
                         if node.group not in self.all_group_info)
1596

    
1597
    dangling_instances = {}
1598
    no_node_instances = []
1599

    
1600
    for inst in self.all_inst_info.values():
1601
      if inst.primary_node in dangling_nodes:
1602
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1603
      elif inst.primary_node not in self.all_node_info:
1604
        no_node_instances.append(inst.name)
1605

    
1606
    pretty_dangling = [
1607
        "%s (%s)" %
1608
        (node.name,
1609
         utils.CommaJoin(dangling_instances.get(node.name,
1610
                                                ["no instances"])))
1611
        for node in dangling_nodes]
1612

    
1613
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1614
                  None,
1615
                  "the following nodes (and their instances) belong to a non"
1616
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1617

    
1618
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1619
                  None,
1620
                  "the following instances have a non-existing primary-node:"
1621
                  " %s", utils.CommaJoin(no_node_instances))
1622

    
1623
    return not self.bad
1624

    
1625

    
1626
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1627
  """Verifies the status of a node group.
1628

1629
  """
1630
  HPATH = "cluster-verify"
1631
  HTYPE = constants.HTYPE_CLUSTER
1632
  REQ_BGL = False
1633

    
1634
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1635

    
1636
  class NodeImage(object):
1637
    """A class representing the logical and physical status of a node.
1638

1639
    @type name: string
1640
    @ivar name: the node name to which this object refers
1641
    @ivar volumes: a structure as returned from
1642
        L{ganeti.backend.GetVolumeList} (runtime)
1643
    @ivar instances: a list of running instances (runtime)
1644
    @ivar pinst: list of configured primary instances (config)
1645
    @ivar sinst: list of configured secondary instances (config)
1646
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1647
        instances for which this node is secondary (config)
1648
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1649
    @ivar dfree: free disk, as reported by the node (runtime)
1650
    @ivar offline: the offline status (config)
1651
    @type rpc_fail: boolean
1652
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1653
        not whether the individual keys were correct) (runtime)
1654
    @type lvm_fail: boolean
1655
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1656
    @type hyp_fail: boolean
1657
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1658
    @type ghost: boolean
1659
    @ivar ghost: whether this is a known node or not (config)
1660
    @type os_fail: boolean
1661
    @ivar os_fail: whether the RPC call didn't return valid OS data
1662
    @type oslist: list
1663
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1664
    @type vm_capable: boolean
1665
    @ivar vm_capable: whether the node can host instances
1666

1667
    """
1668
    def __init__(self, offline=False, name=None, vm_capable=True):
1669
      self.name = name
1670
      self.volumes = {}
1671
      self.instances = []
1672
      self.pinst = []
1673
      self.sinst = []
1674
      self.sbp = {}
1675
      self.mfree = 0
1676
      self.dfree = 0
1677
      self.offline = offline
1678
      self.vm_capable = vm_capable
1679
      self.rpc_fail = False
1680
      self.lvm_fail = False
1681
      self.hyp_fail = False
1682
      self.ghost = False
1683
      self.os_fail = False
1684
      self.oslist = {}
1685

    
1686
  def ExpandNames(self):
1687
    # This raises errors.OpPrereqError on its own:
1688
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1689

    
1690
    # Get instances in node group; this is unsafe and needs verification later
1691
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1692

    
1693
    self.needed_locks = {
1694
      locking.LEVEL_INSTANCE: inst_names,
1695
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1696
      locking.LEVEL_NODE: [],
1697
      }
1698

    
1699
    self.share_locks = _ShareAll()
1700

    
1701
  def DeclareLocks(self, level):
1702
    if level == locking.LEVEL_NODE:
1703
      # Get members of node group; this is unsafe and needs verification later
1704
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1705

    
1706
      all_inst_info = self.cfg.GetAllInstancesInfo()
1707

    
1708
      # In Exec(), we warn about mirrored instances that have primary and
1709
      # secondary living in separate node groups. To fully verify that
1710
      # volumes for these instances are healthy, we will need to do an
1711
      # extra call to their secondaries. We ensure here those nodes will
1712
      # be locked.
1713
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1714
        # Important: access only the instances whose lock is owned
1715
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1716
          nodes.update(all_inst_info[inst].secondary_nodes)
1717

    
1718
      self.needed_locks[locking.LEVEL_NODE] = nodes
1719

    
1720
  def CheckPrereq(self):
1721
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1722
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1723

    
1724
    group_nodes = set(self.group_info.members)
1725
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1726

    
1727
    unlocked_nodes = \
1728
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1729

    
1730
    unlocked_instances = \
1731
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1732

    
1733
    if unlocked_nodes:
1734
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1735
                                 utils.CommaJoin(unlocked_nodes))
1736

    
1737
    if unlocked_instances:
1738
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1739
                                 utils.CommaJoin(unlocked_instances))
1740

    
1741
    self.all_node_info = self.cfg.GetAllNodesInfo()
1742
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1743

    
1744
    self.my_node_names = utils.NiceSort(group_nodes)
1745
    self.my_inst_names = utils.NiceSort(group_instances)
1746

    
1747
    self.my_node_info = dict((name, self.all_node_info[name])
1748
                             for name in self.my_node_names)
1749

    
1750
    self.my_inst_info = dict((name, self.all_inst_info[name])
1751
                             for name in self.my_inst_names)
1752

    
1753
    # We detect here the nodes that will need the extra RPC calls for verifying
1754
    # split LV volumes; they should be locked.
1755
    extra_lv_nodes = set()
1756

    
1757
    for inst in self.my_inst_info.values():
1758
      if inst.disk_template in constants.DTS_INT_MIRROR:
1759
        group = self.my_node_info[inst.primary_node].group
1760
        for nname in inst.secondary_nodes:
1761
          if self.all_node_info[nname].group != group:
1762
            extra_lv_nodes.add(nname)
1763

    
1764
    unlocked_lv_nodes = \
1765
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1766

    
1767
    if unlocked_lv_nodes:
1768
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1769
                                 utils.CommaJoin(unlocked_lv_nodes))
1770
    self.extra_lv_nodes = list(extra_lv_nodes)
1771

    
1772
  def _VerifyNode(self, ninfo, nresult):
1773
    """Perform some basic validation on data returned from a node.
1774

1775
      - check the result data structure is well formed and has all the
1776
        mandatory fields
1777
      - check ganeti version
1778

1779
    @type ninfo: L{objects.Node}
1780
    @param ninfo: the node to check
1781
    @param nresult: the results from the node
1782
    @rtype: boolean
1783
    @return: whether overall this call was successful (and we can expect
1784
         reasonable values in the response)
1785

1786
    """
1787
    node = ninfo.name
1788
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1789

    
1790
    # main result, nresult should be a non-empty dict
1791
    test = not nresult or not isinstance(nresult, dict)
1792
    _ErrorIf(test, constants.CV_ENODERPC, node,
1793
                  "unable to verify node: no data returned")
1794
    if test:
1795
      return False
1796

    
1797
    # compares ganeti version
1798
    local_version = constants.PROTOCOL_VERSION
1799
    remote_version = nresult.get("version", None)
1800
    test = not (remote_version and
1801
                isinstance(remote_version, (list, tuple)) and
1802
                len(remote_version) == 2)
1803
    _ErrorIf(test, constants.CV_ENODERPC, node,
1804
             "connection to node returned invalid data")
1805
    if test:
1806
      return False
1807

    
1808
    test = local_version != remote_version[0]
1809
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1810
             "incompatible protocol versions: master %s,"
1811
             " node %s", local_version, remote_version[0])
1812
    if test:
1813
      return False
1814

    
1815
    # node seems compatible, we can actually try to look into its results
1816

    
1817
    # full package version
1818
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1819
                  constants.CV_ENODEVERSION, node,
1820
                  "software version mismatch: master %s, node %s",
1821
                  constants.RELEASE_VERSION, remote_version[1],
1822
                  code=self.ETYPE_WARNING)
1823

    
1824
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1825
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1826
      for hv_name, hv_result in hyp_result.iteritems():
1827
        test = hv_result is not None
1828
        _ErrorIf(test, constants.CV_ENODEHV, node,
1829
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1830

    
1831
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1832
    if ninfo.vm_capable and isinstance(hvp_result, list):
1833
      for item, hv_name, hv_result in hvp_result:
1834
        _ErrorIf(True, constants.CV_ENODEHV, node,
1835
                 "hypervisor %s parameter verify failure (source %s): %s",
1836
                 hv_name, item, hv_result)
1837

    
1838
    test = nresult.get(constants.NV_NODESETUP,
1839
                       ["Missing NODESETUP results"])
1840
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1841
             "; ".join(test))
1842

    
1843
    return True
1844

    
1845
  def _VerifyNodeTime(self, ninfo, nresult,
1846
                      nvinfo_starttime, nvinfo_endtime):
1847
    """Check the node time.
1848

1849
    @type ninfo: L{objects.Node}
1850
    @param ninfo: the node to check
1851
    @param nresult: the remote results for the node
1852
    @param nvinfo_starttime: the start time of the RPC call
1853
    @param nvinfo_endtime: the end time of the RPC call
1854

1855
    """
1856
    node = ninfo.name
1857
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1858

    
1859
    ntime = nresult.get(constants.NV_TIME, None)
1860
    try:
1861
      ntime_merged = utils.MergeTime(ntime)
1862
    except (ValueError, TypeError):
1863
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1864
      return
1865

    
1866
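    # Flag the node only if its time lies outside the window of the RPC call,
    # extended by the allowed clock skew on either side.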
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1867
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1868
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1869
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1870
    else:
1871
      ntime_diff = None
1872

    
1873
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1874
             "Node time diverges by at least %s from master node time",
1875
             ntime_diff)
1876

    
1877
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1878
    """Check the node LVM results.
1879

1880
    @type ninfo: L{objects.Node}
1881
    @param ninfo: the node to check
1882
    @param nresult: the remote results for the node
1883
    @param vg_name: the configured VG name
1884

1885
    """
1886
    if vg_name is None:
1887
      return
1888

    
1889
    node = ninfo.name
1890
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1891

    
1892
    # checks vg existence and size > 20G
1893
    vglist = nresult.get(constants.NV_VGLIST, None)
1894
    test = not vglist
1895
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1896
    if not test:
1897
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1898
                                            constants.MIN_VG_SIZE)
1899
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1900

    
1901
    # check pv names
1902
    pvlist = nresult.get(constants.NV_PVLIST, None)
1903
    test = pvlist is None
1904
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1905
    if not test:
1906
      # check that ':' is not present in PV names, since it's a
1907
      # special character for lvcreate (denotes the range of PEs to
1908
      # use on the PV)
1909
      for _, pvname, owner_vg in pvlist:
1910
        test = ":" in pvname
1911
        _ErrorIf(test, constants.CV_ENODELVM, node,
1912
                 "Invalid character ':' in PV '%s' of VG '%s'",
1913
                 pvname, owner_vg)
1914

    
1915
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1916
    """Check the node bridges.
1917

1918
    @type ninfo: L{objects.Node}
1919
    @param ninfo: the node to check
1920
    @param nresult: the remote results for the node
1921
    @param bridges: the expected list of bridges
1922

1923
    """
1924
    if not bridges:
1925
      return
1926

    
1927
    node = ninfo.name
1928
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1929

    
1930
    missing = nresult.get(constants.NV_BRIDGES, None)
1931
    test = not isinstance(missing, list)
1932
    _ErrorIf(test, constants.CV_ENODENET, node,
1933
             "did not return valid bridge information")
1934
    if not test:
1935
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1936
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1937

    
1938
  def _VerifyNodeNetwork(self, ninfo, nresult):
1939
    """Check the node network connectivity results.
1940

1941
    @type ninfo: L{objects.Node}
1942
    @param ninfo: the node to check
1943
    @param nresult: the remote results for the node
1944

1945
    """
1946
    node = ninfo.name
1947
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1948

    
1949
    test = constants.NV_NODELIST not in nresult
1950
    _ErrorIf(test, constants.CV_ENODESSH, node,
1951
             "node hasn't returned node ssh connectivity data")
1952
    if not test:
1953
      if nresult[constants.NV_NODELIST]:
1954
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1955
          _ErrorIf(True, constants.CV_ENODESSH, node,
1956
                   "ssh communication with node '%s': %s", a_node, a_msg)
1957

    
1958
    test = constants.NV_NODENETTEST not in nresult
1959
    _ErrorIf(test, constants.CV_ENODENET, node,
1960
             "node hasn't returned node tcp connectivity data")
1961
    if not test:
1962
      if nresult[constants.NV_NODENETTEST]:
1963
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1964
        for anode in nlist:
1965
          _ErrorIf(True, constants.CV_ENODENET, node,
1966
                   "tcp communication with node '%s': %s",
1967
                   anode, nresult[constants.NV_NODENETTEST][anode])
1968

    
1969
    test = constants.NV_MASTERIP not in nresult
1970
    _ErrorIf(test, constants.CV_ENODENET, node,
1971
             "node hasn't returned node master IP reachability data")
1972
    if not test:
1973
      if not nresult[constants.NV_MASTERIP]:
1974
        if node == self.master_node:
1975
          msg = "the master node cannot reach the master IP (not configured?)"
1976
        else:
1977
          msg = "cannot reach the master IP"
1978
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
1979

    
1980
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1981
                      diskstatus):
1982
    """Verify an instance.
1983

1984
    This function checks to see if the required block devices are
1985
    available on the instance's node.
1986

1987
    """
1988
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1989
    node_current = instanceconfig.primary_node
1990

    
1991
    node_vol_should = {}
1992
    instanceconfig.MapLVsByNode(node_vol_should)
1993

    
1994
    for node in node_vol_should:
1995
      n_img = node_image[node]
1996
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1997
        # ignore missing volumes on offline or broken nodes
1998
        continue
1999
      for volume in node_vol_should[node]:
2000
        test = volume not in n_img.volumes
2001
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2002
                 "volume %s missing on node %s", volume, node)
2003

    
2004
    if instanceconfig.admin_up:
2005
      pri_img = node_image[node_current]
2006
      test = instance not in pri_img.instances and not pri_img.offline
2007
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2008
               "instance not running on its primary node %s",
2009
               node_current)
2010

    
2011
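    # Flatten the per-node disk status into one (node, success, status,
    # disk index) tuple per disk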
    diskdata = [(nname, success, status, idx)
2012
                for (nname, disks) in diskstatus.items()
2013
                for idx, (success, status) in enumerate(disks)]
2014

    
2015
    for nname, success, bdev_status, idx in diskdata:
2016
      # the 'ghost node' construction in Exec() ensures that we have a
2017
      # node here
2018
      snode = node_image[nname]
2019
      bad_snode = snode.ghost or snode.offline
2020
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2021
               constants.CV_EINSTANCEFAULTYDISK, instance,
2022
               "couldn't retrieve status for disk/%s on %s: %s",
2023
               idx, nname, bdev_status)
2024
      _ErrorIf((instanceconfig.admin_up and success and
2025
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2026
               constants.CV_EINSTANCEFAULTYDISK, instance,
2027
               "disk/%s on %s is faulty", idx, nname)
2028

    
2029
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2030
    """Verify if there are any unknown volumes in the cluster.
2031

2032
    The .os, .swap and backup volumes are ignored. All other volumes are
2033
    reported as unknown.
2034

2035
    @type reserved: L{ganeti.utils.FieldSet}
2036
    @param reserved: a FieldSet of reserved volume names
2037

2038
    """
2039
    for node, n_img in node_image.items():
2040
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2041
        # skip non-healthy nodes
2042
        continue
2043
      for volume in n_img.volumes:
2044
        test = ((node not in node_vol_should or
2045
                volume not in node_vol_should[node]) and
2046
                not reserved.Matches(volume))
2047
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2048
                      "volume %s is unknown", volume)
2049

    
2050
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2051
    """Verify N+1 Memory Resilience.
2052

2053
    Check that if one single node dies we can still start all the
2054
    instances it was primary for.
2055

2056
    """
2057
    cluster_info = self.cfg.GetClusterInfo()
2058
    for node, n_img in node_image.items():
2059
      # This code checks that every node which is now listed as a secondary
2060
      # has enough memory to host all the instances it is supposed to take
2061
      # over, should a single other node in the cluster fail.
2062
      # FIXME: not ready for failover to an arbitrary node
2063
      # FIXME: does not support file-backed instances
2064
      # WARNING: we currently take into account down instances as well
2065
      # as up ones, considering that even if they're down someone
2066
      # might want to start them even in the event of a node failure.
2067
      if n_img.offline:
2068
        # we're skipping offline nodes from the N+1 warning, since
2069
        # most likely we don't have good memory information from them;
2070
        # we already list instances living on such nodes, and that's
2071
        # enough warning
2072
        continue
2073
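      # n_img.sbp maps each primary node to the instances this node carries
      # as secondary, i.e. what it would have to host on failover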
      for prinode, instances in n_img.sbp.items():
2074
        needed_mem = 0
2075
        for instance in instances:
2076
          bep = cluster_info.FillBE(instance_cfg[instance])
2077
          if bep[constants.BE_AUTO_BALANCE]:
2078
            needed_mem += bep[constants.BE_MEMORY]
2079
        test = n_img.mfree < needed_mem
2080
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2081
                      "not enough memory to accomodate instance failovers"
2082
                      " should node %s fail (%dMiB needed, %dMiB available)",
2083
                      prinode, needed_mem, n_img.mfree)
2084

    
2085
  @classmethod
2086
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2087
                   (files_all, files_opt, files_mc, files_vm)):
2088
    """Verifies file checksums collected from all nodes.
2089

2090
    @param errorif: Callback for reporting errors
2091
    @param nodeinfo: List of L{objects.Node} objects
2092
    @param master_node: Name of master node
2093
    @param all_nvinfo: RPC results
2094

2095
    """
2096
    # Define functions determining which nodes to consider for a file
2097
    files2nodefn = [
2098
      (files_all, None),
2099
      (files_mc, lambda node: (node.master_candidate or
2100
                               node.name == master_node)),
2101
      (files_vm, lambda node: node.vm_capable),
2102
      ]
2103

    
2104
    # Build mapping from filename to list of nodes which should have the file
2105
    nodefiles = {}
2106
    for (files, fn) in files2nodefn:
2107
      if fn is None:
2108
        filenodes = nodeinfo
2109
      else:
2110
        filenodes = filter(fn, nodeinfo)
2111
      nodefiles.update((filename,
2112
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2113
                       for filename in files)
2114

    
2115
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2116

    
2117
    fileinfo = dict((filename, {}) for filename in nodefiles)
2118
    ignore_nodes = set()
2119

    
2120
    for node in nodeinfo:
2121
      if node.offline:
2122
        ignore_nodes.add(node.name)
2123
        continue
2124

    
2125
      nresult = all_nvinfo[node.name]
2126

    
2127
      if nresult.fail_msg or not nresult.payload:
2128
        node_files = None
2129
      else:
2130
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2131

    
2132
      test = not (node_files and isinstance(node_files, dict))
2133
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2134
              "Node did not return file checksum data")
2135
      if test:
2136
        ignore_nodes.add(node.name)
2137
        continue
2138

    
2139
      # Build per-checksum mapping from filename to nodes having it
2140
      for (filename, checksum) in node_files.items():
2141
        assert filename in nodefiles
2142
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2143

    
2144
    for (filename, checksums) in fileinfo.items():
2145
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2146

    
2147
      # Nodes having the file
2148
      with_file = frozenset(node_name
2149
                            for nodes in fileinfo[filename].values()
2150
                            for node_name in nodes) - ignore_nodes
2151

    
2152
      expected_nodes = nodefiles[filename] - ignore_nodes
2153

    
2154
      # Nodes missing file
2155
      missing_file = expected_nodes - with_file
2156

    
2157
      if filename in files_opt:
2158
        # All or no nodes
2159
        errorif(missing_file and missing_file != expected_nodes,
2160
                constants.CV_ECLUSTERFILECHECK, None,
2161
                "File %s is optional, but it must exist on all or no"
2162
                " nodes (not found on %s)",
2163
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2164
      else:
2165
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2166
                "File %s is missing from node(s) %s", filename,
2167
                utils.CommaJoin(utils.NiceSort(missing_file)))
2168

    
2169
        # Warn if a node has a file it shouldn't
2170
        unexpected = with_file - expected_nodes
2171
        errorif(unexpected,
2172
                constants.CV_ECLUSTERFILECHECK, None,
2173
                "File %s should not exist on node(s) %s",
2174
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2175

    
2176
      # See if there are multiple versions of the file
2177
      test = len(checksums) > 1
2178
      if test:
2179
        variants = ["variant %s on %s" %
2180
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2181
                    for (idx, (checksum, nodes)) in
2182
                      enumerate(sorted(checksums.items()))]
2183
      else:
2184
        variants = []
2185

    
2186
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2187
              "File %s found with %s different checksums (%s)",
2188
              filename, len(checksums), "; ".join(variants))
2189

    
2190
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2191
                      drbd_map):
2192
    """Verifies and the node DRBD status.
2193

2194
    @type ninfo: L{objects.Node}
2195
    @param ninfo: the node to check
2196
    @param nresult: the remote results for the node
2197
    @param instanceinfo: the dict of instances
2198
    @param drbd_helper: the configured DRBD usermode helper
2199
    @param drbd_map: the DRBD map as returned by
2200
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2201

2202
    """
2203
    node = ninfo.name
2204
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2205

    
2206
    if drbd_helper:
2207
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2208
      test = (helper_result is None)
2209
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2210
               "no drbd usermode helper returned")
2211
      if helper_result:
2212
        status, payload = helper_result
2213
        test = not status
2214
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2215
                 "drbd usermode helper check unsuccessful: %s", payload)
2216
        test = status and (payload != drbd_helper)
2217
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2218
                 "wrong drbd usermode helper: %s", payload)
2219

    
2220
    # compute the DRBD minors
2221
    node_drbd = {}
2222
    for minor, instance in drbd_map[node].items():
2223
      test = instance not in instanceinfo
2224
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2225
               "ghost instance '%s' in temporary DRBD map", instance)
2226
        # ghost instance should not be running, but otherwise we
2227
        # don't give double warnings (both ghost instance and
2228
        # unallocated minor in use)
2229
      if test:
2230
        node_drbd[minor] = (instance, False)
2231
      else:
2232
        instance = instanceinfo[instance]
2233
        node_drbd[minor] = (instance.name, instance.admin_up)
2234

    
2235
    # and now check them
2236
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2237
    test = not isinstance(used_minors, (tuple, list))
2238
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2239
             "cannot parse drbd status file: %s", str(used_minors))
2240
    if test:
2241
      # we cannot check drbd status
2242
      return
2243

    
2244
    for minor, (iname, must_exist) in node_drbd.items():
2245
      test = minor not in used_minors and must_exist
2246
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2247
               "drbd minor %d of instance %s is not active", minor, iname)
2248
    for minor in used_minors:
2249
      test = minor not in node_drbd
2250
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2251
               "unallocated drbd minor %d is in use", minor)
2252

    
2253
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2254
    """Builds the node OS structures.
2255

2256
    @type ninfo: L{objects.Node}
2257
    @param ninfo: the node to check
2258
    @param nresult: the remote results for the node
2259
    @param nimg: the node image object
2260

2261
    """
2262
    node = ninfo.name
2263
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2264

    
2265
    remote_os = nresult.get(constants.NV_OSLIST, None)
2266
    test = (not isinstance(remote_os, list) or
2267
            not compat.all(isinstance(v, list) and len(v) == 7
2268
                           for v in remote_os))
2269

    
2270
    _ErrorIf(test, constants.CV_ENODEOS, node,
2271
             "node hasn't returned valid OS data")
2272

    
2273
    nimg.os_fail = test
2274

    
2275
    if test:
2276
      return
2277

    
2278
    os_dict = {}
2279

    
2280
    for (name, os_path, status, diagnose,
2281
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2282

    
2283
      if name not in os_dict:
2284
        os_dict[name] = []
2285

    
2286
      # parameters is a list of lists instead of list of tuples due to
2287
      # JSON lacking a real tuple type, fix it:
2288
      parameters = [tuple(v) for v in parameters]
2289
      os_dict[name].append((os_path, status, diagnose,
2290
                            set(variants), set(parameters), set(api_ver)))
2291

    
2292
    nimg.oslist = os_dict
2293

    
2294
  def _VerifyNodeOS(self, ninfo, nimg, base):
2295
    """Verifies the node OS list.
2296

2297
    @type ninfo: L{objects.Node}
2298
    @param ninfo: the node to check
2299
    @param nimg: the node image object
2300
    @param base: the 'template' node we match against (e.g. from the master)
2301

2302
    """
2303
    node = ninfo.name
2304
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2305

    
2306
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2307

    
2308
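    # render parameter lists of (key, value) pairs as "key: value" strings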
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2309
    for os_name, os_data in nimg.oslist.items():
2310
      assert os_data, "Empty OS status for OS %s?!" % os_name
2311
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2312
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2313
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2314
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2315
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2316
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2317
      # comparisons with the 'base' image
2318
      test = os_name not in base.oslist
2319
      _ErrorIf(test, constants.CV_ENODEOS, node,
2320
               "Extra OS %s not present on reference node (%s)",
2321
               os_name, base.name)
2322
      if test:
2323
        continue
2324
      assert base.oslist[os_name], "Base node has empty OS status?"
2325
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2326
      if not b_status:
2327
        # base OS is invalid, skipping
2328
        continue
2329
      for kind, a, b in [("API version", f_api, b_api),
2330
                         ("variants list", f_var, b_var),
2331
                         ("parameters", beautify_params(f_param),
2332
                          beautify_params(b_param))]:
2333
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2334
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2335
                 kind, os_name, base.name,
2336
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2337

    
2338
    # check any missing OSes
2339
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2340
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2341
             "OSes present on reference node %s but missing on this node: %s",
2342
             base.name, utils.CommaJoin(missing))
2343

    
2344
  def _VerifyOob(self, ninfo, nresult):
2345
    """Verifies out of band functionality of a node.
2346

2347
    @type ninfo: L{objects.Node}
2348
    @param ninfo: the node to check
2349
    @param nresult: the remote results for the node
2350

2351
    """
2352
    node = ninfo.name
2353
    # We just have to verify the paths on master and/or master candidates
2354
    # as the oob helper is invoked on the master
2355
    if ((ninfo.master_candidate or ninfo.master_capable) and
2356
        constants.NV_OOB_PATHS in nresult):
2357
      for path_result in nresult[constants.NV_OOB_PATHS]:
2358
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2359

    
2360
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2361
    """Verifies and updates the node volume data.
2362

2363
    This function will update a L{NodeImage}'s internal structures
2364
    with data from the remote call.
2365

2366
    @type ninfo: L{objects.Node}
2367
    @param ninfo: the node to check
2368
    @param nresult: the remote results for the node
2369
    @param nimg: the node image object
2370
    @param vg_name: the configured VG name
2371

2372
    """
2373
    node = ninfo.name
2374
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375

    
2376
    nimg.lvm_fail = True
2377
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2378
    if vg_name is None:
2379
      pass
2380
    elif isinstance(lvdata, basestring):
2381
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2382
               utils.SafeEncode(lvdata))
2383
    elif not isinstance(lvdata, dict):
2384
      _ErrorIf(True, constants.CV_ENODELVM, node,
2385
               "rpc call to node failed (lvlist)")
2386
    else:
2387
      nimg.volumes = lvdata
2388
      nimg.lvm_fail = False
2389

    
2390
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2391
    """Verifies and updates the node instance list.
2392

2393
    If the listing was successful, then updates this node's instance
2394
    list. Otherwise, it marks the RPC call as failed for the instance
2395
    list key.
2396

2397
    @type ninfo: L{objects.Node}
2398
    @param ninfo: the node to check
2399
    @param nresult: the remote results for the node
2400
    @param nimg: the node image object
2401

2402
    """
2403
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2404
    test = not isinstance(idata, list)
2405
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2406
                  "rpc call to node failed (instancelist): %s",
2407
                  utils.SafeEncode(str(idata)))
2408
    if test:
2409
      nimg.hyp_fail = True
2410
    else:
2411
      nimg.instances = idata
2412

    
2413
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2414
    """Verifies and computes a node information map
2415

2416
    @type ninfo: L{objects.Node}
2417
    @param ninfo: the node to check
2418
    @param nresult: the remote results for the node
2419
    @param nimg: the node image object
2420
    @param vg_name: the configured VG name
2421

2422
    """
2423
    node = ninfo.name
2424
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2425

    
2426
    # try to read free memory (from the hypervisor)
2427
    hv_info = nresult.get(constants.NV_HVINFO, None)
2428
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2429
    _ErrorIf(test, constants.CV_ENODEHV, node,
2430
             "rpc call to node failed (hvinfo)")
2431
    if not test:
2432
      try:
2433
        nimg.mfree = int(hv_info["memory_free"])
2434
      except (ValueError, TypeError):
2435
        _ErrorIf(True, constants.CV_ENODERPC, node,
2436
                 "node returned invalid nodeinfo, check hypervisor")
2437

    
2438
    # FIXME: devise a free space model for file based instances as well
2439
    if vg_name is not None:
2440
      test = (constants.NV_VGLIST not in nresult or
2441
              vg_name not in nresult[constants.NV_VGLIST])
2442
      _ErrorIf(test, constants.CV_ENODELVM, node,
2443
               "node didn't return data for the volume group '%s'"
2444
               " - it is either missing or broken", vg_name)
2445
      if not test:
2446
        try:
2447
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2448
        except (ValueError, TypeError):
2449
          _ErrorIf(True, constants.CV_ENODERPC, node,
2450
                   "node returned invalid LVM info, check LVM status")
2451

    
2452
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2453
    """Gets per-disk status information for all instances.
2454

2455
    @type nodelist: list of strings
2456
    @param nodelist: Node names
2457
    @type node_image: dict of (name, L{objects.Node})
2458
    @param node_image: Node objects
2459
    @type instanceinfo: dict of (name, L{objects.Instance})
2460
    @param instanceinfo: Instance objects
2461
    @rtype: {instance: {node: [(success, payload)]}}
2462
    @return: a dictionary of per-instance dictionaries with nodes as
2463
        keys and disk information as values; the disk information is a
2464
        list of tuples (success, payload)
2465

2466
    """
2467
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2468

    
2469
    node_disks = {}
2470
    node_disks_devonly = {}
2471
    diskless_instances = set()
2472
    diskless = constants.DT_DISKLESS
2473

    
2474
    for nname in nodelist:
2475
      node_instances = list(itertools.chain(node_image[nname].pinst,
2476
                                            node_image[nname].sinst))
2477
      diskless_instances.update(inst for inst in node_instances
2478
                                if instanceinfo[inst].disk_template == diskless)
2479
      disks = [(inst, disk)
2480
               for inst in node_instances
2481
               for disk in instanceinfo[inst].disks]
2482

    
2483
      if not disks:
2484
        # No need to collect data
2485
        continue
2486

    
2487
      node_disks[nname] = disks
2488

    
2489
      # Creating copies as SetDiskID below will modify the objects and that can
2490
      # lead to incorrect data returned from nodes
2491
      devonly = [dev.Copy() for (_, dev) in disks]
2492

    
2493
      for dev in devonly:
2494
        self.cfg.SetDiskID(dev, nname)
2495

    
2496
      node_disks_devonly[nname] = devonly
2497

    
2498
    assert len(node_disks) == len(node_disks_devonly)
2499

    
2500
    # Collect data from all nodes with disks
2501
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2502
                                                          node_disks_devonly)
2503

    
2504
    assert len(result) == len(node_disks)
2505

    
2506
    instdisk = {}
2507

    
2508
    for (nname, nres) in result.items():
2509
      disks = node_disks[nname]
2510

    
2511
      if nres.offline:
2512
        # No data from this node
2513
        data = len(disks) * [(False, "node offline")]
2514
      else:
2515
        msg = nres.fail_msg
2516
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2517
                 "while getting disk information: %s", msg)
2518
        if msg:
2519
          # No data from this node
2520
          data = len(disks) * [(False, msg)]
2521
        else:
2522
          data = []
2523
          for idx, i in enumerate(nres.payload):
2524
            if isinstance(i, (tuple, list)) and len(i) == 2:
2525
              data.append(i)
2526
            else:
2527
              logging.warning("Invalid result from node %s, entry %d: %s",
2528
                              nname, idx, i)
2529
              data.append((False, "Invalid result from the remote node"))
2530

    
2531
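      # disks and data are index-aligned, so pair each (instance, disk) entry
      # with the status reported for it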
      for ((inst, _), status) in zip(disks, data):
2532
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2533

    
2534
    # Add empty entries for diskless instances.
2535
    for inst in diskless_instances:
2536
      assert inst not in instdisk
2537
      instdisk[inst] = {}
2538

    
2539
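    # Sanity check: every instance must have one (success, payload) pair per
    # disk, reported by no more nodes than it actually spans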
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2540
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2541
                      compat.all(isinstance(s, (tuple, list)) and
2542
                                 len(s) == 2 for s in statuses)
2543
                      for inst, nnames in instdisk.items()
2544
                      for nname, statuses in nnames.items())
2545
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2546

    
2547
    return instdisk
2548

    
2549
  @staticmethod
2550
  def _SshNodeSelector(group_uuid, all_nodes):
2551
    """Create endless iterators for all potential SSH check hosts.
2552

2553
    """
2554
    nodes = [node for node in all_nodes
2555
             if (node.group != group_uuid and
2556
                 not node.offline)]
2557
    keyfunc = operator.attrgetter("group")
2558

    
2559
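    # One endless iterator per foreign group, each cycling over that group's
    # online node names in sorted order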
    return map(itertools.cycle,
2560
               [sorted(map(operator.attrgetter("name"), names))
2561
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2562
                                                  keyfunc)])
2563

    
2564
  @classmethod
2565
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2566
    """Choose which nodes should talk to which other nodes.
2567

2568
    We will make nodes contact all nodes in their group, and one node from
2569
    every other group.
2570

2571
    @warning: This algorithm has a known issue if one node group is much
2572
      smaller than others (e.g. just one node). In such a case all other
2573
      nodes will talk to the single node.
2574

2575
    """
2576
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2577
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2578

    
2579
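    # Every online node in this group gets one peer from each other group by
    # advancing that group's iterator, spreading the checks across nodes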
    return (online_nodes,
2580
            dict((name, sorted([i.next() for i in sel]))
2581
                 for name in online_nodes))
2582

    
2583
  def BuildHooksEnv(self):
2584
    """Build hooks env.
2585

2586
    Cluster-Verify hooks are run in the post phase only; if they fail, their
2587
    output is logged in the verify output and the verification fails.
2588

2589
    """
2590
    env = {
2591
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2592
      }
2593

    
2594
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2595
               for node in self.my_node_info.values())
2596

    
2597
    return env
2598

    
2599
  def BuildHooksNodes(self):
2600
    """Build hooks nodes.
2601

2602
    """
2603
    return ([], self.my_node_names)
2604

    
2605
  def Exec(self, feedback_fn):
2606
    """Verify integrity of the node group, performing various test on nodes.
2607

2608
    """
2609
    # This method has too many local variables. pylint: disable=R0914
2610
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2611

    
2612
    if not self.my_node_names:
2613
      # empty node group
2614
      feedback_fn("* Empty node group, skipping verification")
2615
      return True
2616

    
2617
    self.bad = False
2618
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619
    verbose = self.op.verbose
2620
    self._feedback_fn = feedback_fn
2621

    
2622
    vg_name = self.cfg.GetVGName()
2623
    drbd_helper = self.cfg.GetDRBDHelper()
2624
    cluster = self.cfg.GetClusterInfo()
2625
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2626
    hypervisors = cluster.enabled_hypervisors
2627
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2628

    
2629
    i_non_redundant = [] # Non redundant instances
2630
    i_non_a_balanced = [] # Non auto-balanced instances
2631
    n_offline = 0 # Count of offline nodes
2632
    n_drained = 0 # Count of nodes being drained
2633
    node_vol_should = {}
2634

    
2635
    # FIXME: verify OS list
2636

    
2637
    # File verification
2638
    filemap = _ComputeAncillaryFiles(cluster, False)
2639

    
2640
    # do local checksums
2641
    master_node = self.master_node = self.cfg.GetMasterNode()
2642
    master_ip = self.cfg.GetMasterIP()
2643

    
2644
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2645

    
2646
    node_verify_param = {
2647
      constants.NV_FILELIST:
2648
        utils.UniqueSequence(filename
2649
                             for files in filemap
2650
                             for filename in files),
2651
      constants.NV_NODELIST:
2652
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2653
                                  self.all_node_info.values()),
2654
      constants.NV_HYPERVISOR: hypervisors,
2655
      constants.NV_HVPARAMS:
2656
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2657
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2658
                                 for node in node_data_list
2659
                                 if not node.offline],
2660
      constants.NV_INSTANCELIST: hypervisors,
2661
      constants.NV_VERSION: None,
2662
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2663
      constants.NV_NODESETUP: None,
2664
      constants.NV_TIME: None,
2665
      constants.NV_MASTERIP: (master_node, master_ip),
2666
      constants.NV_OSLIST: None,
2667
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2668
      }
2669

    
2670
    if vg_name is not None:
2671
      node_verify_param[constants.NV_VGLIST] = None
2672
      node_verify_param[constants.NV_LVLIST] = vg_name
2673
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2674
      node_verify_param[constants.NV_DRBDLIST] = None
2675

    
2676
    if drbd_helper:
2677
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2678

    
2679
    # bridge checks
2680
    # FIXME: this needs to be changed per node-group, not cluster-wide
2681
    bridges = set()
2682
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2683
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2684
      bridges.add(default_nicpp[constants.NIC_LINK])
2685
    for instance in self.my_inst_info.values():
2686
      for nic in instance.nics:
2687
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2688
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2689
          bridges.add(full_nic[constants.NIC_LINK])
2690

    
2691
    if bridges:
2692
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2693

    
2694
    # Build our expected cluster state
2695
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2696
                                                 name=node.name,
2697
                                                 vm_capable=node.vm_capable))
2698
                      for node in node_data_list)
2699

    
2700
    # Gather OOB paths
2701
    oob_paths = []
2702
    for node in self.all_node_info.values():
2703
      path = _SupportsOob(self.cfg, node)
2704
      if path and path not in oob_paths:
2705
        oob_paths.append(path)
2706

    
2707
    if oob_paths:
2708
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2709

    
2710
    for instance in self.my_inst_names:
2711
      inst_config = self.my_inst_info[instance]
2712

    
2713
      for nname in inst_config.all_nodes:
2714
        if nname not in node_image:
2715
          gnode = self.NodeImage(name=nname)
2716
          gnode.ghost = (nname not in self.all_node_info)
2717
          node_image[nname] = gnode
2718

    
2719
      inst_config.MapLVsByNode(node_vol_should)
2720

    
2721
      pnode = inst_config.primary_node
2722
      node_image[pnode].pinst.append(instance)
2723

    
2724
      for snode in inst_config.secondary_nodes:
2725
        nimg = node_image[snode]
2726
        nimg.sinst.append(instance)
2727
        if pnode not in nimg.sbp:
2728
          nimg.sbp[pnode] = []
2729
        nimg.sbp[pnode].append(instance)
2730

    
2731
    # At this point, we have the in-memory data structures complete,
2732
    # except for the runtime information, which we'll gather next
2733

    
2734
    # Due to the way our RPC system works, exact response times cannot be
2735
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2736
    # time before and after executing the request, we can at least have a time
2737
    # window.
2738
    nvinfo_starttime = time.time()
2739
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2740
                                           node_verify_param,
2741
                                           self.cfg.GetClusterName())
2742
    nvinfo_endtime = time.time()
2743

    
2744
    if self.extra_lv_nodes and vg_name is not None:
2745
      extra_lv_nvinfo = \
2746
          self.rpc.call_node_verify(self.extra_lv_nodes,
2747
                                    {constants.NV_LVLIST: vg_name},
2748
                                    self.cfg.GetClusterName())
2749
    else:
2750
      extra_lv_nvinfo = {}
2751

    
2752
    all_drbd_map = self.cfg.ComputeDRBDMap()
2753

    
2754
    feedback_fn("* Gathering disk information (%s nodes)" %
2755
                len(self.my_node_names))
2756
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2757
                                     self.my_inst_info)
2758

    
2759
    feedback_fn("* Verifying configuration file consistency")
2760

    
2761
    # If not all nodes are being checked, we need to make sure the master node
2762
    # and a non-checked vm_capable node are in the list.
2763
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2764
    if absent_nodes:
2765
      vf_nvinfo = all_nvinfo.copy()
2766
      vf_node_info = list(self.my_node_info.values())
2767
      additional_nodes = []
2768
      if master_node not in self.my_node_info:
2769
        additional_nodes.append(master_node)
2770
        vf_node_info.append(self.all_node_info[master_node])
2771
      # Add the first vm_capable node we find which is not included
2772
      for node in absent_nodes:
2773
        nodeinfo = self.all_node_info[node]
2774
        if nodeinfo.vm_capable and not nodeinfo.offline:
2775
          additional_nodes.append(node)
2776
          vf_node_info.append(self.all_node_info[node])
2777
          break
2778
      key = constants.NV_FILELIST
2779
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2780
                                                 {key: node_verify_param[key]},
2781
                                                 self.cfg.GetClusterName()))
2782
    else:
2783
      vf_nvinfo = all_nvinfo
2784
      vf_node_info = self.my_node_info.values()
2785

    
2786
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2787

    
2788
    feedback_fn("* Verifying node status")
2789

    
2790
    refos_img = None
2791

    
2792
    for node_i in node_data_list:
2793
      node = node_i.name
2794
      nimg = node_image[node]
2795

    
2796
      if node_i.offline:
2797
        if verbose:
2798
          feedback_fn("* Skipping offline node %s" % (node,))
2799
        n_offline += 1
2800
        continue
2801

    
2802
      if node == master_node:
2803
        ntype = "master"
2804
      elif node_i.master_candidate:
2805
        ntype = "master candidate"
2806
      elif node_i.drained:
2807
        ntype = "drained"
2808
        n_drained += 1
2809
      else:
2810
        ntype = "regular"
2811
      if verbose:
2812
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2813

    
2814
      msg = all_nvinfo[node].fail_msg
2815
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2816
               msg)
2817
      if msg:
2818
        nimg.rpc_fail = True
2819
        continue
2820

    
2821
      nresult = all_nvinfo[node].payload
2822

    
2823
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2824
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2825
      self._VerifyNodeNetwork(node_i, nresult)
2826
      self._VerifyOob(node_i, nresult)
2827

    
2828
      if nimg.vm_capable:
2829
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2830
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2831
                             all_drbd_map)
2832

    
2833
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2834
        self._UpdateNodeInstances(node_i, nresult, nimg)
2835
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2836
        self._UpdateNodeOS(node_i, nresult, nimg)
2837

    
2838
        if not nimg.os_fail:
2839
          if refos_img is None:
2840
            refos_img = nimg
2841
          self._VerifyNodeOS(node_i, nimg, refos_img)
2842
        self._VerifyNodeBridges(node_i, nresult, bridges)
2843

    
2844
        # Check whether all running instances are primary for the node. (This
2845
        # can no longer be done from _VerifyInstance below, since some of the
2846
        # wrong instances could be from other node groups.)
2847
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2848

    
2849
        for inst in non_primary_inst:
2850
          test = inst in self.all_inst_info
2851
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2852
                   "instance should not run on node %s", node_i.name)
2853
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2854
                   "node is running unknown instance %s", inst)
2855

    
2856
    for node, result in extra_lv_nvinfo.items():
2857
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2858
                              node_image[node], vg_name)
2859

    
2860
    feedback_fn("* Verifying instance status")
2861
    for instance in self.my_inst_names:
2862
      if verbose:
2863
        feedback_fn("* Verifying instance %s" % instance)
2864
      inst_config = self.my_inst_info[instance]
2865
      self._VerifyInstance(instance, inst_config, node_image,
2866
                           instdisk[instance])
2867
      inst_nodes_offline = []
2868

    
2869
      pnode = inst_config.primary_node
2870
      pnode_img = node_image[pnode]
2871
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2872
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2873
               " primary node failed", instance)
2874

    
2875
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2876
               constants.CV_EINSTANCEBADNODE, instance,
2877
               "instance is marked as running and lives on offline node %s",
2878
               inst_config.primary_node)
2879

    
2880
      # If the instance is non-redundant we cannot survive losing its primary
2881
      # node, so we are not N+1 compliant. On the other hand we have no disk
2882
      # templates with more than one secondary so that situation is not well
2883
      # supported either.
2884
      # FIXME: does not support file-backed instances
2885
      if not inst_config.secondary_nodes:
2886
        i_non_redundant.append(instance)
2887

    
2888
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2889
               constants.CV_EINSTANCELAYOUT,
2890
               instance, "instance has multiple secondary nodes: %s",
2891
               utils.CommaJoin(inst_config.secondary_nodes),
2892
               code=self.ETYPE_WARNING)
2893

    
2894
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2895
        pnode = inst_config.primary_node
2896
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2897
        instance_groups = {}
2898

    
2899
        for node in instance_nodes:
2900
          instance_groups.setdefault(self.all_node_info[node].group,
2901
                                     []).append(node)
2902

    
2903
        pretty_list = [
2904
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2905
          # Sort so that we always list the primary node first.
2906
          for group, nodes in sorted(instance_groups.items(),
2907
                                     key=lambda (_, nodes): pnode in nodes,
2908
                                     reverse=True)]
2909

    
2910
        self._ErrorIf(len(instance_groups) > 1,
2911
                      constants.CV_EINSTANCESPLITGROUPS,
2912
                      instance, "instance has primary and secondary nodes in"
2913
                      " different groups: %s", utils.CommaJoin(pretty_list),
2914
                      code=self.ETYPE_WARNING)
2915

    
2916
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2917
        i_non_a_balanced.append(instance)
2918

    
2919
      for snode in inst_config.secondary_nodes:
2920
        s_img = node_image[snode]
2921
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2922
                 snode, "instance %s, connection to secondary node failed",
2923
                 instance)
2924

    
2925
        if s_img.offline:
2926
          inst_nodes_offline.append(snode)
2927

    
2928
      # warn that the instance lives on offline nodes
2929
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2930
               "instance has offline secondary node(s) %s",
2931
               utils.CommaJoin(inst_nodes_offline))
2932
      # ... or ghost/non-vm_capable nodes
2933
      for node in inst_config.all_nodes:
2934
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2935
                 instance, "instance lives on ghost node %s", node)
2936
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2937
                 instance, "instance lives on non-vm_capable node %s", node)
2938

    
2939
    feedback_fn("* Verifying orphan volumes")
2940
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2941

    
2942
    # We will get spurious "unknown volume" warnings if any node of this group
2943
    # is secondary for an instance whose primary is in another group. To avoid
2944
    # them, we find these instances and add their volumes to node_vol_should.
2945
    for inst in self.all_inst_info.values():
2946
      for secondary in inst.secondary_nodes:
2947
        if (secondary in self.my_node_info
2948
            and inst.name not in self.my_inst_info):
2949
          inst.MapLVsByNode(node_vol_should)
2950
          break
2951

    
2952
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2953

    
2954
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2955
      feedback_fn("* Verifying N+1 Memory redundancy")
2956
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2957

    
2958
    feedback_fn("* Other Notes")
2959
    if i_non_redundant:
2960
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2961
                  % len(i_non_redundant))
2962

    
2963
    if i_non_a_balanced:
2964
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2965
                  % len(i_non_a_balanced))
2966

    
2967
    if n_offline:
2968
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2969

    
2970
    if n_drained:
2971
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2972

    
2973
    return not self.bad
2974

    
2975
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2976
    """Analyze the post-hooks' result
2977

2978
    This method analyses the hook result, handles it, and sends some
2979
    nicely-formatted feedback back to the user.
2980

2981
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2982
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2983
    @param hooks_results: the results of the multi-node hooks rpc call
2984
    @param feedback_fn: function used to send feedback back to the caller
2985
    @param lu_result: previous Exec result
2986
    @return: the new Exec result, based on the previous result
2987
        and hook results
2988

2989
    """
2990
    # We only really run POST phase hooks, only for non-empty groups,
2991
    # and are only interested in their results
2992
    if not self.my_node_names:
2993
      # empty node group
2994
      pass
2995
    elif phase == constants.HOOKS_PHASE_POST:
2996
      # Used to change hooks' output to proper indentation
2997
      feedback_fn("* Hooks Results")
2998
      assert hooks_results, "invalid result from hooks"
2999

    
3000
      for node_name in hooks_results:
3001
        res = hooks_results[node_name]
3002
        msg = res.fail_msg
3003
        test = msg and not res.offline
3004
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3005
                      "Communication failure in hooks execution: %s", msg)
3006
        if res.offline or msg:
3007
          # No need to investigate payload if node is offline or gave
3008
          # an error.
3009
          continue
3010
        for script, hkr, output in res.payload:
3011
          test = hkr == constants.HKR_FAIL
3012
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3013
                        "Script %s failed, output:", script)
3014
          if test:
3015
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3016
            feedback_fn("%s" % output)
3017
            lu_result = False
3018

    
3019
    return lu_result
3020

    
3021

    
3022
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
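    # Illustrative note (not in the original source): with two node groups
    # named "default" and "storage", the value handed to ResultWithJobs would
    # be [[opcodes.OpGroupVerifyDisks(group_name="default")],
    #     [opcodes.OpGroupVerifyDisks(group_name="storage")]],
    # i.e. one single-opcode job per node group.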
3040

    
3041

    
3042
class LUGroupVerifyDisks(NoHooksLU):
3043
  """Verifies the status of all disks in a node group.
3044

3045
  """
3046
  REQ_BGL = False
3047

    
3048
  def ExpandNames(self):
3049
    # Raises errors.OpPrereqError on its own if group can't be found
3050
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3051

    
3052
    self.share_locks = _ShareAll()
3053
    self.needed_locks = {
3054
      locking.LEVEL_INSTANCE: [],
3055
      locking.LEVEL_NODEGROUP: [],
3056
      locking.LEVEL_NODE: [],
3057
      }
3058

    
3059
  def DeclareLocks(self, level):
3060
    if level == locking.LEVEL_INSTANCE:
3061
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3062

    
3063
      # Lock instances optimistically, needs verification once node and group
3064
      # locks have been acquired
3065
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3066
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3067

    
3068
    elif level == locking.LEVEL_NODEGROUP:
3069
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3070

    
3071
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3072
        set([self.group_uuid] +
3073
            # Lock all groups used by instances optimistically; this requires
3074
            # going via the node before it's locked, requiring verification
3075
            # later on
3076
            [group_uuid
3077
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3078
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3079

    
3080
    elif level == locking.LEVEL_NODE:
3081
      # This will only lock the nodes in the group to be verified which contain
3082
      # actual instances
3083
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3084
      self._LockInstancesNodes()
3085

    
3086
      # Lock all nodes in group to be verified
3087
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3088
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3089
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3090

    
3091
  def CheckPrereq(self):
3092
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3093
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3094
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3095

    
3096
    assert self.group_uuid in owned_groups
3097

    
3098
    # Check if locked instances are still correct
3099
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3100

    
3101
    # Get instance information
3102
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3103

    
3104
    # Check if node groups for locked instances are still correct
3105
    for (instance_name, inst) in self.instances.items():
3106
      assert owned_nodes.issuperset(inst.all_nodes), \
3107
        "Instance %s's nodes changed while we kept the lock" % instance_name
3108

    
3109
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3110
                                             owned_groups)
3111

    
3112
      assert self.group_uuid in inst_groups, \
3113
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3114

    
3115
  def Exec(self, feedback_fn):
3116
    """Verify integrity of cluster disks.
3117

3118
    @rtype: tuple of three items
3119
    @return: a tuple of (dict of node-to-node_error, list of instances
3120
        which need activate-disks, dict of instance: (node, volume) for
3121
        missing volumes)
3122

3123
    """
3124
    res_nodes = {}
3125
    res_instances = set()
3126
    res_missing = {}
3127

    
3128
    nv_dict = _MapInstanceDisksToNodes([inst
3129
                                        for inst in self.instances.values()
3130
                                        if inst.admin_up])
3131

    
3132
    if nv_dict:
3133
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3134
                             set(self.cfg.GetVmCapableNodeList()))
3135

    
3136
      node_lvs = self.rpc.call_lv_list(nodes, [])
3137

    
3138
      for (node, node_res) in node_lvs.items():
3139
        if node_res.offline:
3140
          continue
3141

    
3142
        msg = node_res.fail_msg
3143
        if msg:
3144
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3145
          res_nodes[node] = msg
3146
          continue
3147

    
3148
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3149
          inst = nv_dict.pop((node, lv_name), None)
3150
          if not (lv_online or inst is None):
3151
            res_instances.add(inst)
3152

    
3153
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3154
      # better
3155
      for key, inst in nv_dict.iteritems():
3156
        res_missing.setdefault(inst, []).append(key)
3157

    
3158
    return (res_nodes, list(res_instances), res_missing)
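    # Illustrative shape of the result (all values made up):
    #   ({"node2": "Error while enumerating LVs"}, ["instance3"],
    #    {"instance5": [("node1", "xenvg/disk0")]})
    # i.e. per-node errors, instances needing activate-disks, and missing LVs.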
3159

    
3160

    
3161
class LUClusterRepairDiskSizes(NoHooksLU):
3162
  """Verifies the cluster disks sizes.
3163

3164
  """
3165
  REQ_BGL = False
3166

    
3167
  def ExpandNames(self):
3168
    if self.op.instances:
3169
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3170
      self.needed_locks = {
3171
        locking.LEVEL_NODE: [],
3172
        locking.LEVEL_INSTANCE: self.wanted_names,
3173
        }
3174
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3175
    else:
3176
      self.wanted_names = None
3177
      self.needed_locks = {
3178
        locking.LEVEL_NODE: locking.ALL_SET,
3179
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3180
        }
3181
    self.share_locks = _ShareAll()
3182

    
3183
  def DeclareLocks(self, level):
3184
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3185
      self._LockInstancesNodes(primary_only=True)
3186

    
3187
  def CheckPrereq(self):
3188
    """Check prerequisites.
3189

3190
    This only checks the optional instance list against the existing names.
3191

3192
    """
3193
    if self.wanted_names is None:
3194
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3195

    
3196
    self.wanted_instances = \
3197
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3198

    
3199
  def _EnsureChildSizes(self, disk):
3200
    """Ensure children of the disk have the needed disk size.
3201

3202
    This is valid mainly for DRBD8 and fixes an issue where the
3203
    children have smaller disk size.
3204

3205
    @param disk: an L{ganeti.objects.Disk} object
3206

3207
    """
3208
    if disk.dev_type == constants.LD_DRBD8:
3209
      assert disk.children, "Empty children for DRBD8?"
3210
      fchild = disk.children[0]
3211
      mismatch = fchild.size < disk.size
3212
      if mismatch:
3213
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3214
                     fchild.size, disk.size)
3215
        fchild.size = disk.size
3216

    
3217
      # and we recurse on this child only, not on the metadev
3218
      return self._EnsureChildSizes(fchild) or mismatch
3219
    else:
3220
      return False
3221

    
3222
  def Exec(self, feedback_fn):
3223
    """Verify the size of cluster disks.
3224

3225
    """
3226
    # TODO: check child disks too
3227
    # TODO: check differences in size between primary/secondary nodes
3228
    per_node_disks = {}
3229
    for instance in self.wanted_instances:
3230
      pnode = instance.primary_node
3231
      if pnode not in per_node_disks:
3232
        per_node_disks[pnode] = []
3233
      for idx, disk in enumerate(instance.disks):
3234
        per_node_disks[pnode].append((instance, idx, disk))
3235

    
3236
    changed = []
3237
    for node, dskl in per_node_disks.items():
3238
      newl = [v[2].Copy() for v in dskl]
3239
      for dsk in newl:
3240
        self.cfg.SetDiskID(dsk, node)
3241
      result = self.rpc.call_blockdev_getsize(node, newl)
3242
      if result.fail_msg:
3243
        self.LogWarning("Failure in blockdev_getsize call to node"
3244
                        " %s, ignoring", node)
3245
        continue
3246
      if len(result.payload) != len(dskl):
3247
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3248
                        " result.payload=%s", node, len(dskl), result.payload)
3249
        self.LogWarning("Invalid result from node %s, ignoring node results",
3250
                        node)
3251
        continue
3252
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3253
        if size is None:
3254
          self.LogWarning("Disk %d of instance %s did not return size"
3255
                          " information, ignoring", idx, instance.name)
3256
          continue
3257
        if not isinstance(size, (int, long)):
3258
          self.LogWarning("Disk %d of instance %s did not return valid"
3259
                          " size information, ignoring", idx, instance.name)
3260
          continue
3261
        size = size >> 20
3262
        if size != disk.size:
3263
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3264
                       " correcting: recorded %d, actual %d", idx,
3265
                       instance.name, disk.size, size)
3266
          disk.size = size
3267
          self.cfg.Update(instance, feedback_fn)
3268
          changed.append((instance.name, idx, size))
3269
        if self._EnsureChildSizes(disk):
3270
          self.cfg.Update(instance, feedback_fn)
3271
          changed.append((instance.name, idx, disk.size))
3272
    return changed
3273

    
3274

    
3275
class LUClusterRename(LogicalUnit):
3276
  """Rename the cluster.
3277

3278
  """
3279
  HPATH = "cluster-rename"
3280
  HTYPE = constants.HTYPE_CLUSTER
3281

    
3282
  def BuildHooksEnv(self):
3283
    """Build hooks env.
3284

3285
    """
3286
    return {
3287
      "OP_TARGET": self.cfg.GetClusterName(),
3288
      "NEW_NAME": self.op.name,
3289
      }
3290

    
3291
  def BuildHooksNodes(self):
3292
    """Build hooks nodes.
3293

3294
    """
3295
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3296

    
3297
  def CheckPrereq(self):
3298
    """Verify that the passed name is a valid one.
3299

3300
    """
3301
    hostname = netutils.GetHostname(name=self.op.name,
3302
                                    family=self.cfg.GetPrimaryIPFamily())
3303

    
3304
    new_name = hostname.name
3305
    self.ip = new_ip = hostname.ip
3306
    old_name = self.cfg.GetClusterName()
3307
    old_ip = self.cfg.GetMasterIP()
3308
    if new_name == old_name and new_ip == old_ip:
3309
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3310
                                 " cluster has changed",
3311
                                 errors.ECODE_INVAL)
3312
    if new_ip != old_ip:
3313
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3314
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3315
                                   " reachable on the network" %
3316
                                   new_ip, errors.ECODE_NOTUNIQUE)
3317

    
3318
    self.op.name = new_name
3319

    
3320
  def Exec(self, feedback_fn):
3321
    """Rename the cluster.
3322

3323
    """
3324
    clustername = self.op.name
3325
    ip = self.ip
3326

    
3327
    # shutdown the master IP
3328
    master = self.cfg.GetMasterNode()
3329
    result = self.rpc.call_node_deactivate_master_ip(master)
3330
    result.Raise("Could not disable the master role")
3331

    
3332
    try:
3333
      cluster = self.cfg.GetClusterInfo()
3334
      cluster.cluster_name = clustername
3335
      cluster.master_ip = ip
3336
      self.cfg.Update(cluster, feedback_fn)
3337

    
3338
      # update the known hosts file
3339
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3340
      node_list = self.cfg.GetOnlineNodeList()
3341
      try:
3342
        node_list.remove(master)
3343
      except ValueError:
3344
        pass
3345
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3346
    finally:
3347
      result = self.rpc.call_node_activate_master_ip(master)
3348
      msg = result.fail_msg
3349
      if msg:
3350
        self.LogWarning("Could not re-enable the master role on"
3351
                        " the master, please restart manually: %s", msg)
3352

    
3353
    return clustername
3354

    
3355

    
3356
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask))
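# Usage sketch (illustrative, not part of the original module): callers pass a
# prefix length, e.g.
#
#   _ValidateNetmask(self.cfg, 24)    # accepted on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 200)   # raises errors.OpPrereqError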
3375

    
3376

    
3377
class LUClusterSetParams(LogicalUnit):
3378
  """Change the parameters of the cluster.
3379

3380
  """
3381
  HPATH = "cluster-modify"
3382
  HTYPE = constants.HTYPE_CLUSTER
3383
  REQ_BGL = False
3384

    
3385
  def CheckArguments(self):
3386
    """Check parameters
3387

3388
    """
3389
    if self.op.uid_pool:
3390
      uidpool.CheckUidPool(self.op.uid_pool)
3391

    
3392
    if self.op.add_uids:
3393
      uidpool.CheckUidPool(self.op.add_uids)
3394

    
3395
    if self.op.remove_uids:
3396
      uidpool.CheckUidPool(self.op.remove_uids)
3397

    
3398
    if self.op.master_netmask is not None:
3399
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3400

    
3401
  def ExpandNames(self):
3402
    # FIXME: in the future maybe other cluster params won't require checking on
3403
    # all nodes to be modified.
3404
    self.needed_locks = {
3405
      locking.LEVEL_NODE: locking.ALL_SET,
3406
    }
3407
    self.share_locks[locking.LEVEL_NODE] = 1
3408

    
3409
  def BuildHooksEnv(self):
3410
    """Build hooks env.
3411

3412
    """
3413
    return {
3414
      "OP_TARGET": self.cfg.GetClusterName(),
3415
      "NEW_VG_NAME": self.op.vg_name,
3416
      }
3417

    
3418
  def BuildHooksNodes(self):
3419
    """Build hooks nodes.
3420

3421
    """
3422
    mn = self.cfg.GetMasterNode()
3423
    return ([mn], [mn])
3424

    
3425
  def CheckPrereq(self):
3426
    """Check prerequisites.
3427

3428
    This checks whether the given params don't conflict and
3429
    if the given volume group is valid.
3430

3431
    """
3432
    if self.op.vg_name is not None and not self.op.vg_name:
3433
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3434
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3435
                                   " instances exist", errors.ECODE_INVAL)
3436

    
3437
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3438
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3439
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3440
                                   " drbd-based instances exist",
3441
                                   errors.ECODE_INVAL)
3442

    
3443
    node_list = self.owned_locks(locking.LEVEL_NODE)
3444

    
3445
    # if vg_name not None, checks given volume group on all nodes
3446
    if self.op.vg_name:
3447
      vglist = self.rpc.call_vg_list(node_list)
3448
      for node in node_list:
3449
        msg = vglist[node].fail_msg
3450
        if msg:
3451
          # ignoring down node
3452
          self.LogWarning("Error while gathering data on node %s"
3453
                          " (ignoring node): %s", node, msg)
3454
          continue
3455
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3456
                                              self.op.vg_name,
3457
                                              constants.MIN_VG_SIZE)
3458
        if vgstatus:
3459
          raise errors.OpPrereqError("Error on node '%s': %s" %
3460
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3461

    
3462
    if self.op.drbd_helper:
3463
      # checks given drbd helper on all nodes
3464
      helpers = self.rpc.call_drbd_helper(node_list)
3465
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3466
        if ninfo.offline:
3467
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3468
          continue
3469
        msg = helpers[node].fail_msg
3470
        if msg:
3471
          raise errors.OpPrereqError("Error checking drbd helper on node"
3472
                                     " '%s': %s" % (node, msg),
3473
                                     errors.ECODE_ENVIRON)
3474
        node_helper = helpers[node].payload
3475
        if node_helper != self.op.drbd_helper:
3476
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3477
                                     (node, node_helper), errors.ECODE_ENVIRON)
3478

    
3479
    self.cluster = cluster = self.cfg.GetClusterInfo()
3480
    # validate params changes
3481
    if self.op.beparams:
3482
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3483
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3484

    
3485
    if self.op.ndparams:
3486
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3487
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3488

    
3489
      # TODO: we need a more general way to handle resetting
3490
      # cluster-level parameters to default values
3491
      if self.new_ndparams["oob_program"] == "":
3492
        self.new_ndparams["oob_program"] = \
3493
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3494

    
3495
    if self.op.nicparams:
3496
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3497
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3498
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3499
      nic_errors = []
3500

    
3501
      # check all instances for consistency
3502
      for instance in self.cfg.GetAllInstancesInfo().values():
3503
        for nic_idx, nic in enumerate(instance.nics):
3504
          params_copy = copy.deepcopy(nic.nicparams)
3505
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3506

    
3507
          # check parameter syntax
3508
          try:
3509
            objects.NIC.CheckParameterSyntax(params_filled)
3510
          except errors.ConfigurationError, err:
3511
            nic_errors.append("Instance %s, nic/%d: %s" %
3512
                              (instance.name, nic_idx, err))
3513

    
3514
          # if we're moving instances to routed, check that they have an ip
3515
          target_mode = params_filled[constants.NIC_MODE]
3516
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3517
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3518
                              " address" % (instance.name, nic_idx))
3519
      if nic_errors:
3520
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3521
                                   "\n".join(nic_errors))
3522

    
3523
    # hypervisor list/parameters
3524
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3525
    if self.op.hvparams:
3526
      for hv_name, hv_dict in self.op.hvparams.items():
3527
        if hv_name not in self.new_hvparams:
3528
          self.new_hvparams[hv_name] = hv_dict
3529
        else:
3530
          self.new_hvparams[hv_name].update(hv_dict)
3531

    
3532
    # os hypervisor parameters
3533
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3534
    if self.op.os_hvp:
3535
      for os_name, hvs in self.op.os_hvp.items():
3536
        if os_name not in self.new_os_hvp:
3537
          self.new_os_hvp[os_name] = hvs
3538
        else:
3539
          for hv_name, hv_dict in hvs.items():
3540
            if hv_name not in self.new_os_hvp[os_name]:
3541
              self.new_os_hvp[os_name][hv_name] = hv_dict
3542
            else:
3543
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3544

    
3545
    # os parameters
3546
    self.new_osp = objects.FillDict(cluster.osparams, {})
3547
    if self.op.osparams:
3548
      for os_name, osp in self.op.osparams.items():
3549
        if os_name not in self.new_osp:
3550
          self.new_osp[os_name] = {}
3551

    
3552
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3553
                                                  use_none=True)
3554

    
3555
        if not self.new_osp[os_name]:
3556
          # we removed all parameters
3557
          del self.new_osp[os_name]
3558
        else:
3559
          # check the parameter validity (remote check)
3560
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3561
                         os_name, self.new_osp[os_name])
3562

    
3563
    # changes to the hypervisor list
3564
    if self.op.enabled_hypervisors is not None:
3565
      self.hv_list = self.op.enabled_hypervisors
3566
      for hv in self.hv_list:
3567
        # if the hypervisor doesn't already exist in the cluster
3568
        # hvparams, we initialize it to empty, and then (in both
3569
        # cases) we make sure to fill the defaults, as we might not
3570
        # have a complete defaults list if the hypervisor wasn't
3571
        # enabled before
3572
        if hv not in new_hvp:
3573
          new_hvp[hv] = {}
3574
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3575
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3576
    else:
3577
      self.hv_list = cluster.enabled_hypervisors
3578

    
3579
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3580
      # either the enabled list has changed, or the parameters have, validate
3581
      for hv_name, hv_params in self.new_hvparams.items():
3582
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3583
            (self.op.enabled_hypervisors and
3584
             hv_name in self.op.enabled_hypervisors)):
3585
          # either this is a new hypervisor, or its parameters have changed
3586
          hv_class = hypervisor.GetHypervisor(hv_name)
3587
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3588
          hv_class.CheckParameterSyntax(hv_params)
3589
          _CheckHVParams(self, node_list, hv_name, hv_params)
3590

    
3591
    if self.op.os_hvp:
3592
      # no need to check any newly-enabled hypervisors, since the
3593
      # defaults have already been checked in the above code-block
3594
      for os_name, os_hvp in self.new_os_hvp.items():
3595
        for hv_name, hv_params in os_hvp.items():
3596
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3597
          # we need to fill in the new os_hvp on top of the actual hv_p
3598
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3599
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3600
          hv_class = hypervisor.GetHypervisor(hv_name)
3601
          hv_class.CheckParameterSyntax(new_osp)
3602
          _CheckHVParams(self, node_list, hv_name, new_osp)
3603

    
3604
    if self.op.default_iallocator:
3605
      alloc_script = utils.FindFile(self.op.default_iallocator,
3606
                                    constants.IALLOCATOR_SEARCH_PATH,
3607
                                    os.path.isfile)
3608
      if alloc_script is None:
3609
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3610
                                   " specified" % self.op.default_iallocator,
3611
                                   errors.ECODE_INVAL)
3612

    
3613
  def Exec(self, feedback_fn):
3614
    """Change the parameters of the cluster.
3615

3616
    """
3617
    if self.op.vg_name is not None:
3618
      new_volume = self.op.vg_name
3619
      if not new_volume:
3620
        new_volume = None
3621
      if new_volume != self.cfg.GetVGName():
3622
        self.cfg.SetVGName(new_volume)
3623
      else:
3624
        feedback_fn("Cluster LVM configuration already in desired"
3625
                    " state, not changing")
3626
    if self.op.drbd_helper is not None:
3627
      new_helper = self.op.drbd_helper
3628
      if not new_helper:
3629
        new_helper = None
3630
      if new_helper != self.cfg.GetDRBDHelper():
3631
        self.cfg.SetDRBDHelper(new_helper)
3632
      else:
3633
        feedback_fn("Cluster DRBD helper already in desired state,"
3634
                    " not changing")
3635
    if self.op.hvparams:
3636
      self.cluster.hvparams = self.new_hvparams
3637
    if self.op.os_hvp:
3638
      self.cluster.os_hvp = self.new_os_hvp
3639
    if self.op.enabled_hypervisors is not None:
3640
      self.cluster.hvparams = self.new_hvparams
3641
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3642
    if self.op.beparams:
3643
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3644
    if self.op.nicparams:
3645
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3646
    if self.op.osparams:
3647
      self.cluster.osparams = self.new_osp
3648
    if self.op.ndparams:
3649
      self.cluster.ndparams = self.new_ndparams
3650

    
3651
    if self.op.candidate_pool_size is not None:
3652
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3653
      # we need to update the pool size here, otherwise the save will fail
3654
      _AdjustCandidatePool(self, [])
3655

    
3656
    if self.op.maintain_node_health is not None:
3657
      self.cluster.maintain_node_health = self.op.maintain_node_health
3658

    
3659
    if self.op.prealloc_wipe_disks is not None:
3660
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3661

    
3662
    if self.op.add_uids is not None:
3663
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3664

    
3665
    if self.op.remove_uids is not None:
3666
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3667

    
3668
    if self.op.uid_pool is not None:
3669
      self.cluster.uid_pool = self.op.uid_pool
3670

    
3671
    if self.op.default_iallocator is not None:
3672
      self.cluster.default_iallocator = self.op.default_iallocator
3673

    
3674
    if self.op.reserved_lvs is not None:
3675
      self.cluster.reserved_lvs = self.op.reserved_lvs
3676

    
3677
    def helper_os(aname, mods, desc):
3678
      desc += " OS list"
3679
      lst = getattr(self.cluster, aname)
3680
      for key, val in mods:
3681
        if key == constants.DDM_ADD:
3682
          if val in lst:
3683
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3684
          else:
3685
            lst.append(val)
3686
        elif key == constants.DDM_REMOVE:
3687
          if val in lst:
3688
            lst.remove(val)
3689
          else:
3690
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3691
        else:
3692
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3693

    
3694
    if self.op.hidden_os:
3695
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3696

    
3697
    if self.op.blacklisted_os:
3698
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3699

    
3700
    if self.op.master_netdev:
3701
      master = self.cfg.GetMasterNode()
3702
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3703
                  self.cluster.master_netdev)
3704
      result = self.rpc.call_node_deactivate_master_ip(master)
3705
      result.Raise("Could not disable the master ip")
3706
      feedback_fn("Changing master_netdev from %s to %s" %
3707
                  (self.cluster.master_netdev, self.op.master_netdev))
3708
      self.cluster.master_netdev = self.op.master_netdev
3709

    
3710
    if self.op.master_netmask:
3711
      master = self.cfg.GetMasterNode()
3712
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3713
      result = self.rpc.call_node_change_master_netmask(master,
3714
                                                        self.op.master_netmask)
3715
      if result.fail_msg:
3716
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3717
        self.LogWarning(msg)
3718
        feedback_fn(msg)
3719
      else:
3720
        self.cluster.master_netmask = self.op.master_netmask
3721

    
3722
    self.cfg.Update(self.cluster, feedback_fn)
3723

    
3724
    if self.op.master_netdev:
3725
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3726
                  self.op.master_netdev)
3727
      result = self.rpc.call_node_activate_master_ip(master)
3728
      if result.fail_msg:
3729
        self.LogWarning("Could not re-enable the master ip on"
3730
                        " the master, please restart manually: %s",
3731
                        result.fail_msg)
3732

    
3733

    
3734
def _UploadHelper(lu, nodes, fname):
3735
  """Helper for uploading a file and showing warnings.
3736

3737
  """
3738
  if os.path.exists(fname):
3739
    result = lu.rpc.call_upload_file(nodes, fname)
3740
    for to_node, to_result in result.items():
3741
      msg = to_result.fail_msg
3742
      if msg:
3743
        msg = ("Copy of file %s to node %s failed: %s" %
3744
               (fname, to_node, msg))
3745
        lu.proc.LogWarning(msg)
3746

    
3747

    
3748
def _ComputeAncillaryFiles(cluster, redist):
3749
  """Compute files external to Ganeti which need to be consistent.
3750

3751
  @type redist: boolean
3752
  @param redist: Whether to include files which need to be redistributed
3753

3754
  """
3755
  # Compute files for all nodes
3756
  files_all = set([
3757
    constants.SSH_KNOWN_HOSTS_FILE,
3758
    constants.CONFD_HMAC_KEY,
3759
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3760
    constants.SPICE_CERT_FILE,
3761
    constants.SPICE_CACERT_FILE,
3762
    constants.RAPI_USERS_FILE,
3763
    ])
3764

    
3765
  if not redist:
3766
    files_all.update(constants.ALL_CERT_FILES)
3767
    files_all.update(ssconf.SimpleStore().GetFileList())
3768
  else:
3769
    # we need to ship at least the RAPI certificate
3770
    files_all.add(constants.RAPI_CERT_FILE)
3771

    
3772
  if cluster.modify_etc_hosts:
3773
    files_all.add(constants.ETC_HOSTS)
3774

    
3775
  # Files which are optional, these must:
3776
  # - be present in one other category as well
3777
  # - either exist or not exist on all nodes of that category (mc, vm all)
3778
  files_opt = set([
3779
    constants.RAPI_USERS_FILE,
3780
    ])
3781

    
3782
  # Files which should only be on master candidates
3783
  files_mc = set()
3784
  if not redist:
3785
    files_mc.add(constants.CLUSTER_CONF_FILE)
3786

    
3787
  # Files which should only be on VM-capable nodes
3788
  files_vm = set(filename
3789
    for hv_name in cluster.enabled_hypervisors
3790
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3791

    
3792
  files_opt |= set(filename
3793
    for hv_name in cluster.enabled_hypervisors
3794
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3795

    
3796
  # Filenames in each category must be unique
3797
  all_files_set = files_all | files_mc | files_vm
3798
  assert (len(all_files_set) ==
3799
          sum(map(len, [files_all, files_mc, files_vm]))), \
3800
         "Found file listed in more than one file list"
3801

    
3802
  # Optional files must be present in one other category
3803
  assert all_files_set.issuperset(files_opt), \
3804
         "Optional file not in a different required list"
3805

    
3806
  return (files_all, files_opt, files_mc, files_vm)
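# A minimal usage sketch (mirroring the consumer below, values assumed):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(lu.cfg.GetClusterInfo(), True)
#   # files_all: every node; files_mc: master candidates only;
#   # files_vm: VM-capable nodes only; files_opt: may be absent on some nodes.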
3807

    
3808

    
3809
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3810
  """Distribute additional files which are part of the cluster configuration.
3811

3812
  ConfigWriter takes care of distributing the config and ssconf files, but
3813
  there are more files which should be distributed to all nodes. This function
3814
  makes sure those are copied.
3815

3816
  @param lu: calling logical unit
3817
  @param additional_nodes: list of nodes not in the config to distribute to
3818
  @type additional_vm: boolean
3819
  @param additional_vm: whether the additional nodes are vm-capable or not
3820

3821
  """
3822
  # Gather target nodes
3823
  cluster = lu.cfg.GetClusterInfo()
3824
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3825

    
3826
  online_nodes = lu.cfg.GetOnlineNodeList()
3827
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3828

    
3829
  if additional_nodes is not None:
3830
    online_nodes.extend(additional_nodes)
3831
    if additional_vm:
3832
      vm_nodes.extend(additional_nodes)
3833

    
3834
  # Never distribute to master node
3835
  for nodelist in [online_nodes, vm_nodes]:
3836
    if master_info.name in nodelist:
3837
      nodelist.remove(master_info.name)
3838

    
3839
  # Gather file lists
3840
  (files_all, _, files_mc, files_vm) = \
3841
    _ComputeAncillaryFiles(cluster, True)
3842

    
3843
  # Never re-distribute configuration file from here
3844
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3845
              constants.CLUSTER_CONF_FILE in files_vm)
3846
  assert not files_mc, "Master candidates not handled in this function"
3847

    
3848
  filemap = [
3849
    (online_nodes, files_all),
3850
    (vm_nodes, files_vm),
3851
    ]
3852

    
3853
  # Upload the files
3854
  for (node_list, files) in filemap:
3855
    for fname in files:
3856
      _UploadHelper(lu, node_list, fname)
3857

    
3858

    
3859
class LUClusterRedistConf(NoHooksLU):
3860
  """Force the redistribution of cluster configuration.
3861

3862
  This is a very simple LU.
3863

3864
  """
3865
  REQ_BGL = False
3866

    
3867
  def ExpandNames(self):
3868
    self.needed_locks = {
3869
      locking.LEVEL_NODE: locking.ALL_SET,
3870
    }
3871
    self.share_locks[locking.LEVEL_NODE] = 1
3872

    
3873
  def Exec(self, feedback_fn):
3874
    """Redistribute the configuration.
3875

3876
    """
3877
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3878
    _RedistributeAncillaryFiles(self)
3879

    
3880

    
3881
class LUClusterActivateMasterIp(NoHooksLU):
3882
  """Activate the master IP on the master node.
3883

3884
  """
3885
  def Exec(self, feedback_fn):
3886
    """Activate the master IP.
3887

3888
    """
3889
    master = self.cfg.GetMasterNode()
3890
    self.rpc.call_node_activate_master_ip(master)
3891

    
3892

    
3893
class LUClusterDeactivateMasterIp(NoHooksLU):
3894
  """Deactivate the master IP on the master node.
3895

3896
  """
3897
  def Exec(self, feedback_fn):
3898
    """Deactivate the master IP.
3899

3900
    """
3901
    master = self.cfg.GetMasterNode()
3902
    self.rpc.call_node_deactivate_master_ip(master)
3903

    
3904

    
3905
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3906
  """Sleep and poll for an instance's disk to sync.
3907

3908
  """
3909
  if not instance.disks or disks is not None and not disks:
3910
    return True
3911

    
3912
  disks = _ExpandCheckDisks(instance, disks)
3913

    
3914
  if not oneshot:
3915
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3916

    
3917
  node = instance.primary_node
3918

    
3919
  for dev in disks:
3920
    lu.cfg.SetDiskID(dev, node)
3921

    
3922
  # TODO: Convert to utils.Retry
3923

    
3924
  retries = 0
3925
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3926
  while True:
3927
    max_time = 0
3928
    done = True
3929
    cumul_degraded = False
3930
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3931
    msg = rstats.fail_msg
3932
    if msg:
3933
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3934
      retries += 1
3935
      if retries >= 10:
3936
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3937
                                 " aborting." % node)
3938
      time.sleep(6)
3939
      continue
3940
    rstats = rstats.payload
3941
    retries = 0
3942
    for i, mstat in enumerate(rstats):
3943
      if mstat is None:
3944
        lu.LogWarning("Can't compute data for node %s/%s",
3945
                           node, disks[i].iv_name)
3946
        continue
3947

    
3948
      cumul_degraded = (cumul_degraded or
3949
                        (mstat.is_degraded and mstat.sync_percent is None))
3950
      if mstat.sync_percent is not None:
3951
        done = False
3952
        if mstat.estimated_time is not None:
3953
          rem_time = ("%s remaining (estimated)" %
3954
                      utils.FormatSeconds(mstat.estimated_time))
3955
          max_time = mstat.estimated_time
3956
        else:
3957
          rem_time = "no time estimate"
3958
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3959
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3960

    
3961
    # if we're done but degraded, let's do a few small retries, to
3962
    # make sure we see a stable and not transient situation; therefore
3963
    # we force restart of the loop
3964
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3965
      logging.info("Degraded disks found, %d retries left", degr_retries)
3966
      degr_retries -= 1
3967
      time.sleep(1)
3968
      continue
3969

    
3970
    if done or oneshot:
3971
      break
3972

    
3973
    time.sleep(min(60, max_time))
3974

    
3975
  if done:
3976
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3977
  return not cumul_degraded
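# Usage sketch (illustrative only, not the original caller code):
#
#   if not _WaitForSync(lu, instance):
#     lu.LogWarning("Some disks of instance %s are degraded", instance.name)
#
# i.e. a False return value means at least one mirror is still degraded.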
3978

    
3979

    
3980
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3981
  """Check that mirrors are not degraded.
3982

3983
  The ldisk parameter, if True, will change the test from the
3984
  is_degraded attribute (which represents overall non-ok status for
3985
  the device(s)) to the ldisk (representing the local storage status).
3986

3987
  """
3988
  lu.cfg.SetDiskID(dev, node)
3989

    
3990
  result = True
3991

    
3992
  if on_primary or dev.AssembleOnSecondary():
3993
    rstats = lu.rpc.call_blockdev_find(node, dev)
3994
    msg = rstats.fail_msg
3995
    if msg:
3996
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3997
      result = False
3998
    elif not rstats.payload:
3999
      lu.LogWarning("Can't find disk on node %s", node)
4000
      result = False
4001
    else:
4002
      if ldisk:
4003
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4004
      else:
4005
        result = result and not rstats.payload.is_degraded
4006

    
4007
  if dev.children:
4008
    for child in dev.children:
4009
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4010

    
4011
  return result
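# Usage sketch (assumption, not the original caller code): the ldisk flag
# restricts the check to the local storage status, e.g.
#
#   if not _CheckDiskConsistency(lu, dev, node, False, ldisk=True):
#     raise errors.OpExecError("Disk %s on node %s is degraded" %
#                              (dev.iv_name, node))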
4012

    
4013

    
4014
class LUOobCommand(NoHooksLU):
4015
  """Logical unit for OOB handling.
4016

4017
  """
4018
  REQ_BGL = False
4019
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4020

    
4021
  def ExpandNames(self):
4022
    """Gather locks we need.
4023

4024
    """
4025
    if self.op.node_names:
4026
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4027
      lock_names = self.op.node_names
4028
    else:
4029
      lock_names = locking.ALL_SET
4030

    
4031
    self.needed_locks = {
4032
      locking.LEVEL_NODE: lock_names,
4033
      }
4034

    
4035
  def CheckPrereq(self):
4036
    """Check prerequisites.
4037

4038
    This checks:
4039
     - the node exists in the configuration
4040
     - OOB is supported
4041

4042
    Any errors are signaled by raising errors.OpPrereqError.
4043

4044
    """
4045
    self.nodes = []
4046
    self.master_node = self.cfg.GetMasterNode()
4047

    
4048
    assert self.op.power_delay >= 0.0
4049

    
4050
    if self.op.node_names:
4051
      if (self.op.command in self._SKIP_MASTER and
4052
          self.master_node in self.op.node_names):
4053
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4054
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4055

    
4056
        if master_oob_handler:
4057
          additional_text = ("run '%s %s %s' if you want to operate on the"
4058
                             " master regardless") % (master_oob_handler,
4059
                                                      self.op.command,
4060
                                                      self.master_node)
4061
        else:
4062
          additional_text = "it does not support out-of-band operations"
4063

    
4064
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4065
                                    " allowed for %s; %s") %
4066
                                   (self.master_node, self.op.command,
4067
                                    additional_text), errors.ECODE_INVAL)
4068
    else:
4069
      self.op.node_names = self.cfg.GetNodeList()
4070
      if self.op.command in self._SKIP_MASTER:
4071
        self.op.node_names.remove(self.master_node)
4072

    
4073
    if self.op.command in self._SKIP_MASTER:
4074
      assert self.master_node not in self.op.node_names
4075

    
4076
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4077
      if node is None:
4078
        raise errors.OpPrereqError("Node %s not found" % node_name,
4079
                                   errors.ECODE_NOENT)
4080
      else:
4081
        self.nodes.append(node)
4082

    
4083
      if (not self.op.ignore_status and
4084
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4085
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4086
                                    " not marked offline") % node_name,
4087
                                   errors.ECODE_STATE)
4088

    
4089
  def Exec(self, feedback_fn):
4090
    """Execute OOB and return result if we expect any.
4091

4092
    """
4093
    master_node = self.master_node
4094
    ret = []
4095

    
4096
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4097
                                              key=lambda node: node.name)):
4098
      node_entry = [(constants.RS_NORMAL, node.name)]
4099
      ret.append(node_entry)
4100

    
4101
      oob_program = _SupportsOob(self.cfg, node)
4102

    
4103
      if not oob_program:
4104
        node_entry.append((constants.RS_UNAVAIL, None))
4105
        continue
4106

    
4107
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4108
                   self.op.command, oob_program, node.name)
4109
      result = self.rpc.call_run_oob(master_node, oob_program,
4110
                                     self.op.command, node.name,
4111
                                     self.op.timeout)
4112

    
4113
      if result.fail_msg:
4114
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4115
                        node.name, result.fail_msg)
4116
        node_entry.append((constants.RS_NODATA, None))
4117
      else:
4118
        try:
4119
          self._CheckPayload(result)
4120
        except errors.OpExecError, err:
4121
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4122
                          node.name, err)
4123
          node_entry.append((constants.RS_NODATA, None))
4124
        else:
4125
          if self.op.command == constants.OOB_HEALTH:
4126
            # For health we should log important events
4127
            for item, status in result.payload:
4128
              if status in [constants.OOB_STATUS_WARNING,
4129
                            constants.OOB_STATUS_CRITICAL]:
4130
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4131
                                item, node.name, status)
4132

    
4133
          if self.op.command == constants.OOB_POWER_ON:
4134
            node.powered = True
4135
          elif self.op.command == constants.OOB_POWER_OFF:
4136
            node.powered = False
4137
          elif self.op.command == constants.OOB_POWER_STATUS:
4138
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4139
            if powered != node.powered:
4140
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4141
                               " match actual power state (%s)"), node.powered,
4142
                              node.name, powered)
4143

    
4144
          # For configuration changing commands we should update the node
4145
          if self.op.command in (constants.OOB_POWER_ON,
4146
                                 constants.OOB_POWER_OFF):
4147
            self.cfg.Update(node, feedback_fn)
4148

    
4149
          node_entry.append((constants.RS_NORMAL, result.payload))
4150

    
4151
          if (self.op.command == constants.OOB_POWER_ON and
4152
              idx < len(self.nodes) - 1):
4153
            time.sleep(self.op.power_delay)
4154

    
4155
    return ret
4156

    
4157
  def _CheckPayload(self, result):
4158
    """Checks if the payload is valid.
4159

4160
    @param result: RPC result
4161
    @raises errors.OpExecError: If payload is not valid
4162

4163
    """
4164
    errs = []
4165
    if self.op.command == constants.OOB_HEALTH:
4166
      if not isinstance(result.payload, list):
4167
        errs.append("command 'health' is expected to return a list but got %s" %
4168
                    type(result.payload))
4169
      else:
4170
        for item, status in result.payload:
4171
          if status not in constants.OOB_STATUSES:
4172
            errs.append("health item '%s' has invalid status '%s'" %
4173
                        (item, status))
4174

    
4175
    if self.op.command == constants.OOB_POWER_STATUS:
4176
      if not isinstance(result.payload, dict):
4177
        errs.append("power-status is expected to return a dict but got %s" %
4178
                    type(result.payload))
4179

    
4180
    if self.op.command in [
4181
        constants.OOB_POWER_ON,
4182
        constants.OOB_POWER_OFF,
4183
        constants.OOB_POWER_CYCLE,
4184
        ]:
4185
      if result.payload is not None:
4186
        errs.append("%s is expected to not return payload but got '%s'" %
4187
                    (self.op.command, result.payload))
4188

    
4189
    if errs:
4190
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4191
                               utils.CommaJoin(errs))
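  # Illustrative payloads (made-up data): a valid "health" result looks like
  #   [("disk0", constants.OOB_STATUS_OK), ("psu1", constants.OOB_STATUS_WARNING)]
  # and "power-status" must return a dict such as
  #   {constants.OOB_POWER_STATUS_POWERED: True}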
4192

    
4193

    
4194
class _OsQuery(_QueryBase):
4195
  FIELDS = query.OS_FIELDS
4196

    
4197
  def ExpandNames(self, lu):
4198
    # Lock all nodes in shared mode
4199
    # Temporary removal of locks, should be reverted later
4200
    # TODO: reintroduce locks when they are lighter-weight
4201
    lu.needed_locks = {}
4202
    #self.share_locks[locking.LEVEL_NODE] = 1
4203
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4204

    
4205
    # The following variables interact with _QueryBase._GetNames
4206
    if self.names:
4207
      self.wanted = self.names
4208
    else:
4209
      self.wanted = locking.ALL_SET
4210

    
4211
    self.do_locking = self.use_locking
4212

    
4213
  def DeclareLocks(self, lu, level):
4214
    pass
4215

    
4216
  @staticmethod
4217
  def _DiagnoseByOS(rlist):
4218
    """Remaps a per-node return list into an a per-os per-node dictionary
4219

4220
    @param rlist: a map with node names as keys and OS objects as values
4221

4222
    @rtype: dict
4223
    @return: a dictionary with osnames as keys and as value another
4224
        map, with nodes as keys and tuples of (path, status, diagnose,
4225
        variants, parameters, api_versions) as values, eg::
4226

4227
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4228
                                     (/srv/..., False, "invalid api")],
4229
                           "node2": [(/srv/..., True, "", [], [])]}
4230
          }
4231

4232
    """
4233
    all_os = {}
4234
    # we build here the list of nodes that didn't fail the RPC (at RPC
4235
    # level), so that nodes with a non-responding node daemon don't
4236
    # make all OSes invalid
4237
    good_nodes = [node_name for node_name in rlist
4238
                  if not rlist[node_name].fail_msg]
4239
    for node_name, nr in rlist.items():
4240
      if nr.fail_msg or not nr.payload:
4241
        continue
4242
      for (name, path, status, diagnose, variants,
4243
           params, api_versions) in nr.payload:
4244
        if name not in all_os:
4245
          # build a list of nodes for this os containing empty lists
4246
          # for each node in node_list
4247
          all_os[name] = {}
4248
          for nname in good_nodes:
4249
            all_os[name][nname] = []
4250
        # convert params from [name, help] to (name, help)
4251
        params = [tuple(v) for v in params]
4252
        all_os[name][node_name].append((path, status, diagnose,
4253
                                        variants, params, api_versions))
4254
    return all_os
4255

    
4256
  def _GetQueryData(self, lu):
4257
    """Computes the list of nodes and their attributes.
4258

4259
    """
4260
    # Locking is not used
4261
    assert not (compat.any(lu.glm.is_owned(level)
4262
                           for level in locking.LEVELS
4263
                           if level != locking.LEVEL_CLUSTER) or
4264
                self.do_locking or self.use_locking)
4265

    
4266
    valid_nodes = [node.name
4267
                   for node in lu.cfg.GetAllNodesInfo().values()
4268
                   if not node.offline and node.vm_capable]
4269
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4270
    cluster = lu.cfg.GetClusterInfo()
4271

    
4272
    data = {}
4273

    
4274
    for (os_name, os_data) in pol.items():
4275
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4276
                          hidden=(os_name in cluster.hidden_os),
4277
                          blacklisted=(os_name in cluster.blacklisted_os))
4278

    
4279
      variants = set()
      parameters = set()
      api_versions = set()
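      # The first node's data seeds these sets; every subsequent node
      # intersects them, so only values supported by all nodes remain.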
4282

    
4283
      for idx, osl in enumerate(os_data.values()):
4284
        info.valid = bool(info.valid and osl and osl[0][1])
4285
        if not info.valid:
4286
          break
4287

    
4288
        (node_variants, node_params, node_api) = osl[0][3:6]
4289
        if idx == 0:
4290
          # First entry
4291
          variants.update(node_variants)
4292
          parameters.update(node_params)
4293
          api_versions.update(node_api)
4294
        else:
4295
          # Filter out inconsistent values
4296
          variants.intersection_update(node_variants)
4297
          parameters.intersection_update(node_params)
4298
          api_versions.intersection_update(node_api)
4299

    
4300
      info.variants = list(variants)
4301
      info.parameters = list(parameters)
4302
      info.api_versions = list(api_versions)
4303

    
4304
      data[os_name] = info
4305

    
4306
    # Prepare data in requested order
4307
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4308
            if name in data]
4309

    
4310

    
4311
class LUOsDiagnose(NoHooksLU):
4312
  """Logical unit for OS diagnose/query.
4313

4314
  """
4315
  REQ_BGL = False
4316

    
4317
  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
4322
    name_filter = qlang.MakeSimpleFilter("name", names)
4323

    
4324
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4325
    # respective field is not requested
4326
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4327
                     for fname in ["hidden", "blacklisted"]
4328
                     if fname not in fields]
4329
    if "valid" not in fields:
4330
      status_filter.append([qlang.OP_TRUE, "valid"])
4331

    
4332
    if status_filter:
4333
      status_filter.insert(0, qlang.OP_AND)
4334
    else:
4335
      status_filter = None
4336

    
4337
    if name_filter and status_filter:
4338
      return [qlang.OP_AND, name_filter, status_filter]
4339
    elif name_filter:
4340
      return name_filter
4341
    else:
4342
      return status_filter
4343

    
4344
  def CheckArguments(self):
4345
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4346
                       self.op.output_fields, False)
4347

    
4348
  def ExpandNames(self):
4349
    self.oq.ExpandNames(self)
4350

    
4351
  def Exec(self, feedback_fn):
4352
    return self.oq.OldStyleQuery(self)
4353

    
4354

    
4355
class LUNodeRemove(LogicalUnit):
4356
  """Logical unit for removing a node.
4357

4358
  """
4359
  HPATH = "node-remove"
4360
  HTYPE = constants.HTYPE_NODE
4361

    
4362
  def BuildHooksEnv(self):
4363
    """Build hooks env.
4364

4365
    This doesn't run on the target node in the pre phase as a failed
4366
    node would then be impossible to remove.
4367

4368
    """
4369
    return {
4370
      "OP_TARGET": self.op.node_name,
4371
      "NODE_NAME": self.op.node_name,
4372
      }
4373

    
4374
  def BuildHooksNodes(self):
4375
    """Build hooks nodes.
4376

4377
    """
4378
    all_nodes = self.cfg.GetNodeList()
4379
    try:
4380
      all_nodes.remove(self.op.node_name)
4381
    except ValueError:
4382
      logging.warning("Node '%s', which is about to be removed, was not found"
4383
                      " in the list of all nodes", self.op.node_name)
4384
    return (all_nodes, all_nodes)
4385

    
4386
  def CheckPrereq(self):
4387
    """Check prerequisites.
4388

4389
    This checks:
4390
     - the node exists in the configuration
4391
     - it does not have primary or secondary instances
4392
     - it's not the master
4393

4394
    Any errors are signaled by raising errors.OpPrereqError.
4395

4396
    """
4397
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4398
    node = self.cfg.GetNodeInfo(self.op.node_name)
4399
    assert node is not None
4400

    
4401
    masternode = self.cfg.GetMasterNode()
4402
    if node.name == masternode:
4403
      raise errors.OpPrereqError("Node is the master node, failover to another"
4404
                                 " node is required", errors.ECODE_INVAL)
4405

    
4406
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
4411
    self.op.node_name = node.name
4412
    self.node = node
4413

    
4414
  def Exec(self, feedback_fn):
4415
    """Removes the node from the cluster.
4416

4417
    """
4418
    node = self.node
4419
    logging.info("Stopping the node daemon and removing configs from node %s",
4420
                 node.name)
4421

    
4422
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4423

    
4424
    # Promote nodes to master candidate as needed
4425
    _AdjustCandidatePool(self, exceptions=[node.name])
4426
    self.context.RemoveNode(node.name)
4427

    
4428
    # Run post hooks on the node before it's removed
4429
    _RunPostHook(self, node.name)
4430

    
4431
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4432
    msg = result.fail_msg
4433
    if msg:
4434
      self.LogWarning("Errors encountered on the remote node while leaving"
4435
                      " the cluster: %s", msg)
4436

    
4437
    # Remove node from our /etc/hosts
4438
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4439
      master_node = self.cfg.GetMasterNode()
4440
      result = self.rpc.call_etc_hosts_modify(master_node,
4441
                                              constants.ETC_HOSTS_REMOVE,
4442
                                              node.name, None)
4443
      result.Raise("Can't update hosts file with new host data")
4444
      _RedistributeAncillaryFiles(self)
4445

    
4446

    
4447
class _NodeQuery(_QueryBase):
4448
  FIELDS = query.NODE_FIELDS
4449

    
4450
  def ExpandNames(self, lu):
4451
    lu.needed_locks = {}
4452
    lu.share_locks = _ShareAll()
4453

    
4454
    if self.names:
4455
      self.wanted = _GetWantedNodes(lu, self.names)
4456
    else:
4457
      self.wanted = locking.ALL_SET
4458

    
4459
    self.do_locking = (self.use_locking and
4460
                       query.NQ_LIVE in self.requested_data)
4461

    
4462
    if self.do_locking:
4463
      # If any non-static field is requested we need to lock the nodes
4464
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4465

    
4466
  def DeclareLocks(self, lu, level):
4467
    pass
4468

    
4469
  def _GetQueryData(self, lu):
4470
    """Computes the list of nodes and their attributes.
4471

4472
    """
4473
    all_info = lu.cfg.GetAllNodesInfo()
4474

    
4475
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4476

    
4477
    # Gather data as requested
4478
    if query.NQ_LIVE in self.requested_data:
4479
      # filter out non-vm_capable nodes
4480
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4481

    
4482
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4483
                                        lu.cfg.GetHypervisorType())
4484
      live_data = dict((name, nresult.payload)
4485
                       for (name, nresult) in node_data.items()
4486
                       if not nresult.fail_msg and nresult.payload)
4487
    else:
4488
      live_data = None
4489

    
4490
    if query.NQ_INST in self.requested_data:
4491
      node_to_primary = dict([(name, set()) for name in nodenames])
4492
      node_to_secondary = dict([(name, set()) for name in nodenames])
4493

    
4494
      inst_data = lu.cfg.GetAllInstancesInfo()
4495

    
4496
      for inst in inst_data.values():
4497
        if inst.primary_node in node_to_primary:
4498
          node_to_primary[inst.primary_node].add(inst.name)
4499
        for secnode in inst.secondary_nodes:
4500
          if secnode in node_to_secondary:
4501
            node_to_secondary[secnode].add(inst.name)
4502
    else:
4503
      node_to_primary = None
4504
      node_to_secondary = None
4505

    
4506
    if query.NQ_OOB in self.requested_data:
4507
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4508
                         for name, node in all_info.iteritems())
4509
    else:
4510
      oob_support = None
4511

    
4512
    if query.NQ_GROUP in self.requested_data:
4513
      groups = lu.cfg.GetAllNodeGroupsInfo()
4514
    else:
4515
      groups = {}
4516

    
4517
    return query.NodeQueryData([all_info[name] for name in nodenames],
4518
                               live_data, lu.cfg.GetMasterNode(),
4519
                               node_to_primary, node_to_secondary, groups,
4520
                               oob_support, lu.cfg.GetClusterInfo())
4521

    
4522

    
4523
class LUNodeQuery(NoHooksLU):
4524
  """Logical unit for querying nodes.
4525

4526
  """
4527
  # pylint: disable=W0142
4528
  REQ_BGL = False
4529

    
4530
  def CheckArguments(self):
4531
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4532
                         self.op.output_fields, self.op.use_locking)
4533

    
4534
  def ExpandNames(self):
4535
    self.nq.ExpandNames(self)
4536

    
4537
  def Exec(self, feedback_fn):
4538
    return self.nq.OldStyleQuery(self)
4539

    
4540

    
4541
class LUNodeQueryvols(NoHooksLU):
4542
  """Logical unit for getting volumes on node(s).
4543

4544
  """
4545
  REQ_BGL = False
4546
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4547
  _FIELDS_STATIC = utils.FieldSet("node")
4548

    
4549
  def CheckArguments(self):
4550
    _CheckOutputFields(static=self._FIELDS_STATIC,
4551
                       dynamic=self._FIELDS_DYNAMIC,
4552
                       selected=self.op.output_fields)
4553

    
4554
  def ExpandNames(self):
4555
    self.needed_locks = {}
4556
    self.share_locks[locking.LEVEL_NODE] = 1
4557
    if not self.op.nodes:
4558
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4559
    else:
4560
      self.needed_locks[locking.LEVEL_NODE] = \
4561
        _GetWantedNodes(self, self.op.nodes)
4562

    
4563
  def Exec(self, feedback_fn):
    """Computes the list of volumes on the requested nodes.

    """
4567
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4568
    volumes = self.rpc.call_node_volumes(nodenames)
4569

    
4570
    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
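    # Each result row contains the requested output_fields in order, e.g. for
    # ["node", "name", "size"] a row might be
    # ["node1.example.com", "vol0", "10240"] (illustrative values only).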
4574
    for node in nodenames:
4575
      nresult = volumes[node]
4576
      if nresult.offline:
4577
        continue
4578
      msg = nresult.fail_msg
4579
      if msg:
4580
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4581
        continue
4582

    
4583
      node_vols = sorted(nresult.payload,
4584
                         key=operator.itemgetter("dev"))
4585

    
4586
      for vol in node_vols:
4587
        node_output = []
4588
        for field in self.op.output_fields:
4589
          if field == "node":
4590
            val = node
4591
          elif field == "phys":
4592
            val = vol["dev"]
4593
          elif field == "vg":
4594
            val = vol["vg"]
4595
          elif field == "name":
4596
            val = vol["name"]
4597
          elif field == "size":
4598
            val = int(float(vol["size"]))
4599
          elif field == "instance":
4600
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4601
          else:
4602
            raise errors.ParameterError(field)
4603
          node_output.append(str(val))
4604

    
4605
        output.append(node_output)
4606

    
4607
    return output
4608

    
4609

    
4610
class LUNodeQueryStorage(NoHooksLU):
4611
  """Logical unit for getting information on storage units on node(s).
4612

4613
  """
4614
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4615
  REQ_BGL = False
4616

    
4617
  def CheckArguments(self):
4618
    _CheckOutputFields(static=self._FIELDS_STATIC,
4619
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4620
                       selected=self.op.output_fields)
4621

    
4622
  def ExpandNames(self):
4623
    self.needed_locks = {}
4624
    self.share_locks[locking.LEVEL_NODE] = 1
4625

    
4626
    if self.op.nodes:
4627
      self.needed_locks[locking.LEVEL_NODE] = \
4628
        _GetWantedNodes(self, self.op.nodes)
4629
    else:
4630
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4631

    
4632
  def Exec(self, feedback_fn):
    """Computes the list of storage units on the requested nodes.

    """
4636
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4637

    
4638
    # Always get name to sort by
4639
    if constants.SF_NAME in self.op.output_fields:
4640
      fields = self.op.output_fields[:]
4641
    else:
4642
      fields = [constants.SF_NAME] + self.op.output_fields
4643

    
4644
    # Never ask for node or type as it's only known to the LU
4645
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4646
      while extra in fields:
4647
        fields.remove(extra)
4648

    
4649
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]
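    # field_idx maps each field actually requested from the backend to its
    # column in the RPC result rows; name_idx is used below to key and sort
    # the rows by storage unit name.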
4651

    
4652
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4653
    data = self.rpc.call_storage_list(self.nodes,
4654
                                      self.op.storage_type, st_args,
4655
                                      self.op.name, fields)
4656

    
4657
    result = []
4658

    
4659
    for node in utils.NiceSort(self.nodes):
4660
      nresult = data[node]
4661
      if nresult.offline:
4662
        continue
4663

    
4664
      msg = nresult.fail_msg
4665
      if msg:
4666
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4667
        continue
4668

    
4669
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4670

    
4671
      for name in utils.NiceSort(rows.keys()):
4672
        row = rows[name]
4673

    
4674
        out = []
4675

    
4676
        for field in self.op.output_fields:
4677
          if field == constants.SF_NODE:
4678
            val = node
4679
          elif field == constants.SF_TYPE:
4680
            val = self.op.storage_type
4681
          elif field in field_idx:
4682
            val = row[field_idx[field]]
4683
          else:
4684
            raise errors.ParameterError(field)
4685

    
4686
          out.append(val)
4687

    
4688
        result.append(out)
4689

    
4690
    return result
4691

    
4692

    
4693
class _InstanceQuery(_QueryBase):
4694
  FIELDS = query.INSTANCE_FIELDS
4695

    
4696
  def ExpandNames(self, lu):
4697
    lu.needed_locks = {}
4698
    lu.share_locks = _ShareAll()
4699

    
4700
    if self.names:
4701
      self.wanted = _GetWantedInstances(lu, self.names)
4702
    else:
4703
      self.wanted = locking.ALL_SET
4704

    
4705
    self.do_locking = (self.use_locking and
4706
                       query.IQ_LIVE in self.requested_data)
4707
    if self.do_locking:
4708
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4709
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4710
      lu.needed_locks[locking.LEVEL_NODE] = []
4711
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4712

    
4713
    self.do_grouplocks = (self.do_locking and
4714
                          query.IQ_NODES in self.requested_data)
4715

    
4716
  def DeclareLocks(self, lu, level):
4717
    if self.do_locking:
4718
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4719
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4720

    
4721
        # Lock all groups used by instances optimistically; this requires going
4722
        # via the node before it's locked, requiring verification later on
4723
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4724
          set(group_uuid
4725
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4726
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4727
      elif level == locking.LEVEL_NODE:
4728
        lu._LockInstancesNodes() # pylint: disable=W0212
4729

    
4730
  @staticmethod
4731
  def _CheckGroupLocks(lu):
4732
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4733
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4734

    
4735
    # Check if node groups for locked instances are still correct
4736
    for instance_name in owned_instances:
4737
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4738

    
4739
  def _GetQueryData(self, lu):
4740
    """Computes the list of instances and their attributes.
4741

4742
    """
4743
    if self.do_grouplocks:
4744
      self._CheckGroupLocks(lu)
4745

    
4746
    cluster = lu.cfg.GetClusterInfo()
4747
    all_info = lu.cfg.GetAllInstancesInfo()
4748

    
4749
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4750

    
4751
    instance_list = [all_info[name] for name in instance_names]
4752
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4753
                                        for inst in instance_list)))
4754
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
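    # bad_nodes: nodes whose RPC failed; offline_nodes: nodes marked offline
    # (these also end up in bad_nodes); wrongnode_inst: instances reported
    # running on a node that is not their configured primary node.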
4758

    
4759
    # Gather data as requested
4760
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4761
      live_data = {}
4762
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4763
      for name in nodes:
4764
        result = node_data[name]
4765
        if result.offline:
4766
          # offline nodes will be in both lists
4767
          assert result.fail_msg
4768
          offline_nodes.append(name)
4769
        if result.fail_msg:
4770
          bad_nodes.append(name)
4771
        elif result.payload:
4772
          for inst in result.payload:
4773
            if inst in all_info:
4774
              if all_info[inst].primary_node == name:
4775
                live_data.update(result.payload)
4776
              else:
4777
                wrongnode_inst.add(inst)
4778
            else:
4779
              # orphan instance; we don't list it here as we don't
4780
              # handle this case yet in the output of instance listing
4781
              logging.warning("Orphan instance '%s' found on node %s",
4782
                              inst, name)
4783
        # else no instance is alive
4784
    else:
4785
      live_data = {}
4786

    
4787
    if query.IQ_DISKUSAGE in self.requested_data:
4788
      disk_usage = dict((inst.name,
4789
                         _ComputeDiskSize(inst.disk_template,
4790
                                          [{constants.IDISK_SIZE: disk.size}
4791
                                           for disk in inst.disks]))
4792
                        for inst in instance_list)
4793
    else:
4794
      disk_usage = None
4795

    
4796
    if query.IQ_CONSOLE in self.requested_data:
4797
      consinfo = {}
4798
      for inst in instance_list:
4799
        if inst.name in live_data:
4800
          # Instance is running
4801
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4802
        else:
4803
          consinfo[inst.name] = None
4804
      assert set(consinfo.keys()) == set(instance_names)
4805
    else:
4806
      consinfo = None
4807

    
4808
    if query.IQ_NODES in self.requested_data:
4809
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4810
                                            instance_list)))
4811
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4812
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4813
                    for uuid in set(map(operator.attrgetter("group"),
4814
                                        nodes.values())))
4815
    else:
4816
      nodes = None
4817
      groups = None
4818

    
4819
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4820
                                   disk_usage, offline_nodes, bad_nodes,
4821
                                   live_data, wrongnode_inst, consinfo,
4822
                                   nodes, groups)
4823

    
4824

    
4825
class LUQuery(NoHooksLU):
4826
  """Query for resources/items of a certain kind.
4827

4828
  """
4829
  # pylint: disable=W0142
4830
  REQ_BGL = False
4831

    
4832
  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
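    # self.impl is an instance of one of the _*Query classes (for example
    # _NodeQuery or _OsQuery), chosen by _GetQueryImplementation based on
    # self.op.what.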
4836

    
4837
  def ExpandNames(self):
4838
    self.impl.ExpandNames(self)
4839

    
4840
  def DeclareLocks(self, level):
4841
    self.impl.DeclareLocks(self, level)
4842

    
4843
  def Exec(self, feedback_fn):
4844
    return self.impl.NewStyleQuery(self)
4845

    
4846

    
4847
class LUQueryFields(NoHooksLU):
4848
  """Query for resources/items of a certain kind.
4849

4850
  """
4851
  # pylint: disable=W0142
4852
  REQ_BGL = False
4853

    
4854
  def CheckArguments(self):
4855
    self.qcls = _GetQueryImplementation(self.op.what)
4856

    
4857
  def ExpandNames(self):
4858
    self.needed_locks = {}
4859

    
4860
  def Exec(self, feedback_fn):
4861
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4862

    
4863

    
4864
class LUNodeModifyStorage(NoHooksLU):
4865
  """Logical unit for modifying a storage volume on a node.
4866

4867
  """
4868
  REQ_BGL = False
4869

    
4870
  def CheckArguments(self):
4871
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4872

    
4873
    storage_type = self.op.storage_type
4874

    
4875
    try:
4876
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4877
    except KeyError:
4878
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4879
                                 " modified" % storage_type,
4880
                                 errors.ECODE_INVAL)
4881

    
4882
    diff = set(self.op.changes.keys()) - modifiable
4883
    if diff:
4884
      raise errors.OpPrereqError("The following fields can not be modified for"
4885
                                 " storage units of type '%s': %r" %
4886
                                 (storage_type, list(diff)),
4887
                                 errors.ECODE_INVAL)
4888

    
4889
  def ExpandNames(self):
4890
    self.needed_locks = {
4891
      locking.LEVEL_NODE: self.op.node_name,
4892
      }
4893

    
4894
  def Exec(self, feedback_fn):
    """Modifies the requested storage unit on the given node.

    """
4898
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4899
    result = self.rpc.call_storage_modify(self.op.node_name,
4900
                                          self.op.storage_type, st_args,
4901
                                          self.op.name, self.op.changes)
4902
    result.Raise("Failed to modify storage unit '%s' on %s" %
4903
                 (self.op.name, self.op.node_name))
4904

    
4905

    
4906
class LUNodeAdd(LogicalUnit):
4907
  """Logical unit for adding node to the cluster.
4908

4909
  """
4910
  HPATH = "node-add"
4911
  HTYPE = constants.HTYPE_NODE
4912
  _NFLAGS = ["master_capable", "vm_capable"]
4913

    
4914
  def CheckArguments(self):
4915
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4916
    # validate/normalize the node name
4917
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4918
                                         family=self.primary_ip_family)
4919
    self.op.node_name = self.hostname.name
4920

    
4921
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4922
      raise errors.OpPrereqError("Cannot readd the master node",
4923
                                 errors.ECODE_STATE)
4924

    
4925
    if self.op.readd and self.op.group:
4926
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4927
                                 " being readded", errors.ECODE_INVAL)
4928

    
4929
  def BuildHooksEnv(self):
4930
    """Build hooks env.
4931

4932
    This will run on all nodes before, and on all nodes + the new node after.
4933

4934
    """
4935
    return {
4936
      "OP_TARGET": self.op.node_name,
4937
      "NODE_NAME": self.op.node_name,
4938
      "NODE_PIP": self.op.primary_ip,
4939
      "NODE_SIP": self.op.secondary_ip,
4940
      "MASTER_CAPABLE": str(self.op.master_capable),
4941
      "VM_CAPABLE": str(self.op.vm_capable),
4942
      }
4943

    
4944
  def BuildHooksNodes(self):
4945
    """Build hooks nodes.
4946

4947
    """
4948
    # Exclude added node
4949
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4950
    post_nodes = pre_nodes + [self.op.node_name, ]
4951

    
4952
    return (pre_nodes, post_nodes)
4953

    
4954
  def CheckPrereq(self):
4955
    """Check prerequisites.
4956

4957
    This checks:
4958
     - the new node is not already in the config
4959
     - it is resolvable
4960
     - its parameters (single/dual homed) matches the cluster
4961

4962
    Any errors are signaled by raising errors.OpPrereqError.
4963

4964
    """
4965
    cfg = self.cfg
4966
    hostname = self.hostname
4967
    node = hostname.name
4968
    primary_ip = self.op.primary_ip = hostname.ip
4969
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip
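      # No secondary IP given: default to the primary one (single-homed node).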
4975

    
4976
    secondary_ip = self.op.secondary_ip
4977
    if not netutils.IP4Address.IsValid(secondary_ip):
4978
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4979
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4980

    
4981
    node_list = cfg.GetNodeList()
4982
    if not self.op.readd and node in node_list:
4983
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4984
                                 node, errors.ECODE_EXISTS)
4985
    elif self.op.readd and node not in node_list:
4986
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4987
                                 errors.ECODE_NOENT)
4988

    
4989
    self.changed_primary_ip = False
4990

    
4991
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4992
      if self.op.readd and node == existing_node_name:
4993
        if existing_node.secondary_ip != secondary_ip:
4994
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4995
                                     " address configuration as before",
4996
                                     errors.ECODE_INVAL)
4997
        if existing_node.primary_ip != primary_ip:
4998
          self.changed_primary_ip = True
4999

    
5000
        continue
5001

    
5002
      if (existing_node.primary_ip == primary_ip or
5003
          existing_node.secondary_ip == primary_ip or
5004
          existing_node.primary_ip == secondary_ip or
5005
          existing_node.secondary_ip == secondary_ip):
5006
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5007
                                   " existing node %s" % existing_node.name,
5008
                                   errors.ECODE_NOTUNIQUE)
5009

    
5010
    # After this 'if' block, None is no longer a valid value for the
5011
    # _capable op attributes
5012
    if self.op.readd:
5013
      old_node = self.cfg.GetNodeInfo(node)
5014
      assert old_node is not None, "Can't retrieve locked node %s" % node
5015
      for attr in self._NFLAGS:
5016
        if getattr(self.op, attr) is None:
5017
          setattr(self.op, attr, getattr(old_node, attr))
5018
    else:
5019
      for attr in self._NFLAGS:
5020
        if getattr(self.op, attr) is None:
5021
          setattr(self.op, attr, True)
5022

    
5023
    if self.op.readd and not self.op.vm_capable:
5024
      pri, sec = cfg.GetNodeInstances(node)
5025
      if pri or sec:
5026
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5027
                                   " flag set to false, but it already holds"
5028
                                   " instances" % node,
5029
                                   errors.ECODE_STATE)
5030

    
5031
    # check that the type of the node (single versus dual homed) is the
5032
    # same as for the master
5033
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5034
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5035
    newbie_singlehomed = secondary_ip == primary_ip
5036
    if master_singlehomed != newbie_singlehomed:
5037
      if master_singlehomed:
5038
        raise errors.OpPrereqError("The master has no secondary ip but the"
5039
                                   " new node has one",
5040
                                   errors.ECODE_INVAL)
5041
      else:
5042
        raise errors.OpPrereqError("The master has a secondary ip but the"
5043
                                   " new node doesn't have one",
5044
                                   errors.ECODE_INVAL)
5045

    
5046
    # checks reachability
5047
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5048
      raise errors.OpPrereqError("Node not reachable by ping",
5049
                                 errors.ECODE_ENVIRON)
5050

    
5051
    if not newbie_singlehomed:
5052
      # check reachability from my secondary ip to newbie's secondary ip
5053
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5054
                           source=myself.secondary_ip):
5055
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5056
                                   " based ping to node daemon port",
5057
                                   errors.ECODE_ENVIRON)
5058

    
5059
    if self.op.readd:
5060
      exceptions = [node]
5061
    else:
5062
      exceptions = []
5063

    
5064
    if self.op.master_capable:
5065
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5066
    else:
5067
      self.master_candidate = False
5068

    
5069
    if self.op.readd:
5070
      self.new_node = old_node
5071
    else:
5072
      node_group = cfg.LookupNodeGroup(self.op.group)
5073
      self.new_node = objects.Node(name=node,
5074
                                   primary_ip=primary_ip,
5075
                                   secondary_ip=secondary_ip,
5076
                                   master_candidate=self.master_candidate,
5077
                                   offline=False, drained=False,
5078
                                   group=node_group)
5079

    
5080
    if self.op.ndparams:
5081
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5082

    
5083
  def Exec(self, feedback_fn):
5084
    """Adds the new node to the cluster.
5085

5086
    """
5087
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True
5092

    
5093
    # for re-adds, reset the offline/drained/master-candidate flags;
5094
    # we need to reset here, otherwise offline would prevent RPC calls
5095
    # later in the procedure; this also means that if the re-add
5096
    # fails, we are left with a non-offlined, broken node
5097
    if self.op.readd:
5098
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5099
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5100
      # if we demote the node, we do cleanup later in the procedure
5101
      new_node.master_candidate = self.master_candidate
5102
      if self.changed_primary_ip:
5103
        new_node.primary_ip = self.op.primary_ip
5104

    
5105
    # copy the master/vm_capable flags
5106
    for attr in self._NFLAGS:
5107
      setattr(new_node, attr, getattr(self.op, attr))
5108

    
5109
    # notify the user about any possible mc promotion
5110
    if new_node.master_candidate:
5111
      self.LogInfo("Node will be a master candidate")
5112

    
5113
    if self.op.ndparams:
5114
      new_node.ndparams = self.op.ndparams
5115
    else:
5116
      new_node.ndparams = {}
5117

    
5118
    # check connectivity
5119
    result = self.rpc.call_version([node])[node]
5120
    result.Raise("Can't get version information from node %s" % node)
5121
    if constants.PROTOCOL_VERSION == result.payload:
5122
      logging.info("Communication to node %s fine, sw version %s match",
5123
                   node, result.payload)
5124
    else:
5125
      raise errors.OpExecError("Version mismatch master version %s,"
5126
                               " node version %s" %
5127
                               (constants.PROTOCOL_VERSION, result.payload))
5128

    
5129
    # Add node to our /etc/hosts, and add key to known_hosts
5130
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5131
      master_node = self.cfg.GetMasterNode()
5132
      result = self.rpc.call_etc_hosts_modify(master_node,
5133
                                              constants.ETC_HOSTS_ADD,
5134
                                              self.hostname.name,
5135
                                              self.hostname.ip)
5136
      result.Raise("Can't update hosts file with new host data")
5137

    
5138
    if new_node.secondary_ip != new_node.primary_ip:
5139
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5140
                               False)
5141

    
5142
    node_verify_list = [self.cfg.GetMasterNode()]
5143
    node_verify_param = {
5144
      constants.NV_NODELIST: ([node], {}),
5145
      # TODO: do a node-net-test as well?
5146
    }
5147

    
5148
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5149
                                       self.cfg.GetClusterName())
5150
    for verifier in node_verify_list:
5151
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5152
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5153
      if nl_payload:
5154
        for failed in nl_payload:
5155
          feedback_fn("ssh/hostname verification failed"
5156
                      " (checking from %s): %s" %
5157
                      (verifier, nl_payload[failed]))
5158
        raise errors.OpExecError("ssh/hostname verification failed")
5159

    
5160
    if self.op.readd:
5161
      _RedistributeAncillaryFiles(self)
5162
      self.context.ReaddNode(new_node)
5163
      # make sure we redistribute the config
5164
      self.cfg.Update(new_node, feedback_fn)
5165
      # and make sure the new node will not have old files around
5166
      if not new_node.master_candidate:
5167
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5168
        msg = result.fail_msg
5169
        if msg:
5170
          self.LogWarning("Node failed to demote itself from master"
5171
                          " candidate status: %s" % msg)
5172
    else:
5173
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5174
                                  additional_vm=self.op.vm_capable)
5175
      self.context.AddNode(new_node, self.proc.GetECId())
5176

    
5177

    
5178
class LUNodeSetParams(LogicalUnit):
5179
  """Modifies the parameters of a node.
5180

5181
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5182
      to the node role (as _ROLE_*)
5183
  @cvar _R2F: a dictionary from node role to tuples of flags
5184
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5185

5186
  """
5187
  HPATH = "node-modify"
5188
  HTYPE = constants.HTYPE_NODE
5189
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
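  # Note: the order of _FLAGS matches the flag tuples used as keys in _F2R,
  # i.e. (master_candidate, drained, offline).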
5199

    
5200
  def CheckArguments(self):
5201
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5202
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5203
                self.op.master_capable, self.op.vm_capable,
5204
                self.op.secondary_ip, self.op.ndparams]
5205
    if all_mods.count(None) == len(all_mods):
5206
      raise errors.OpPrereqError("Please pass at least one modification",
5207
                                 errors.ECODE_INVAL)
5208
    if all_mods.count(True) > 1:
5209
      raise errors.OpPrereqError("Can't set the node into more than one"
5210
                                 " state at the same time",
5211
                                 errors.ECODE_INVAL)
5212

    
5213
    # Boolean value that tells us whether we might be demoting from MC
5214
    self.might_demote = (self.op.master_candidate == False or
5215
                         self.op.offline == True or
5216
                         self.op.drained == True or
5217
                         self.op.master_capable == False)
5218

    
5219
    if self.op.secondary_ip:
5220
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5221
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5222
                                   " address" % self.op.secondary_ip,
5223
                                   errors.ECODE_INVAL)
5224

    
5225
    self.lock_all = self.op.auto_promote and self.might_demote
5226
    self.lock_instances = self.op.secondary_ip is not None
5227

    
5228
  def ExpandNames(self):
5229
    if self.lock_all:
5230
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5231
    else:
5232
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5233

    
5234
    if self.lock_instances:
5235
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5236

    
5237
  def DeclareLocks(self, level):
5238
    # If we have locked all instances, before waiting to lock nodes, release
5239
    # all the ones living on nodes unrelated to the current operation.
5240
    if level == locking.LEVEL_NODE and self.lock_instances:
5241
      self.affected_instances = []
5242
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5243
        instances_keep = []
5244

    
5245
        # Build list of instances to release
5246
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5247
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5248
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5249
              self.op.node_name in instance.all_nodes):
5250
            instances_keep.append(instance_name)
5251
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
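        # Only internally mirrored instances that use this node keep their
        # locks; all other instance locks acquired above are released again.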
5254

    
5255
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5256
                set(instances_keep))
5257

    
5258
  def BuildHooksEnv(self):
5259
    """Build hooks env.
5260

5261
    This runs on the master node.
5262

5263
    """
5264
    return {
5265
      "OP_TARGET": self.op.node_name,
5266
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5267
      "OFFLINE": str(self.op.offline),
5268
      "DRAINED": str(self.op.drained),
5269
      "MASTER_CAPABLE": str(self.op.master_capable),
5270
      "VM_CAPABLE": str(self.op.vm_capable),
5271
      }
5272

    
5273
  def BuildHooksNodes(self):
5274
    """Build hooks nodes.
5275

5276
    """
5277
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5278
    return (nl, nl)
5279

    
5280
  def CheckPrereq(self):
5281
    """Check prerequisites.
5282

5283
    This only checks the instance list against the existing names.
5284

5285
    """
5286
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5287

    
5288
    if (self.op.master_candidate is not None or
5289
        self.op.drained is not None or
5290
        self.op.offline is not None):
5291
      # we can't change the master's node flags
5292
      if self.op.node_name == self.cfg.GetMasterNode():
5293
        raise errors.OpPrereqError("The master role can be changed"
5294
                                   " only via master-failover",
5295
                                   errors.ECODE_INVAL)
5296

    
5297
    if self.op.master_candidate and not node.master_capable:
5298
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5299
                                 " it a master candidate" % node.name,
5300
                                 errors.ECODE_STATE)
5301

    
5302
    if self.op.vm_capable == False:
5303
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5304
      if ipri or isec:
5305
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5306
                                   " the vm_capable flag" % node.name,
5307
                                   errors.ECODE_STATE)
5308

    
5309
    if node.master_candidate and self.might_demote and not self.lock_all:
5310
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5311
      # check if after removing the current node, we're missing master
5312
      # candidates
5313
      (mc_remaining, mc_should, _) = \
5314
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5315
      if mc_remaining < mc_should:
5316
        raise errors.OpPrereqError("Not enough master candidates, please"
5317
                                   " pass auto promote option to allow"
5318
                                   " promotion", errors.ECODE_STATE)
5319

    
5320
    self.old_flags = old_flags = (node.master_candidate,
5321
                                  node.drained, node.offline)
5322
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5323
    self.old_role = old_role = self._F2R[old_flags]
5324

    
5325
    # Check for ineffective changes
5326
    for attr in self._FLAGS:
5327
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5328
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5329
        setattr(self.op, attr, None)
5330

    
5331
    # Past this point, any flag change to False means a transition
5332
    # away from the respective state, as only real changes are kept
5333

    
5334
    # TODO: We might query the real power state if it supports OOB
5335
    if _SupportsOob(self.cfg, node):
5336
      if self.op.offline is False and not (node.powered or
5337
                                           self.op.powered == True):
5338
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5339
                                    " offline status can be reset") %
5340
                                   self.op.node_name)
5341
    elif self.op.powered is not None:
5342
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5343
                                  " as it does not support out-of-band"
5344
                                  " handling") % self.op.node_name)
5345

    
5346
    # If we're being deofflined/drained, we'll MC ourself if needed
5347
    if (self.op.drained == False or self.op.offline == False or
5348
        (self.op.master_capable and not node.master_capable)):
5349
      if _DecideSelfPromotion(self):
5350
        self.op.master_candidate = True
5351
        self.LogInfo("Auto-promoting node to master candidate")
5352

    
5353
    # If we're no longer master capable, we'll demote ourselves from MC
5354
    if self.op.master_capable == False and node.master_candidate:
5355
      self.LogInfo("Demoting from master candidate")
5356
      self.op.master_candidate = False
5357

    
5358
    # Compute new role
5359
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5360
    if self.op.master_candidate:
5361
      new_role = self._ROLE_CANDIDATE
5362
    elif self.op.drained:
5363
      new_role = self._ROLE_DRAINED
5364
    elif self.op.offline:
5365
      new_role = self._ROLE_OFFLINE
5366
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5367
      # False is still in new flags, which means we're un-setting (the
5368
      # only) True flag
5369
      new_role = self._ROLE_REGULAR
5370
    else: # no new flags, nothing, keep old role
5371
      new_role = old_role
5372

    
5373
    self.new_role = new_role
5374

    
5375
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5376
      # Trying to transition out of offline status
5377
      result = self.rpc.call_version([node.name])[node.name]
5378
      if result.fail_msg:
5379
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5380
                                   " to report its version: %s" %
5381
                                   (node.name, result.fail_msg),
5382
                                   errors.ECODE_STATE)
5383
      else:
5384
        self.LogWarning("Transitioning node from offline to online state"
5385
                        " without using re-add. Please make sure the node"
5386
                        " is healthy!")
5387

    
5388
    if self.op.secondary_ip:
5389
      # Ok even without locking, because this can't be changed by any LU
5390
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5391
      master_singlehomed = master.secondary_ip == master.primary_ip
5392
      if master_singlehomed and self.op.secondary_ip:
5393
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5394
                                   " homed cluster", errors.ECODE_INVAL)
5395

    
5396
      if node.offline:
5397
        if self.affected_instances:
5398
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5399
                                     " node has instances (%s) configured"
5400
                                     " to use it" % self.affected_instances)
5401
      else:
5402
        # On online nodes, check that no instances are running, and that
5403
        # the node has the new ip and we can reach it.
5404
        for instance in self.affected_instances:
5405
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5406

    
5407
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5408
        if master.name != node.name:
5409
          # check reachability from master secondary ip to new secondary ip
5410
          if not netutils.TcpPing(self.op.secondary_ip,
5411
                                  constants.DEFAULT_NODED_PORT,
5412
                                  source=master.secondary_ip):
5413
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5414
                                       " based ping to node daemon port",
5415
                                       errors.ECODE_ENVIRON)
5416

    
5417
    if self.op.ndparams:
5418
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5419
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5420
      self.new_ndparams = new_ndparams
5421

    
5422
  def Exec(self, feedback_fn):
5423
    """Modifies a node.
5424

5425
    """
5426
    node = self.node
5427
    old_role = self.old_role
5428
    new_role = self.new_role
5429

    
5430
    result = []
5431

    
5432
    if self.op.ndparams:
5433
      node.ndparams = self.new_ndparams
5434

    
5435
    if self.op.powered is not None:
5436
      node.powered = self.op.powered
5437

    
5438
    for attr in ["master_capable", "vm_capable"]:
5439
      val = getattr(self.op, attr)
5440
      if val is not None:
5441
        setattr(node, attr, val)
5442
        result.append((attr, str(val)))
5443

    
5444
    if new_role != old_role:
5445
      # Tell the node to demote itself, if no longer MC and not offline
5446
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5447
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5448
        if msg:
5449
          self.LogWarning("Node failed to demote itself: %s", msg)
5450

    
5451
      new_flags = self._R2F[new_role]
5452
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5453
        if of != nf:
5454
          result.append((desc, str(nf)))
5455
      (node.master_candidate, node.drained, node.offline) = new_flags
5456

    
5457
      # we locked all nodes, we adjust the CP before updating this node
5458
      if self.lock_all:
5459
        _AdjustCandidatePool(self, [node.name])
5460

    
5461
    if self.op.secondary_ip:
5462
      node.secondary_ip = self.op.secondary_ip
5463
      result.append(("secondary_ip", self.op.secondary_ip))
5464

    
5465
    # this will trigger configuration file update, if needed
5466
    self.cfg.Update(node, feedback_fn)
5467

    
5468
    # this will trigger job queue propagation or cleanup if the mc
5469
    # flag changed
5470
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5471
      self.context.ReaddNode(node)
5472

    
5473
    return result
5474

    
5475

    
5476
class LUNodePowercycle(NoHooksLU):
5477
  """Powercycles a node.
5478

5479
  """
5480
  REQ_BGL = False
5481

    
5482
  def CheckArguments(self):
5483
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5484
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5485
      raise errors.OpPrereqError("The node is the master and the force"
5486
                                 " parameter was not set",
5487
                                 errors.ECODE_INVAL)
5488

    
5489
  def ExpandNames(self):
5490
    """Locking for PowercycleNode.
5491

5492
    This is a last-resort option and shouldn't block on other
5493
    jobs. Therefore, we grab no locks.
5494

5495
    """
5496
    self.needed_locks = {}
5497

    
5498
  def Exec(self, feedback_fn):
5499
    """Reboots a node.
5500

5501
    """
5502
    result = self.rpc.call_node_powercycle(self.op.node_name,
5503
                                           self.cfg.GetHypervisorType())
5504
    result.Raise("Failed to schedule the reboot")
5505
    return result.payload
5506

    
5507

    
5508
class LUClusterQuery(NoHooksLU):
5509
  """Query cluster configuration.
5510

5511
  """
5512
  REQ_BGL = False
5513

    
5514
  def ExpandNames(self):
5515
    self.needed_locks = {}
5516

    
5517
  def Exec(self, feedback_fn):
5518
    """Return cluster config.
5519

5520
    """
5521
    cluster = self.cfg.GetClusterInfo()
5522
    os_hvp = {}
5523

    
5524
    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params
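    # Resulting shape (illustrative): {"debian-etch": {"xen-pvm": {...}}},
    # with parameters of disabled hypervisors left out.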
5530

    
5531
    # Convert ip_family to ip_version
5532
    primary_ip_version = constants.IP4_VERSION
5533
    if cluster.primary_ip_family == netutils.IP6Address.family:
5534
      primary_ip_version = constants.IP6_VERSION
5535

    
5536
    result = {
5537
      "software_version": constants.RELEASE_VERSION,
5538
      "protocol_version": constants.PROTOCOL_VERSION,
5539
      "config_version": constants.CONFIG_VERSION,
5540
      "os_api_version": max(constants.OS_API_VERSIONS),
5541
      "export_version": constants.EXPORT_VERSION,
5542
      "architecture": (platform.architecture()[0], platform.machine()),
5543
      "name": cluster.cluster_name,
5544
      "master": cluster.master_node,
5545
      "default_hypervisor": cluster.enabled_hypervisors[0],
5546
      "enabled_hypervisors": cluster.enabled_hypervisors,
5547
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5548
                        for hypervisor_name in cluster.enabled_hypervisors]),
5549
      "os_hvp": os_hvp,
5550
      "beparams": cluster.beparams,
5551
      "osparams": cluster.osparams,
5552
      "nicparams": cluster.nicparams,
5553
      "ndparams": cluster.ndparams,
5554
      "candidate_pool_size": cluster.candidate_pool_size,
5555
      "master_netdev": cluster.master_netdev,
5556
      "master_netmask": cluster.master_netmask,
5557
      "volume_group_name": cluster.volume_group_name,
5558
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5559
      "file_storage_dir": cluster.file_storage_dir,
5560
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5561
      "maintain_node_health": cluster.maintain_node_health,
5562
      "ctime": cluster.ctime,
5563
      "mtime": cluster.mtime,
5564
      "uuid": cluster.uuid,
5565
      "tags": list(cluster.GetTags()),
5566
      "uid_pool": cluster.uid_pool,
5567
      "default_iallocator": cluster.default_iallocator,
5568
      "reserved_lvs": cluster.reserved_lvs,
5569
      "primary_ip_version": primary_ip_version,
5570
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5571
      "hidden_os": cluster.hidden_os,
5572
      "blacklisted_os": cluster.blacklisted_os,
5573
      }
5574

    
5575
    return result
5576

    
5577

    
5578
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return the values of the requested configuration fields.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


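# Example for LUClusterConfigQuery above (illustrative): an opcode with
# output_fields=["cluster_name", "volume_group_name"] yields a two-element
# list in the same order, e.g. ["cluster.example.com", "xenvg"]; the concrete
# values here are made up for illustration.
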
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); device_info is a list of
      (node_name, disk_iv_name, device_path) tuples with the mapping from
      node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


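# Example for _AssembleInstanceDisks above (illustrative): for a healthy
# two-disk instance the function returns something like
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"),
#           ("node1.example.com", "disk/1", "/dev/drbd1")])
# where the node name and device paths are hypothetical; on errors the first
# element is False and failing disks get a None device path.
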
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


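# Note on _StartInstanceDisks above (descriptive): "force" is forwarded to
# _AssembleInstanceDisks as ignore_secondaries, so a caller passing None
# (e.g. _StartInstanceDisks(self, inst, None) in LUInstanceReinstall below)
# neither ignores secondary-node errors nor prints the '--force' hint.
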
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


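# Example for _ExpandCheckDisks above (illustrative): passing disks=None acts
# on all of instance.disks, passing [instance.disks[0]] restricts the helpers
# below to the first disk only, and passing a disk that does not belong to
# the instance raises ProgrammerError.
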
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node cause the
  function to return False; if it is true, they are ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


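# Note on _ShutdownInstanceDisks above (descriptive): with the default
# ignore_primary=False a shutdown failure on the primary node makes the
# function return False, while with ignore_primary=True such a failure is
# only logged; failures on offline secondary nodes never affect the result.
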
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


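# Example for _CheckNodeFreeMemory above: LUInstanceStartup below calls
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# so that startup fails early with ECODE_NORES when the primary node cannot
# hold the instance's configured memory.
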
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


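# Example for _CheckNodesFreeDiskPerVG above (illustrative): req_sizes maps
# volume group names to space in MiB, e.g. {"xenvg": 10240} requires 10 GiB
# free in VG "xenvg" on every node in nodenames; the VG name is hypothetical.
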
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to query for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


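# Example for _CheckNodesPhysicalCPUs above (illustrative): with requested=4,
# a node whose node_info payload reports cpu_total=2 makes the check raise
# OpPrereqError ("Node ... has 2 physical CPUs, but 4 are required").
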
class LUInstanceStartup(LogicalUnit):
5982
  """Starts an instance.
5983

5984
  """
5985
  HPATH = "instance-start"
5986
  HTYPE = constants.HTYPE_INSTANCE
5987
  REQ_BGL = False
5988

    
5989
  def CheckArguments(self):
5990
    # extra beparams
5991
    if self.op.beparams:
5992
      # fill the beparams dict
5993
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5994

    
5995
  def ExpandNames(self):
5996
    self._ExpandAndLockInstance()
5997

    
5998
  def BuildHooksEnv(self):
5999
    """Build hooks env.
6000

6001
    This runs on master, primary and secondary nodes of the instance.
6002

6003
    """
6004
    env = {
6005
      "FORCE": self.op.force,
6006
      }
6007

    
6008
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6009

    
6010
    return env
6011

    
6012
  def BuildHooksNodes(self):
6013
    """Build hooks nodes.
6014

6015
    """
6016
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6017
    return (nl, nl)
6018

    
6019
  def CheckPrereq(self):
6020
    """Check prerequisites.
6021

6022
    This checks that the instance is in the cluster.
6023

6024
    """
6025
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6026
    assert self.instance is not None, \
6027
      "Cannot retrieve locked instance %s" % self.op.instance_name
6028

    
6029
    # extra hvparams
6030
    if self.op.hvparams:
6031
      # check hypervisor parameter syntax (locally)
6032
      cluster = self.cfg.GetClusterInfo()
6033
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6034
      filled_hvp = cluster.FillHV(instance)
6035
      filled_hvp.update(self.op.hvparams)
6036
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6037
      hv_type.CheckParameterSyntax(filled_hvp)
6038
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6039

    
6040
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6041

    
6042
    if self.primary_offline and self.op.ignore_offline_nodes:
6043
      self.proc.LogWarning("Ignoring offline primary node")
6044

    
6045
      if self.op.hvparams or self.op.beparams:
6046
        self.proc.LogWarning("Overridden parameters are ignored")
6047
    else:
6048
      _CheckNodeOnline(self, instance.primary_node)
6049

    
6050
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6051

    
6052
      # check bridges existence
6053
      _CheckInstanceBridgesExist(self, instance)
6054

    
6055
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6056
                                                instance.name,
6057
                                                instance.hypervisor)
6058
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6059
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6060
      if not remote_info.payload: # not running already
6061
        _CheckNodeFreeMemory(self, instance.primary_node,
6062
                             "starting instance %s" % instance.name,
6063
                             bep[constants.BE_MEMORY], instance.hypervisor)
6064

    
6065
  def Exec(self, feedback_fn):
6066
    """Start the instance.
6067

6068
    """
6069
    instance = self.instance
6070
    force = self.op.force
6071

    
6072
    if not self.op.no_remember:
6073
      self.cfg.MarkInstanceUp(instance.name)
6074

    
6075
    if self.primary_offline:
6076
      assert self.op.ignore_offline_nodes
6077
      self.proc.LogInfo("Primary node offline, marked instance as started")
6078
    else:
6079
      node_current = instance.primary_node
6080

    
6081
      _StartInstanceDisks(self, instance, force)
6082

    
6083
      result = \
6084
        self.rpc.call_instance_start(node_current,
6085
                                     (instance, self.op.hvparams,
6086
                                      self.op.beparams),
6087
                                     self.op.startup_paused)
6088
      msg = result.fail_msg
6089
      if msg:
6090
        _ShutdownInstanceDisks(self, instance)
6091
        raise errors.OpExecError("Could not start instance: %s" % msg)
6092

    
6093

    
6094
class LUInstanceReboot(LogicalUnit):
6095
  """Reboot an instance.
6096

6097
  """
6098
  HPATH = "instance-reboot"
6099
  HTYPE = constants.HTYPE_INSTANCE
6100
  REQ_BGL = False
6101

    
6102
  def ExpandNames(self):
6103
    self._ExpandAndLockInstance()
6104

    
6105
  def BuildHooksEnv(self):
6106
    """Build hooks env.
6107

6108
    This runs on master, primary and secondary nodes of the instance.
6109

6110
    """
6111
    env = {
6112
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6113
      "REBOOT_TYPE": self.op.reboot_type,
6114
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6115
      }
6116

    
6117
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6118

    
6119
    return env
6120

    
6121
  def BuildHooksNodes(self):
6122
    """Build hooks nodes.
6123

6124
    """
6125
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6126
    return (nl, nl)
6127

    
6128
  def CheckPrereq(self):
6129
    """Check prerequisites.
6130

6131
    This checks that the instance is in the cluster.
6132

6133
    """
6134
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6135
    assert self.instance is not None, \
6136
      "Cannot retrieve locked instance %s" % self.op.instance_name
6137

    
6138
    _CheckNodeOnline(self, instance.primary_node)
6139

    
6140
    # check bridges existence
6141
    _CheckInstanceBridgesExist(self, instance)
6142

    
6143
  def Exec(self, feedback_fn):
6144
    """Reboot the instance.
6145

6146
    """
6147
    instance = self.instance
6148
    ignore_secondaries = self.op.ignore_secondaries
6149
    reboot_type = self.op.reboot_type
6150

    
6151
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6152
                                              instance.name,
6153
                                              instance.hypervisor)
6154
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6155
    instance_running = bool(remote_info.payload)
6156

    
6157
    node_current = instance.primary_node
6158

    
6159
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6160
                                            constants.INSTANCE_REBOOT_HARD]:
6161
      for disk in instance.disks:
6162
        self.cfg.SetDiskID(disk, node_current)
6163
      result = self.rpc.call_instance_reboot(node_current, instance,
6164
                                             reboot_type,
6165
                                             self.op.shutdown_timeout)
6166
      result.Raise("Could not reboot instance")
6167
    else:
6168
      if instance_running:
6169
        result = self.rpc.call_instance_shutdown(node_current, instance,
6170
                                                 self.op.shutdown_timeout)
6171
        result.Raise("Could not shutdown instance for full reboot")
6172
        _ShutdownInstanceDisks(self, instance)
6173
      else:
6174
        self.LogInfo("Instance %s was already stopped, starting now",
6175
                     instance.name)
6176
      _StartInstanceDisks(self, instance, ignore_secondaries)
6177
      result = self.rpc.call_instance_start(node_current,
6178
                                            (instance, None, None), False)
6179
      msg = result.fail_msg
6180
      if msg:
6181
        _ShutdownInstanceDisks(self, instance)
6182
        raise errors.OpExecError("Could not start instance for"
6183
                                 " full reboot: %s" % msg)
6184

    
6185
    self.cfg.MarkInstanceUp(instance.name)
6186

    
6187

    
6188
class LUInstanceShutdown(LogicalUnit):
6189
  """Shutdown an instance.
6190

6191
  """
6192
  HPATH = "instance-stop"
6193
  HTYPE = constants.HTYPE_INSTANCE
6194
  REQ_BGL = False
6195

    
6196
  def ExpandNames(self):
6197
    self._ExpandAndLockInstance()
6198

    
6199
  def BuildHooksEnv(self):
6200
    """Build hooks env.
6201

6202
    This runs on master, primary and secondary nodes of the instance.
6203

6204
    """
6205
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6206
    env["TIMEOUT"] = self.op.timeout
6207
    return env
6208

    
6209
  def BuildHooksNodes(self):
6210
    """Build hooks nodes.
6211

6212
    """
6213
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6214
    return (nl, nl)
6215

    
6216
  def CheckPrereq(self):
6217
    """Check prerequisites.
6218

6219
    This checks that the instance is in the cluster.
6220

6221
    """
6222
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6223
    assert self.instance is not None, \
6224
      "Cannot retrieve locked instance %s" % self.op.instance_name
6225

    
6226
    self.primary_offline = \
6227
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6228

    
6229
    if self.primary_offline and self.op.ignore_offline_nodes:
6230
      self.proc.LogWarning("Ignoring offline primary node")
6231
    else:
6232
      _CheckNodeOnline(self, self.instance.primary_node)
6233

    
6234
  def Exec(self, feedback_fn):
6235
    """Shutdown the instance.
6236

6237
    """
6238
    instance = self.instance
6239
    node_current = instance.primary_node
6240
    timeout = self.op.timeout
6241

    
6242
    if not self.op.no_remember:
6243
      self.cfg.MarkInstanceDown(instance.name)
6244

    
6245
    if self.primary_offline:
6246
      assert self.op.ignore_offline_nodes
6247
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6248
    else:
6249
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6250
      msg = result.fail_msg
6251
      if msg:
6252
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6253

    
6254
      _ShutdownInstanceDisks(self, instance)
6255

    
6256

    
6257
class LUInstanceReinstall(LogicalUnit):
6258
  """Reinstall an instance.
6259

6260
  """
6261
  HPATH = "instance-reinstall"
6262
  HTYPE = constants.HTYPE_INSTANCE
6263
  REQ_BGL = False
6264

    
6265
  def ExpandNames(self):
6266
    self._ExpandAndLockInstance()
6267

    
6268
  def BuildHooksEnv(self):
6269
    """Build hooks env.
6270

6271
    This runs on master, primary and secondary nodes of the instance.
6272

6273
    """
6274
    return _BuildInstanceHookEnvByObject(self, self.instance)
6275

    
6276
  def BuildHooksNodes(self):
6277
    """Build hooks nodes.
6278

6279
    """
6280
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6281
    return (nl, nl)
6282

    
6283
  def CheckPrereq(self):
6284
    """Check prerequisites.
6285

6286
    This checks that the instance is in the cluster and is not running.
6287

6288
    """
6289
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6290
    assert instance is not None, \
6291
      "Cannot retrieve locked instance %s" % self.op.instance_name
6292
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6293
                     " offline, cannot reinstall")
6294
    for node in instance.secondary_nodes:
6295
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6296
                       " cannot reinstall")
6297

    
6298
    if instance.disk_template == constants.DT_DISKLESS:
6299
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6300
                                 self.op.instance_name,
6301
                                 errors.ECODE_INVAL)
6302
    _CheckInstanceDown(self, instance, "cannot reinstall")
6303

    
6304
    if self.op.os_type is not None:
6305
      # OS verification
6306
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6307
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6308
      instance_os = self.op.os_type
6309
    else:
6310
      instance_os = instance.os
6311

    
6312
    nodelist = list(instance.all_nodes)
6313

    
6314
    if self.op.osparams:
6315
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6316
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6317
      self.os_inst = i_osdict # the new dict (without defaults)
6318
    else:
6319
      self.os_inst = None
6320

    
6321
    self.instance = instance
6322

    
6323
  def Exec(self, feedback_fn):
6324
    """Reinstall the instance.
6325

6326
    """
6327
    inst = self.instance
6328

    
6329
    if self.op.os_type is not None:
6330
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6331
      inst.os = self.op.os_type
6332
      # Write to configuration
6333
      self.cfg.Update(inst, feedback_fn)
6334

    
6335
    _StartInstanceDisks(self, inst, None)
6336
    try:
6337
      feedback_fn("Running the instance OS create scripts...")
6338
      # FIXME: pass debug option from opcode to backend
6339
      result = self.rpc.call_instance_os_add(inst.primary_node,
6340
                                             (inst, self.os_inst), True,
6341
                                             self.op.debug_level)
6342
      result.Raise("Could not install OS for instance %s on node %s" %
6343
                   (inst.name, inst.primary_node))
6344
    finally:
6345
      _ShutdownInstanceDisks(self, inst)
6346

    
6347

    
6348
class LUInstanceRecreateDisks(LogicalUnit):
6349
  """Recreate an instance's missing disks.
6350

6351
  """
6352
  HPATH = "instance-recreate-disks"
6353
  HTYPE = constants.HTYPE_INSTANCE
6354
  REQ_BGL = False
6355

    
6356
  def CheckArguments(self):
6357
    # normalise the disk list
6358
    self.op.disks = sorted(frozenset(self.op.disks))
6359

    
6360
  def ExpandNames(self):
6361
    self._ExpandAndLockInstance()
6362
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6363
    if self.op.nodes:
6364
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6365
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6366
    else:
6367
      self.needed_locks[locking.LEVEL_NODE] = []
6368

    
6369
  def DeclareLocks(self, level):
6370
    if level == locking.LEVEL_NODE:
6371
      # if we replace the nodes, we only need to lock the old primary,
6372
      # otherwise we need to lock all nodes for disk re-creation
6373
      primary_only = bool(self.op.nodes)
6374
      self._LockInstancesNodes(primary_only=primary_only)
6375

    
6376
  def BuildHooksEnv(self):
6377
    """Build hooks env.
6378

6379
    This runs on master, primary and secondary nodes of the instance.
6380

6381
    """
6382
    return _BuildInstanceHookEnvByObject(self, self.instance)
6383

    
6384
  def BuildHooksNodes(self):
6385
    """Build hooks nodes.
6386

6387
    """
6388
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6389
    return (nl, nl)
6390

    
6391
  def CheckPrereq(self):
6392
    """Check prerequisites.
6393

6394
    This checks that the instance is in the cluster and is not running.
6395

6396
    """
6397
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6398
    assert instance is not None, \
6399
      "Cannot retrieve locked instance %s" % self.op.instance_name
6400
    if self.op.nodes:
6401
      if len(self.op.nodes) != len(instance.all_nodes):
6402
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6403
                                   " %d replacement nodes were specified" %
6404
                                   (instance.name, len(instance.all_nodes),
6405
                                    len(self.op.nodes)),
6406
                                   errors.ECODE_INVAL)
6407
      assert instance.disk_template != constants.DT_DRBD8 or \
6408
          len(self.op.nodes) == 2
6409
      assert instance.disk_template != constants.DT_PLAIN or \
6410
          len(self.op.nodes) == 1
6411
      primary_node = self.op.nodes[0]
6412
    else:
6413
      primary_node = instance.primary_node
6414
    _CheckNodeOnline(self, primary_node)
6415

    
6416
    if instance.disk_template == constants.DT_DISKLESS:
6417
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6418
                                 self.op.instance_name, errors.ECODE_INVAL)
6419
    # if we replace nodes *and* the old primary is offline, we don't
6420
    # check
6421
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6422
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6423
    if not (self.op.nodes and old_pnode.offline):
6424
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6425

    
6426
    if not self.op.disks:
6427
      self.op.disks = range(len(instance.disks))
6428
    else:
6429
      for idx in self.op.disks:
6430
        if idx >= len(instance.disks):
6431
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6432
                                     errors.ECODE_INVAL)
6433
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6434
      raise errors.OpPrereqError("Can't recreate disks partially and"
6435
                                 " change the nodes at the same time",
6436
                                 errors.ECODE_INVAL)
6437
    self.instance = instance
6438

    
6439
  def Exec(self, feedback_fn):
6440
    """Recreate the disks.
6441

6442
    """
6443
    instance = self.instance
6444

    
6445
    to_skip = []
6446
    mods = [] # keeps track of needed logical_id changes
6447

    
6448
    for idx, disk in enumerate(instance.disks):
6449
      if idx not in self.op.disks: # disk idx has not been passed in
6450
        to_skip.append(idx)
6451
        continue
6452
      # update secondaries for disks, if needed
6453
      if self.op.nodes:
6454
        if disk.dev_type == constants.LD_DRBD8:
6455
          # need to update the nodes and minors
6456
          assert len(self.op.nodes) == 2
6457
          assert len(disk.logical_id) == 6 # otherwise disk internals
6458
                                           # have changed
6459
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6460
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6461
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6462
                    new_minors[0], new_minors[1], old_secret)
6463
          assert len(disk.logical_id) == len(new_id)
6464
          mods.append((idx, new_id))
6465

    
6466
    # now that we have passed all asserts above, we can apply the mods
6467
    # in a single run (to avoid partial changes)
6468
    for idx, new_id in mods:
6469
      instance.disks[idx].logical_id = new_id
6470

    
6471
    # change primary node, if needed
6472
    if self.op.nodes:
6473
      instance.primary_node = self.op.nodes[0]
6474
      self.LogWarning("Changing the instance's nodes, you will have to"
6475
                      " remove any disks left on the older nodes manually")
6476

    
6477
    if self.op.nodes:
6478
      self.cfg.Update(instance, feedback_fn)
6479

    
6480
    _CreateDisks(self, instance, to_skip=to_skip)
6481

    
6482

    
6483
class LUInstanceRename(LogicalUnit):
6484
  """Rename an instance.
6485

6486
  """
6487
  HPATH = "instance-rename"
6488
  HTYPE = constants.HTYPE_INSTANCE
6489

    
6490
  def CheckArguments(self):
6491
    """Check arguments.
6492

6493
    """
6494
    if self.op.ip_check and not self.op.name_check:
6495
      # TODO: make the ip check more flexible and not depend on the name check
6496
      raise errors.OpPrereqError("IP address check requires a name check",
6497
                                 errors.ECODE_INVAL)
6498

    
6499
  def BuildHooksEnv(self):
6500
    """Build hooks env.
6501

6502
    This runs on master, primary and secondary nodes of the instance.
6503

6504
    """
6505
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6506
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6507
    return env
6508

    
6509
  def BuildHooksNodes(self):
6510
    """Build hooks nodes.
6511

6512
    """
6513
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6514
    return (nl, nl)
6515

    
6516
  def CheckPrereq(self):
6517
    """Check prerequisites.
6518

6519
    This checks that the instance is in the cluster and is not running.
6520

6521
    """
6522
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6523
                                                self.op.instance_name)
6524
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6525
    assert instance is not None
6526
    _CheckNodeOnline(self, instance.primary_node)
6527
    _CheckInstanceDown(self, instance, "cannot rename")
6528
    self.instance = instance
6529

    
6530
    new_name = self.op.new_name
6531
    if self.op.name_check:
6532
      hostname = netutils.GetHostname(name=new_name)
6533
      if hostname != new_name:
6534
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6535
                     hostname.name)
6536
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6537
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6538
                                    " same as given hostname '%s'") %
6539
                                    (hostname.name, self.op.new_name),
6540
                                    errors.ECODE_INVAL)
6541
      new_name = self.op.new_name = hostname.name
6542
      if (self.op.ip_check and
6543
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6544
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6545
                                   (hostname.ip, new_name),
6546
                                   errors.ECODE_NOTUNIQUE)
6547

    
6548
    instance_list = self.cfg.GetInstanceList()
6549
    if new_name in instance_list and new_name != instance.name:
6550
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6551
                                 new_name, errors.ECODE_EXISTS)
6552

    
6553
  def Exec(self, feedback_fn):
6554
    """Rename the instance.
6555

6556
    """
6557
    inst = self.instance
6558
    old_name = inst.name
6559

    
6560
    rename_file_storage = False
6561
    if (inst.disk_template in constants.DTS_FILEBASED and
6562
        self.op.new_name != inst.name):
6563
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6564
      rename_file_storage = True
6565

    
6566
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6567
    # Change the instance lock. This is definitely safe while we hold the BGL.
6568
    # Otherwise the new lock would have to be added in acquired mode.
6569
    assert self.REQ_BGL
6570
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6571
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6572

    
6573
    # re-read the instance from the configuration after rename
6574
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6575

    
6576
    if rename_file_storage:
6577
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6578
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6579
                                                     old_file_storage_dir,
6580
                                                     new_file_storage_dir)
6581
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6582
                   " (but the instance has been renamed in Ganeti)" %
6583
                   (inst.primary_node, old_file_storage_dir,
6584
                    new_file_storage_dir))
6585

    
6586
    _StartInstanceDisks(self, inst, None)
6587
    try:
6588
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6589
                                                 old_name, self.op.debug_level)
6590
      msg = result.fail_msg
6591
      if msg:
6592
        msg = ("Could not run OS rename script for instance %s on node %s"
6593
               " (but the instance has been renamed in Ganeti): %s" %
6594
               (inst.name, inst.primary_node, msg))
6595
        self.proc.LogWarning(msg)
6596
    finally:
6597
      _ShutdownInstanceDisks(self, inst)
6598

    
6599
    return inst.name
6600

    
6601

    
6602
class LUInstanceRemove(LogicalUnit):
6603
  """Remove an instance.
6604

6605
  """
6606
  HPATH = "instance-remove"
6607
  HTYPE = constants.HTYPE_INSTANCE
6608
  REQ_BGL = False
6609

    
6610
  def ExpandNames(self):
6611
    self._ExpandAndLockInstance()
6612
    self.needed_locks[locking.LEVEL_NODE] = []
6613
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6614

    
6615
  def DeclareLocks(self, level):
6616
    if level == locking.LEVEL_NODE:
6617
      self._LockInstancesNodes()
6618

    
6619
  def BuildHooksEnv(self):
6620
    """Build hooks env.
6621

6622
    This runs on master, primary and secondary nodes of the instance.
6623

6624
    """
6625
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6626
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6627
    return env
6628

    
6629
  def BuildHooksNodes(self):
6630
    """Build hooks nodes.
6631

6632
    """
6633
    nl = [self.cfg.GetMasterNode()]
6634
    nl_post = list(self.instance.all_nodes) + nl
6635
    return (nl, nl_post)
6636

    
6637
  def CheckPrereq(self):
6638
    """Check prerequisites.
6639

6640
    This checks that the instance is in the cluster.
6641

6642
    """
6643
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6644
    assert self.instance is not None, \
6645
      "Cannot retrieve locked instance %s" % self.op.instance_name
6646

    
6647
  def Exec(self, feedback_fn):
6648
    """Remove the instance.
6649

6650
    """
6651
    instance = self.instance
6652
    logging.info("Shutting down instance %s on node %s",
6653
                 instance.name, instance.primary_node)
6654

    
6655
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6656
                                             self.op.shutdown_timeout)
6657
    msg = result.fail_msg
6658
    if msg:
6659
      if self.op.ignore_failures:
6660
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6661
      else:
6662
        raise errors.OpExecError("Could not shutdown instance %s on"
6663
                                 " node %s: %s" %
6664
                                 (instance.name, instance.primary_node, msg))
6665

    
6666
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6667

    
6668

    
6669
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6670
  """Utility function to remove an instance.
6671

6672
  """
6673
  logging.info("Removing block devices for instance %s", instance.name)
6674

    
6675
  if not _RemoveDisks(lu, instance):
6676
    if not ignore_failures:
6677
      raise errors.OpExecError("Can't remove instance's disks")
6678
    feedback_fn("Warning: can't remove instance's disks")
6679

    
6680
  logging.info("Removing instance %s out of cluster config", instance.name)
6681

    
6682
  lu.cfg.RemoveInstance(instance.name)
6683

    
6684
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6685
    "Instance lock removal conflict"
6686

    
6687
  # Remove lock for the instance
6688
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6689

    
6690

    
6691
class LUInstanceQuery(NoHooksLU):
6692
  """Logical unit for querying instances.
6693

6694
  """
6695
  # pylint: disable=W0142
6696
  REQ_BGL = False
6697

    
6698
  def CheckArguments(self):
6699
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6700
                             self.op.output_fields, self.op.use_locking)
6701

    
6702
  def ExpandNames(self):
6703
    self.iq.ExpandNames(self)
6704

    
6705
  def DeclareLocks(self, level):
6706
    self.iq.DeclareLocks(self, level)
6707

    
6708
  def Exec(self, feedback_fn):
6709
    return self.iq.OldStyleQuery(self)
6710

    
6711

    
6712
class LUInstanceFailover(LogicalUnit):
6713
  """Failover an instance.
6714

6715
  """
6716
  HPATH = "instance-failover"
6717
  HTYPE = constants.HTYPE_INSTANCE
6718
  REQ_BGL = False
6719

    
6720
  def CheckArguments(self):
6721
    """Check the arguments.
6722

6723
    """
6724
    self.iallocator = getattr(self.op, "iallocator", None)
6725
    self.target_node = getattr(self.op, "target_node", None)
6726

    
6727
  def ExpandNames(self):
6728
    self._ExpandAndLockInstance()
6729

    
6730
    if self.op.target_node is not None:
6731
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6732

    
6733
    self.needed_locks[locking.LEVEL_NODE] = []
6734
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6735

    
6736
    ignore_consistency = self.op.ignore_consistency
6737
    shutdown_timeout = self.op.shutdown_timeout
6738
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6739
                                       cleanup=False,
6740
                                       failover=True,
6741
                                       ignore_consistency=ignore_consistency,
6742
                                       shutdown_timeout=shutdown_timeout)
6743
    self.tasklets = [self._migrater]
6744

    
6745
  def DeclareLocks(self, level):
6746
    if level == locking.LEVEL_NODE:
6747
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6748
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6749
        if self.op.target_node is None:
6750
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6751
        else:
6752
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6753
                                                   self.op.target_node]
6754
        del self.recalculate_locks[locking.LEVEL_NODE]
6755
      else:
6756
        self._LockInstancesNodes()
6757

    
6758
  def BuildHooksEnv(self):
6759
    """Build hooks env.
6760

6761
    This runs on master, primary and secondary nodes of the instance.
6762

6763
    """
6764
    instance = self._migrater.instance
6765
    source_node = instance.primary_node
6766
    target_node = self.op.target_node
6767
    env = {
6768
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6769
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6770
      "OLD_PRIMARY": source_node,
6771
      "NEW_PRIMARY": target_node,
6772
      }
6773

    
6774
    if instance.disk_template in constants.DTS_INT_MIRROR:
6775
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6776
      env["NEW_SECONDARY"] = source_node
6777
    else:
6778
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6779

    
6780
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6781

    
6782
    return env
6783

    
6784
  def BuildHooksNodes(self):
6785
    """Build hooks nodes.
6786

6787
    """
6788
    instance = self._migrater.instance
6789
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6790
    return (nl, nl + [instance.primary_node])
6791

    
6792

    
6793
class LUInstanceMigrate(LogicalUnit):
6794
  """Migrate an instance.
6795

6796
  This is migration without shutting down, compared to the failover,
6797
  which is done with shutdown.
6798

6799
  """
6800
  HPATH = "instance-migrate"
6801
  HTYPE = constants.HTYPE_INSTANCE
6802
  REQ_BGL = False
6803

    
6804
  def ExpandNames(self):
6805
    self._ExpandAndLockInstance()
6806

    
6807
    if self.op.target_node is not None:
6808
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6809

    
6810
    self.needed_locks[locking.LEVEL_NODE] = []
6811
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6812

    
6813
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6814
                                       cleanup=self.op.cleanup,
6815
                                       failover=False,
6816
                                       fallback=self.op.allow_failover)
6817
    self.tasklets = [self._migrater]
6818

    
6819
  def DeclareLocks(self, level):
6820
    if level == locking.LEVEL_NODE:
6821
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6822
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6823
        if self.op.target_node is None:
6824
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6825
        else:
6826
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6827
                                                   self.op.target_node]
6828
        del self.recalculate_locks[locking.LEVEL_NODE]
6829
      else:
6830
        self._LockInstancesNodes()
6831

    
6832
  def BuildHooksEnv(self):
6833
    """Build hooks env.
6834

6835
    This runs on master, primary and secondary nodes of the instance.
6836

6837
    """
6838
    instance = self._migrater.instance
6839
    source_node = instance.primary_node
6840
    target_node = self.op.target_node
6841
    env = _BuildInstanceHookEnvByObject(self, instance)
6842
    env.update({
6843
      "MIGRATE_LIVE": self._migrater.live,
6844
      "MIGRATE_CLEANUP": self.op.cleanup,
6845
      "OLD_PRIMARY": source_node,
6846
      "NEW_PRIMARY": target_node,
6847
      })
6848

    
6849
    if instance.disk_template in constants.DTS_INT_MIRROR:
6850
      env["OLD_SECONDARY"] = target_node
6851
      env["NEW_SECONDARY"] = source_node
6852
    else:
6853
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6854

    
6855
    return env
6856

    
6857
  def BuildHooksNodes(self):
6858
    """Build hooks nodes.
6859

6860
    """
6861
    instance = self._migrater.instance
6862
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6863
    return (nl, nl + [instance.primary_node])
6864

    
6865

    
6866
class LUInstanceMove(LogicalUnit):
6867
  """Move an instance by data-copying.
6868

6869
  """
6870
  HPATH = "instance-move"
6871
  HTYPE = constants.HTYPE_INSTANCE
6872
  REQ_BGL = False
6873

    
6874
  def ExpandNames(self):
6875
    self._ExpandAndLockInstance()
6876
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6877
    self.op.target_node = target_node
6878
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6879
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6880

    
6881
  def DeclareLocks(self, level):
6882
    if level == locking.LEVEL_NODE:
6883
      self._LockInstancesNodes(primary_only=True)
6884

    
6885
  def BuildHooksEnv(self):
6886
    """Build hooks env.
6887

6888
    This runs on master, primary and secondary nodes of the instance.
6889

6890
    """
6891
    env = {
6892
      "TARGET_NODE": self.op.target_node,
6893
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6894
      }
6895
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6896
    return env
6897

    
6898
  def BuildHooksNodes(self):
6899
    """Build hooks nodes.
6900

6901
    """
6902
    nl = [
6903
      self.cfg.GetMasterNode(),
6904
      self.instance.primary_node,
6905
      self.op.target_node,
6906
      ]
6907
    return (nl, nl)
6908

    
6909
  def CheckPrereq(self):
6910
    """Check prerequisites.
6911

6912
    This checks that the instance is in the cluster.
6913

6914
    """
6915
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6916
    assert self.instance is not None, \
6917
      "Cannot retrieve locked instance %s" % self.op.instance_name
6918

    
6919
    node = self.cfg.GetNodeInfo(self.op.target_node)
6920
    assert node is not None, \
6921
      "Cannot retrieve locked node %s" % self.op.target_node
6922

    
6923
    self.target_node = target_node = node.name
6924

    
6925
    if target_node == instance.primary_node:
6926
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6927
                                 (instance.name, target_node),
6928
                                 errors.ECODE_STATE)
6929

    
6930
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6931

    
6932
    for idx, dsk in enumerate(instance.disks):
6933
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6934
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6935
                                   " cannot copy" % idx, errors.ECODE_STATE)
6936

    
6937
    _CheckNodeOnline(self, target_node)
6938
    _CheckNodeNotDrained(self, target_node)
6939
    _CheckNodeVmCapable(self, target_node)
6940

    
6941
    if instance.admin_up:
6942
      # check memory requirements on the target node
6943
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6944
                           instance.name, bep[constants.BE_MEMORY],
6945
                           instance.hypervisor)
6946
    else:
6947
      self.LogInfo("Not checking memory on the secondary node as"
6948
                   " instance will not be started")
6949

    
6950
    # check bridge existence
6951
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6952

    
6953
  def Exec(self, feedback_fn):
6954
    """Move an instance.
6955

6956
    The move is done by shutting it down on its present node, copying
6957
    the data over (slow) and starting it on the new node.
6958

6959
    """
6960
    instance = self.instance
6961

    
6962
    source_node = instance.primary_node
6963
    target_node = self.target_node
6964

    
6965
    self.LogInfo("Shutting down instance %s on source node %s",
6966
                 instance.name, source_node)
6967

    
6968
    result = self.rpc.call_instance_shutdown(source_node, instance,
6969
                                             self.op.shutdown_timeout)
6970
    msg = result.fail_msg
6971
    if msg:
6972
      if self.op.ignore_consistency:
6973
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6974
                             " Proceeding anyway. Please make sure node"
6975
                             " %s is down. Error details: %s",
6976
                             instance.name, source_node, source_node, msg)
6977
      else:
6978
        raise errors.OpExecError("Could not shutdown instance %s on"
6979
                                 " node %s: %s" %
6980
                                 (instance.name, source_node, msg))
6981

    
6982
    # create the target disks
6983
    try:
6984
      _CreateDisks(self, instance, target_node=target_node)
6985
    except errors.OpExecError:
6986
      self.LogWarning("Device creation failed, reverting...")
6987
      try:
6988
        _RemoveDisks(self, instance, target_node=target_node)
6989
      finally:
6990
        self.cfg.ReleaseDRBDMinors(instance.name)
6991
        raise
6992

    
6993
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6994

    
6995
    errs = []
6996
    # activate, get path, copy the data over
6997
    for idx, disk in enumerate(instance.disks):
6998
      self.LogInfo("Copying data for disk %d", idx)
6999
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7000
                                               instance.name, True, idx)
7001
      if result.fail_msg:
7002
        self.LogWarning("Can't assemble newly created disk %d: %s",
7003
                        idx, result.fail_msg)
7004
        errs.append(result.fail_msg)
7005
        break
7006
      dev_path = result.payload
7007
      result = self.rpc.call_blockdev_export(source_node, disk,
7008
                                             target_node, dev_path,
7009
                                             cluster_name)
7010
      if result.fail_msg:
7011
        self.LogWarning("Can't copy data over for disk %d: %s",
7012
                        idx, result.fail_msg)
7013
        errs.append(result.fail_msg)
7014
        break
7015

    
7016
    if errs:
7017
      self.LogWarning("Some disks failed to copy, aborting")
7018
      try:
7019
        _RemoveDisks(self, instance, target_node=target_node)
7020
      finally:
7021
        self.cfg.ReleaseDRBDMinors(instance.name)
7022
        raise errors.OpExecError("Errors during disk copy: %s" %
7023
                                 (",".join(errs),))
7024

    
7025
    instance.primary_node = target_node
7026
    self.cfg.Update(instance, feedback_fn)
7027

    
7028
    self.LogInfo("Removing the disks on the original node")
7029
    _RemoveDisks(self, instance, target_node=source_node)
7030

    
7031
    # Only start the instance if it's marked as up
7032
    if instance.admin_up:
7033
      self.LogInfo("Starting instance %s on node %s",
7034
                   instance.name, target_node)
7035

    
7036
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7037
                                           ignore_secondaries=True)
7038
      if not disks_ok:
7039
        _ShutdownInstanceDisks(self, instance)
7040
        raise errors.OpExecError("Can't activate the instance's disks")
7041

    
7042
      result = self.rpc.call_instance_start(target_node,
7043
                                            (instance, None, None), False)
7044
      msg = result.fail_msg
7045
      if msg:
7046
        _ShutdownInstanceDisks(self, instance)
7047
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7048
                                 (instance.name, target_node, msg))
7049

    
7050

    
7051
class LUNodeMigrate(LogicalUnit):
7052
  """Migrate all instances from a node.
7053

7054
  """
7055
  HPATH = "node-migrate"
7056
  HTYPE = constants.HTYPE_NODE
7057
  REQ_BGL = False
7058

    
7059
  def CheckArguments(self):
7060
    pass
7061

    
7062
  def ExpandNames(self):
7063
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7064

    
7065
    self.share_locks = _ShareAll()
7066
    self.needed_locks = {
7067
      locking.LEVEL_NODE: [self.op.node_name],
7068
      }
7069

    
7070
  def BuildHooksEnv(self):
7071
    """Build hooks env.
7072

7073
    This runs on the master, the primary and all the secondaries.
7074

7075
    """
7076
    return {
7077
      "NODE_NAME": self.op.node_name,
7078
      }
7079

    
7080
  def BuildHooksNodes(self):
7081
    """Build hooks nodes.
7082

7083
    """
7084
    nl = [self.cfg.GetMasterNode()]
7085
    return (nl, nl)
7086

    
7087
  def CheckPrereq(self):
7088
    pass
7089

    
7090
  def Exec(self, feedback_fn):
7091
    # Prepare jobs for the instances to be migrated
7092
    jobs = [
7093
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7094
                                 mode=self.op.mode,
7095
                                 live=self.op.live,
7096
                                 iallocator=self.op.iallocator,
7097
                                 target_node=self.op.target_node)]
7098
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7099
      ]
7100

    
7101
    # TODO: Run iallocator in this opcode and pass correct placement options to
7102
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7103
    # running the iallocator and the actual migration, a good consistency model
7104
    # will have to be found.
7105

    
7106
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7107
            frozenset([self.op.node_name]))
7108

    
7109
    return ResultWithJobs(jobs)
7110

    
7111

    
7112
class TLMigrateInstance(Tasklet):
7113
  """Tasklet class for instance migration.
7114

7115
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the instance
      shutdown

  """

    
7137
  # Constants
7138
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7139
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7140

    
7141
  def __init__(self, lu, instance_name, cleanup=False,
7142
               failover=False, fallback=False,
7143
               ignore_consistency=False,
7144
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7145
    """Initializes this class.
7146

7147
    """
7148
    Tasklet.__init__(self, lu)
7149

    
7150
    # Parameters
7151
    self.instance_name = instance_name
7152
    self.cleanup = cleanup
7153
    self.live = False # will be overridden later
7154
    self.failover = failover
7155
    self.fallback = fallback
7156
    self.ignore_consistency = ignore_consistency
7157
    self.shutdown_timeout = shutdown_timeout
7158

    
7159
  def CheckPrereq(self):
7160
    """Check prerequisites.
7161

7162
    This checks that the instance is in the cluster.
7163

7164
    """
7165
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7166
    instance = self.cfg.GetInstanceInfo(instance_name)
7167
    assert instance is not None
7168
    self.instance = instance
7169

    
7170
    if (not self.cleanup and not instance.admin_up and not self.failover and
7171
        self.fallback):
7172
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7173
                      " to failover")
7174
      self.failover = True
7175

    
7176
    if instance.disk_template not in constants.DTS_MIRRORED:
7177
      if self.failover:
7178
        text = "failovers"
7179
      else:
7180
        text = "migrations"
7181
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7182
                                 " %s" % (instance.disk_template, text),
7183
                                 errors.ECODE_STATE)
7184

    
7185
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7186
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7187

    
7188
      if self.lu.op.iallocator:
7189
        self._RunAllocator()
7190
      else:
7191
        # We set self.target_node as it is required by
        # BuildHooksEnv
7193
        self.target_node = self.lu.op.target_node
7194

    
7195
      # self.target_node is already populated, either directly or by the
7196
      # iallocator run
7197
      target_node = self.target_node
7198
      if self.target_node == instance.primary_node:
7199
        raise errors.OpPrereqError("Cannot migrate instance %s"
7200
                                   " to its primary (%s)" %
7201
                                   (instance.name, instance.primary_node))
7202

    
7203
      if len(self.lu.tasklets) == 1:
7204
        # It is safe to release locks only when we're the only tasklet
7205
        # in the LU
7206
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7207
                      keep=[instance.primary_node, self.target_node])
7208

    
7209
    else:
7210
      secondary_nodes = instance.secondary_nodes
7211
      if not secondary_nodes:
7212
        raise errors.ConfigurationError("No secondary node but using"
7213
                                        " %s disk template" %
7214
                                        instance.disk_template)
7215
      target_node = secondary_nodes[0]
7216
      if self.lu.op.iallocator or (self.lu.op.target_node and
7217
                                   self.lu.op.target_node != target_node):
7218
        if self.failover:
7219
          text = "failed over"
7220
        else:
7221
          text = "migrated"
7222
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7223
                                   " be %s to arbitrary nodes"
7224
                                   " (neither an iallocator nor a target"
7225
                                   " node can be passed)" %
7226
                                   (instance.disk_template, text),
7227
                                   errors.ECODE_INVAL)
7228

    
7229
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7230

    
7231
    # check memory requirements on the secondary node
7232
    if not self.failover or instance.admin_up:
7233
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7234
                           instance.name, i_be[constants.BE_MEMORY],
7235
                           instance.hypervisor)
7236
    else:
7237
      self.lu.LogInfo("Not checking memory on the secondary node as"
7238
                      " instance will not be started")
7239

    
7240
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7242

    
7243
    if not self.cleanup:
7244
      _CheckNodeNotDrained(self.lu, target_node)
7245
      if not self.failover:
7246
        result = self.rpc.call_instance_migratable(instance.primary_node,
7247
                                                   instance)
7248
        if result.fail_msg and self.fallback:
7249
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7250
                          " failover")
7251
          self.failover = True
7252
        else:
7253
          result.Raise("Can't migrate, please use failover",
7254
                       prereq=True, ecode=errors.ECODE_STATE)
7255

    
7256
    assert not (self.failover and self.cleanup)
7257

    
7258
    if not self.failover:
7259
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7260
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7261
                                   " parameters are accepted",
7262
                                   errors.ECODE_INVAL)
7263
      if self.lu.op.live is not None:
7264
        if self.lu.op.live:
7265
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7266
        else:
7267
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7268
        # reset the 'live' parameter to None so that repeated
7269
        # invocations of CheckPrereq do not raise an exception
7270
        self.lu.op.live = None
7271
      elif self.lu.op.mode is None:
7272
        # read the default value from the hypervisor
7273
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7274
                                                skip_globals=False)
7275
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7276

    
7277
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7278
    else:
7279
      # Failover is never live
7280
      self.live = False
7281

    
7282
  def _RunAllocator(self):
7283
    """Run the allocator based on input opcode.
7284

7285
    """
7286
    ial = IAllocator(self.cfg, self.rpc,
7287
                     mode=constants.IALLOCATOR_MODE_RELOC,
7288
                     name=self.instance_name,
7289
                     # TODO See why hail breaks with a single node below
7290
                     relocate_from=[self.instance.primary_node,
7291
                                    self.instance.primary_node],
7292
                     )
7293

    
7294
    ial.Run(self.lu.op.iallocator)
7295

    
7296
    if not ial.success:
7297
      raise errors.OpPrereqError("Can't compute nodes using"
7298
                                 " iallocator '%s': %s" %
7299
                                 (self.lu.op.iallocator, ial.info),
7300
                                 errors.ECODE_NORES)
7301
    if len(ial.result) != ial.required_nodes:
7302
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7303
                                 " of nodes (%s), required %s" %
7304
                                 (self.lu.op.iallocator, len(ial.result),
7305
                                  ial.required_nodes), errors.ECODE_FAULT)
7306
    self.target_node = ial.result[0]
7307
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7308
                 self.instance_name, self.lu.op.iallocator,
7309
                 utils.CommaJoin(ial.result))
7310

    
7311
  def _WaitUntilSync(self):
7312
    """Poll with custom rpc for disk sync.
7313

7314
    This uses our own step-based rpc call.
7315

7316
    """
7317
    self.feedback_fn("* wait until resync is done")
7318
    all_done = False
7319
    while not all_done:
7320
      all_done = True
7321
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7322
                                            self.nodes_ip,
7323
                                            self.instance.disks)
7324
      min_percent = 100
7325
      for node, nres in result.items():
7326
        nres.Raise("Cannot resync disks on node %s" % node)
7327
        node_done, node_percent = nres.payload
7328
        all_done = all_done and node_done
7329
        if node_percent is not None:
7330
          min_percent = min(min_percent, node_percent)
7331
      if not all_done:
7332
        if min_percent < 100:
7333
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7334
        time.sleep(2)
7335

    
7336
  def _EnsureSecondary(self, node):
7337
    """Demote a node to secondary.
7338

7339
    """
7340
    self.feedback_fn("* switching node %s to secondary mode" % node)
7341

    
7342
    for dev in self.instance.disks:
7343
      self.cfg.SetDiskID(dev, node)
7344

    
7345
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7346
                                          self.instance.disks)
7347
    result.Raise("Cannot change disk to secondary on node %s" % node)
7348

    
7349
  def _GoStandalone(self):
7350
    """Disconnect from the network.
7351

7352
    """
7353
    self.feedback_fn("* changing into standalone mode")
7354
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7355
                                               self.instance.disks)
7356
    for node, nres in result.items():
7357
      nres.Raise("Cannot disconnect disks node %s" % node)
7358

    
7359
  def _GoReconnect(self, multimaster):
7360
    """Reconnect to the network.
7361

7362
    """
7363
    if multimaster:
7364
      msg = "dual-master"
7365
    else:
7366
      msg = "single-master"
7367
    self.feedback_fn("* changing disks into %s mode" % msg)
7368
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7369
                                           self.instance.disks,
7370
                                           self.instance.name, multimaster)
7371
    for node, nres in result.items():
7372
      nres.Raise("Cannot change disks config on node %s" % node)
7373

    
7374
  def _ExecCleanup(self):
7375
    """Try to cleanup after a failed migration.
7376

7377
    The cleanup is done by:
7378
      - check that the instance is running only on one node
7379
        (and update the config if needed)
7380
      - change disks on its secondary node to secondary
7381
      - wait until disks are fully synchronized
7382
      - disconnect from the network
7383
      - change disks into single-master mode
7384
      - wait again until disks are fully synchronized
7385

7386
    """
    instance = self.instance
7388
    target_node = self.target_node
7389
    source_node = self.source_node
7390

    
7391
    # check running on only one node
7392
    self.feedback_fn("* checking where the instance actually runs"
7393
                     " (if this hangs, the hypervisor might be in"
7394
                     " a bad state)")
7395
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7396
    for node, result in ins_l.items():
7397
      result.Raise("Can't contact node %s" % node)
7398

    
7399
    runningon_source = instance.name in ins_l[source_node].payload
7400
    runningon_target = instance.name in ins_l[target_node].payload
7401

    
7402
    if runningon_source and runningon_target:
7403
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7404
                               " or the hypervisor is confused; you will have"
7405
                               " to ensure manually that it runs only on one"
7406
                               " and restart this operation")
7407

    
7408
    if not (runningon_source or runningon_target):
7409
      raise errors.OpExecError("Instance does not seem to be running at all;"
7410
                               " in this case it's safer to repair by"
7411
                               " running 'gnt-instance stop' to ensure disk"
7412
                               " shutdown, and then restarting it")
7413

    
7414
    if runningon_target:
7415
      # the migration has actually succeeded, we need to update the config
7416
      self.feedback_fn("* instance running on secondary node (%s),"
7417
                       " updating config" % target_node)
7418
      instance.primary_node = target_node
7419
      self.cfg.Update(instance, self.feedback_fn)
7420
      demoted_node = source_node
7421
    else:
7422
      self.feedback_fn("* instance confirmed to be running on its"
7423
                       " primary node (%s)" % source_node)
7424
      demoted_node = target_node
7425

    
7426
    if instance.disk_template in constants.DTS_INT_MIRROR:
7427
      self._EnsureSecondary(demoted_node)
7428
      try:
7429
        self._WaitUntilSync()
7430
      except errors.OpExecError:
7431
        # we ignore here errors, since if the device is standalone, it
7432
        # won't be able to sync
7433
        pass
7434
      self._GoStandalone()
7435
      self._GoReconnect(False)
7436
      self._WaitUntilSync()
7437

    
7438
    self.feedback_fn("* done")
7439

    
7440
  def _RevertDiskStatus(self):
7441
    """Try to revert the disk status after a failed migration.
7442

7443
    """
7444
    target_node = self.target_node
7445
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7446
      return
7447

    
7448
    try:
7449
      self._EnsureSecondary(target_node)
7450
      self._GoStandalone()
7451
      self._GoReconnect(False)
7452
      self._WaitUntilSync()
7453
    except errors.OpExecError, err:
7454
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7455
                         " please try to recover the instance manually;"
7456
                         " error '%s'" % str(err))
7457

    
7458
  def _AbortMigration(self):
7459
    """Call the hypervisor code to abort a started migration.
7460

7461
    """
7462
    instance = self.instance
7463
    target_node = self.target_node
7464
    source_node = self.source_node
7465
    migration_info = self.migration_info
7466

    
7467
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7468
                                                                 instance,
7469
                                                                 migration_info,
7470
                                                                 False)
7471
    abort_msg = abort_result.fail_msg
7472
    if abort_msg:
7473
      logging.error("Aborting migration failed on target node %s: %s",
7474
                    target_node, abort_msg)
7475
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7477

    
7478
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7479
        instance, False, self.live)
7480
    abort_msg = abort_result.fail_msg
7481
    if abort_msg:
7482
      logging.error("Aborting migration failed on source node %s: %s",
7483
                    source_node, abort_msg)
7484

    
7485
  def _ExecMigration(self):
7486
    """Migrate an instance.
7487

7488
    The migrate is done by:
7489
      - change the disks into dual-master mode
7490
      - wait until disks are fully synchronized again
7491
      - migrate the instance
7492
      - change disks on the new secondary node (the old primary) to secondary
7493
      - wait until disks are fully synchronized
7494
      - change disks into single-master mode
7495

7496
    """
    instance = self.instance
7498
    target_node = self.target_node
7499
    source_node = self.source_node
7500

    
7501
    # Check for hypervisor version mismatch and warn the user.
7502
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7503
                                       None, self.instance.hypervisor)
7504
    src_info = nodeinfo[source_node]
7505
    dst_info = nodeinfo[target_node]
7506

    
7507
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7508
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7509
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7510
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7511
      if src_version != dst_version:
7512
        self.feedback_fn("* warning: hypervisor version mismatch between"
7513
                         " source (%s) and target (%s) node" %
7514
                         (src_version, dst_version))
7515

    
7516
    self.feedback_fn("* checking disk consistency between source and target")
7517
    for dev in instance.disks:
7518
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7519
        raise errors.OpExecError("Disk %s is degraded or not fully"
7520
                                 " synchronized on target node,"
7521
                                 " aborting migration" % dev.iv_name)
7522

    
7523
    # First get the migration information from the remote node
7524
    result = self.rpc.call_migration_info(source_node, instance)
7525
    msg = result.fail_msg
7526
    if msg:
7527
      log_err = ("Failed fetching source migration information from %s: %s" %
7528
                 (source_node, msg))
7529
      logging.error(log_err)
7530
      raise errors.OpExecError(log_err)
7531

    
7532
    self.migration_info = migration_info = result.payload
7533

    
7534
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7535
      # Then switch the disks to master/master mode
7536
      self._EnsureSecondary(target_node)
7537
      self._GoStandalone()
7538
      self._GoReconnect(True)
7539
      self._WaitUntilSync()
7540

    
7541
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7542
    result = self.rpc.call_accept_instance(target_node,
7543
                                           instance,
7544
                                           migration_info,
7545
                                           self.nodes_ip[target_node])
7546

    
7547
    msg = result.fail_msg
7548
    if msg:
7549
      logging.error("Instance pre-migration failed, trying to revert"
7550
                    " disk status: %s", msg)
7551
      self.feedback_fn("Pre-migration failed, aborting")
7552
      self._AbortMigration()
7553
      self._RevertDiskStatus()
7554
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7555
                               (instance.name, msg))
7556

    
7557
    self.feedback_fn("* migrating instance to %s" % target_node)
7558
    result = self.rpc.call_instance_migrate(source_node, instance,
7559
                                            self.nodes_ip[target_node],
7560
                                            self.live)
7561
    msg = result.fail_msg
7562
    if msg:
7563
      logging.error("Instance migration failed, trying to revert"
7564
                    " disk status: %s", msg)
7565
      self.feedback_fn("Migration failed, aborting")
7566
      self._AbortMigration()
7567
      self._RevertDiskStatus()
7568
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7569
                               (instance.name, msg))
7570

    
7571
    self.feedback_fn("* starting memory transfer")
7572
    last_feedback = time.time()
7573
    while True:
7574
      result = self.rpc.call_instance_get_migration_status(source_node,
7575
                                                           instance)
7576
      msg = result.fail_msg
7577
      ms = result.payload   # MigrationStatus instance
7578
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7579
        logging.error("Instance migration failed, trying to revert"
7580
                      " disk status: %s", msg)
7581
        self.feedback_fn("Migration failed, aborting")
7582
        self._AbortMigration()
7583
        self._RevertDiskStatus()
7584
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7585
                                 (instance.name, msg))
7586

    
7587
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7588
        self.feedback_fn("* memory transfer complete")
7589
        break
7590

    
7591
      if (utils.TimeoutExpired(last_feedback,
7592
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7593
          ms.transferred_ram is not None):
7594
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7595
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7596
        last_feedback = time.time()
7597

    
7598
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7599

    
7600
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7601
                                                           instance,
7602
                                                           True,
7603
                                                           self.live)
7604
    msg = result.fail_msg
7605
    if msg:
7606
      logging.error("Instance migration succeeded, but finalization failed"
7607
                    " on the source node: %s", msg)
7608
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7609
                               msg)
7610

    
7611
    instance.primary_node = target_node
7612

    
7613
    # distribute new instance config to the other nodes
7614
    self.cfg.Update(instance, self.feedback_fn)
7615

    
7616
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7617
                                                           instance,
7618
                                                           migration_info,
7619
                                                           True)
7620
    msg = result.fail_msg
7621
    if msg:
7622
      logging.error("Instance migration succeeded, but finalization failed"
7623
                    " on the target node: %s", msg)
7624
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7625
                               msg)
7626

    
7627
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7628
      self._EnsureSecondary(source_node)
7629
      self._WaitUntilSync()
7630
      self._GoStandalone()
7631
      self._GoReconnect(False)
7632
      self._WaitUntilSync()
7633

    
7634
    self.feedback_fn("* done")
7635

    
7636
  def _ExecFailover(self):
7637
    """Failover an instance.
7638

7639
    The failover is done by shutting it down on its present node and
7640
    starting it on the secondary.
7641

7642
    """
7643
    instance = self.instance
7644
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7645

    
7646
    source_node = instance.primary_node
7647
    target_node = self.target_node
7648

    
7649
    if instance.admin_up:
7650
      self.feedback_fn("* checking disk consistency between source and target")
7651
      for dev in instance.disks:
7652
        # for drbd, these are drbd over lvm
7653
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7654
          if primary_node.offline:
7655
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7656
                             " target node %s" %
7657
                             (primary_node.name, dev.iv_name, target_node))
7658
          elif not self.ignore_consistency:
7659
            raise errors.OpExecError("Disk %s is degraded on target node,"
7660
                                     " aborting failover" % dev.iv_name)
7661
    else:
7662
      self.feedback_fn("* not checking disk consistency as instance is not"
7663
                       " running")
7664

    
7665
    self.feedback_fn("* shutting down instance on source node")
7666
    logging.info("Shutting down instance %s on node %s",
7667
                 instance.name, source_node)
7668

    
7669
    result = self.rpc.call_instance_shutdown(source_node, instance,
7670
                                             self.shutdown_timeout)
7671
    msg = result.fail_msg
7672
    if msg:
7673
      if self.ignore_consistency or primary_node.offline:
7674
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7675
                           " proceeding anyway; please make sure node"
7676
                           " %s is down; error details: %s",
7677
                           instance.name, source_node, source_node, msg)
7678
      else:
7679
        raise errors.OpExecError("Could not shutdown instance %s on"
7680
                                 " node %s: %s" %
7681
                                 (instance.name, source_node, msg))
7682

    
7683
    self.feedback_fn("* deactivating the instance's disks on source node")
7684
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7685
      raise errors.OpExecError("Can't shut down the instance's disks")
7686

    
7687
    instance.primary_node = target_node
7688
    # distribute new instance config to the other nodes
7689
    self.cfg.Update(instance, self.feedback_fn)
7690

    
7691
    # Only start the instance if it's marked as up
7692
    if instance.admin_up:
7693
      self.feedback_fn("* activating the instance's disks on target node %s" %
7694
                       target_node)
7695
      logging.info("Starting instance %s on node %s",
7696
                   instance.name, target_node)
7697

    
7698
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7699
                                           ignore_secondaries=True)
7700
      if not disks_ok:
7701
        _ShutdownInstanceDisks(self.lu, instance)
7702
        raise errors.OpExecError("Can't activate the instance's disks")
7703

    
7704
      self.feedback_fn("* starting the instance on the target node %s" %
7705
                       target_node)
7706
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7707
                                            False)
7708
      msg = result.fail_msg
7709
      if msg:
7710
        _ShutdownInstanceDisks(self.lu, instance)
7711
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7712
                                 (instance.name, target_node, msg))
7713

    
7714
  def Exec(self, feedback_fn):
7715
    """Perform the migration.
7716

7717
    """
7718
    self.feedback_fn = feedback_fn
7719
    self.source_node = self.instance.primary_node
7720

    
7721
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7722
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7723
      self.target_node = self.instance.secondary_nodes[0]
7724
      # Otherwise self.target_node has been populated either
7725
      # directly, or through an iallocator.
7726

    
7727
    self.all_nodes = [self.source_node, self.target_node]
7728
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7729
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7730

    
7731
    if self.failover:
7732
      feedback_fn("Failover instance %s" % self.instance.name)
7733
      self._ExecFailover()
7734
    else:
7735
      feedback_fn("Migrating instance %s" % self.instance.name)
7736

    
7737
      if self.cleanup:
7738
        return self._ExecCleanup()
7739
      else:
7740
        return self._ExecMigration()
7741

    
7742

    
7743
def _CreateBlockDev(lu, node, instance, device, force_create,
7744
                    info, force_open):
7745
  """Create a tree of block devices on a given node.
7746

7747
  If this device type has to be created on secondaries, create it and
7748
  all its children.
7749

7750
  If not, just recurse to children keeping the same 'force' value.
7751

7752
  @param lu: the lu on whose behalf we execute
7753
  @param node: the node on which to create the device
7754
  @type instance: L{objects.Instance}
7755
  @param instance: the instance which owns the device
7756
  @type device: L{objects.Disk}
7757
  @param device: the device to create
7758
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7769

7770
  """
7771
  if device.CreateOnSecondary():
7772
    force_create = True
7773

    
7774
  if device.children:
7775
    for child in device.children:
7776
      _CreateBlockDev(lu, node, instance, child, force_create,
7777
                      info, force_open)
7778

    
7779
  if not force_create:
7780
    return
7781

    
7782
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7783

    
7784

    
7785
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7786
  """Create a single block device on a given node.
7787

7788
  This will not recurse over children of the device, so they must be
7789
  created in advance.
7790

7791
  @param lu: the lu on whose behalf we execute
7792
  @param node: the node on which to create the device
7793
  @type instance: L{objects.Instance}
7794
  @param instance: the instance which owns the device
7795
  @type device: L{objects.Disk}
7796
  @param device: the device to create
7797
  @param info: the extra 'metadata' we should attach to the device
7798
      (this will be represented as a LVM tag)
7799
  @type force_open: boolean
7800
  @param force_open: this parameter will be passes to the
7801
      L{backend.BlockdevCreate} function where it specifies
7802
      whether we run on primary or not, and it affects both
7803
      the child assembly and the device own Open() execution
7804

7805
  """
7806
  lu.cfg.SetDiskID(device, node)
7807
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7808
                                       instance.name, force_open, info)
7809
  result.Raise("Can't create block device %s on"
7810
               " node %s for instance %s" % (device, node, instance.name))
7811
  if device.physical_id is None:
7812
    device.physical_id = result.payload
7813

    
7814

    
7815
def _GenerateUniqueNames(lu, exts):
7816
  """Generate a suitable LV name.
7817

7818
  This will generate a logical volume name for the given instance.
7819

7820
  """
7821
  results = []
7822
  for val in exts:
7823
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7824
    results.append("%s%s" % (new_id, val))
7825
  return results
7826

    
7827

    
7828
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7829
                         iv_name, p_minor, s_minor):
7830
  """Generate a drbd8 device complete with its children.
7831

7832
  """
7833
  assert len(vgnames) == len(names) == 2
7834
  port = lu.cfg.AllocatePort()
7835
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7836
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7837
                          logical_id=(vgnames[0], names[0]))
7838
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7839
                          logical_id=(vgnames[1], names[1]))
7840
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7841
                          logical_id=(primary, secondary, port,
7842
                                      p_minor, s_minor,
7843
                                      shared_secret),
7844
                          children=[dev_data, dev_meta],
7845
                          iv_name=iv_name)
7846
  return drbd_dev
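# Illustrative sketch (not part of the original code): for a 10240 MiB disk
# the helper above returns roughly
#   Disk(LD_DRBD8, size=10240,
#        children=[Disk(LD_LV, size=10240), Disk(LD_LV, size=DRBD_META_SIZE)])
# i.e. a DRBD8 device backed by one data LV and one small metadata LV.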

    
7848

    
7849
def _GenerateDiskTemplate(lu, template_name,
7850
                          instance_name, primary_node,
7851
                          secondary_nodes, disk_info,
7852
                          file_storage_dir, file_driver,
7853
                          base_index, feedback_fn):
7854
  """Generate the entire disk layout for a given template type.
7855

7856
  """
7857
  #TODO: compute space requirements
7858

    
7859
  vgname = lu.cfg.GetVGName()
7860
  disk_count = len(disk_info)
7861
  disks = []
7862
  if template_name == constants.DT_DISKLESS:
7863
    pass
7864
  elif template_name == constants.DT_PLAIN:
7865
    if len(secondary_nodes) != 0:
7866
      raise errors.ProgrammerError("Wrong template configuration")
7867

    
7868
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7869
                                      for i in range(disk_count)])
7870
    for idx, disk in enumerate(disk_info):
7871
      disk_index = idx + base_index
7872
      vg = disk.get(constants.IDISK_VG, vgname)
7873
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7874
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7875
                              size=disk[constants.IDISK_SIZE],
7876
                              logical_id=(vg, names[idx]),
7877
                              iv_name="disk/%d" % disk_index,
7878
                              mode=disk[constants.IDISK_MODE])
7879
      disks.append(disk_dev)
7880
  elif template_name == constants.DT_DRBD8:
7881
    if len(secondary_nodes) != 1:
7882
      raise errors.ProgrammerError("Wrong template configuration")
7883
    remote_node = secondary_nodes[0]
7884
    minors = lu.cfg.AllocateDRBDMinor(
7885
      [primary_node, remote_node] * len(disk_info), instance_name)
7886

    
7887
    names = []
7888
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7889
                                               for i in range(disk_count)]):
7890
      names.append(lv_prefix + "_data")
7891
      names.append(lv_prefix + "_meta")
7892
    for idx, disk in enumerate(disk_info):
7893
      disk_index = idx + base_index
7894
      data_vg = disk.get(constants.IDISK_VG, vgname)
7895
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7896
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7897
                                      disk[constants.IDISK_SIZE],
7898
                                      [data_vg, meta_vg],
7899
                                      names[idx * 2:idx * 2 + 2],
7900
                                      "disk/%d" % disk_index,
7901
                                      minors[idx * 2], minors[idx * 2 + 1])
7902
      disk_dev.mode = disk[constants.IDISK_MODE]
7903
      disks.append(disk_dev)
7904
  elif template_name == constants.DT_FILE:
7905
    if len(secondary_nodes) != 0:
7906
      raise errors.ProgrammerError("Wrong template configuration")
7907

    
7908
    opcodes.RequireFileStorage()
7909

    
7910
    for idx, disk in enumerate(disk_info):
7911
      disk_index = idx + base_index
7912
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7913
                              size=disk[constants.IDISK_SIZE],
7914
                              iv_name="disk/%d" % disk_index,
7915
                              logical_id=(file_driver,
7916
                                          "%s/disk%d" % (file_storage_dir,
7917
                                                         disk_index)),
7918
                              mode=disk[constants.IDISK_MODE])
7919
      disks.append(disk_dev)
7920
  elif template_name == constants.DT_SHARED_FILE:
7921
    if len(secondary_nodes) != 0:
7922
      raise errors.ProgrammerError("Wrong template configuration")
7923

    
7924
    opcodes.RequireSharedFileStorage()
7925

    
7926
    for idx, disk in enumerate(disk_info):
7927
      disk_index = idx + base_index
7928
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7929
                              size=disk[constants.IDISK_SIZE],
7930
                              iv_name="disk/%d" % disk_index,
7931
                              logical_id=(file_driver,
7932
                                          "%s/disk%d" % (file_storage_dir,
7933
                                                         disk_index)),
7934
                              mode=disk[constants.IDISK_MODE])
7935
      disks.append(disk_dev)
7936
  elif template_name == constants.DT_BLOCK:
7937
    if len(secondary_nodes) != 0:
7938
      raise errors.ProgrammerError("Wrong template configuration")
7939

    
7940
    for idx, disk in enumerate(disk_info):
7941
      disk_index = idx + base_index
7942
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7943
                              size=disk[constants.IDISK_SIZE],
7944
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7945
                                          disk[constants.IDISK_ADOPT]),
7946
                              iv_name="disk/%d" % disk_index,
7947
                              mode=disk[constants.IDISK_MODE])
7948
      disks.append(disk_dev)
7949

    
7950
  else:
7951
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7952
  return disks
7953

    
7954

    
7955
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7960

    
7961

    
7962
def _CalcEta(time_taken, written, total_size):
7963
  """Calculates the ETA based on size written and total size.
7964

7965
  @param time_taken: The time taken so far
7966
  @param written: amount written so far
7967
  @param total_size: The total size of data to be written
7968
  @return: The remaining time in seconds
7969

7970
  """
  avg_time = time_taken / float(written)
7972
  return (total_size - written) * avg_time
7973

    
7974

    
7975
def _WipeDisks(lu, instance):
7976
  """Wipes instance disks.
7977

7978
  @type lu: L{LogicalUnit}
7979
  @param lu: the logical unit on whose behalf we execute
7980
  @type instance: L{objects.Instance}
7981
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe
7983

7984
  """
7985
  node = instance.primary_node
7986

    
7987
  for device in instance.disks:
7988
    lu.cfg.SetDiskID(device, node)
7989

    
7990
  logging.info("Pause sync of instance %s disks", instance.name)
7991
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7992

    
7993
  for idx, success in enumerate(result.payload):
7994
    if not success:
7995
      logging.warn("pause-sync of instance %s for disks %d failed",
7996
                   instance.name, idx)
7997

    
7998
  try:
7999
    for idx, device in enumerate(instance.disks):
8000
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8001
      # MAX_WIPE_CHUNK at max
8002
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8003
                            constants.MIN_WIPE_CHUNK_PERCENT)
8004
      # we _must_ make this an int, otherwise rounding errors will
8005
      # occur
8006
      wipe_chunk_size = int(wipe_chunk_size)
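      # Illustrative example (assuming MIN_WIPE_CHUNK_PERCENT of 10 and
      # MAX_WIPE_CHUNK of 1024 MiB): a 20480 MiB disk wipes in
      # min(1024, 2048.0) = 1024 MiB chunks, a 5000 MiB disk in
      # min(1024, 500.0) = 500 MiB chunks.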

    
8008
      lu.LogInfo("* Wiping disk %d", idx)
8009
      logging.info("Wiping disk %d for instance %s, node %s using"
8010
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8011

    
8012
      offset = 0
8013
      size = device.size
8014
      last_output = 0
8015
      start_time = time.time()
8016

    
8017
      while offset < size:
8018
        wipe_size = min(wipe_chunk_size, size - offset)
8019
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8020
                      idx, offset, wipe_size)
8021
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8022
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8023
                     (idx, offset, wipe_size))
8024
        now = time.time()
8025
        offset += wipe_size
8026
        if now - last_output >= 60:
8027
          eta = _CalcEta(now - start_time, offset, size)
8028
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8029
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8030
          last_output = now
8031
  finally:
8032
    logging.info("Resume sync of instance %s disks", instance.name)
8033

    
8034
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8035

    
8036
    for idx, success in enumerate(result.payload):
8037
      if not success:
8038
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8039
                      " look at the status and troubleshoot the issue", idx)
8040
        logging.warn("resume-sync of instance %s for disks %d failed",
8041
                     instance.name, idx)
8042

    
8043

    
8044
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8045
  """Create all disks for an instance.
8046

8047
  This abstracts away some work from AddInstance.
8048

8049
  @type lu: L{LogicalUnit}
8050
  @param lu: the logical unit on whose behalf we execute
8051
  @type instance: L{objects.Instance}
8052
  @param instance: the instance whose disks we should create
8053
  @type to_skip: list
8054
  @param to_skip: list of indices to skip
8055
  @type target_node: string
8056
  @param target_node: if passed, overrides the target node for creation
8057
  @rtype: boolean
8058
  @return: the success of the creation
8059

8060
  """
8061
  info = _GetInstanceInfoText(instance)
8062
  if target_node is None:
8063
    pnode = instance.primary_node
8064
    all_nodes = instance.all_nodes
8065
  else:
8066
    pnode = target_node
8067
    all_nodes = [pnode]
8068

    
8069
  if instance.disk_template in constants.DTS_FILEBASED:
8070
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8071
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8072

    
8073
    result.Raise("Failed to create directory '%s' on"
8074
                 " node %s" % (file_storage_dir, pnode))
8075

    
8076
  # Note: this needs to be kept in sync with adding of disks in
8077
  # LUInstanceSetParams
8078
  for idx, device in enumerate(instance.disks):
8079
    if to_skip and idx in to_skip:
8080
      continue
8081
    logging.info("Creating volume %s for instance %s",
8082
                 device.iv_name, instance.name)
8083
    #HARDCODE
8084
    for node in all_nodes:
8085
      f_create = node == pnode
8086
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8087

    
8088

    
8089
def _RemoveDisks(lu, instance, target_node=None):
8090
  """Remove all disks for an instance.
8091

8092
  This abstracts away some work from `AddInstance()` and
8093
  `RemoveInstance()`. Note that in case some of the devices couldn't
8094
  be removed, the removal will continue with the other ones (compare
8095
  with `_CreateDisks()`).
8096

8097
  @type lu: L{LogicalUnit}
8098
  @param lu: the logical unit on whose behalf we execute
8099
  @type instance: L{objects.Instance}
8100
  @param instance: the instance whose disks we should remove
8101
  @type target_node: string
8102
  @param target_node: used to override the node on which to remove the disks
8103
  @rtype: boolean
8104
  @return: the success of the removal
8105

8106
  """
8107
  logging.info("Removing block devices for instance %s", instance.name)
8108

    
8109
  all_result = True
8110
  for device in instance.disks:
8111
    if target_node:
8112
      edata = [(target_node, device)]
8113
    else:
8114
      edata = device.ComputeNodeTree(instance.primary_node)
8115
    for node, disk in edata:
8116
      lu.cfg.SetDiskID(disk, node)
8117
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8118
      if msg:
8119
        lu.LogWarning("Could not remove block device %s on node %s,"
8120
                      " continuing anyway: %s", device.iv_name, node, msg)
8121
        all_result = False
8122

    
8123
  if instance.disk_template == constants.DT_FILE:
8124
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8125
    if target_node:
8126
      tgt = target_node
8127
    else:
8128
      tgt = instance.primary_node
8129
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8130
    if result.fail_msg:
8131
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8132
                    file_storage_dir, instance.primary_node, result.fail_msg)
8133
      all_result = False
8134

    
8135
  return all_result
8136

    
8137

    
8138
def _ComputeDiskSizePerVG(disk_template, disks):
8139
  """Compute disk size requirements in the volume group
8140

8141
  """
8142
  def _compute(disks, payload):
8143
    """Universal algorithm.
8144

8145
    """
8146
    vgs = {}
8147
    for disk in disks:
8148
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload
8150

    
8151
    return vgs
8152

    
8153
  # Required free disk space as a function of disk and swap space
8154
  req_size_dict = {
8155
    constants.DT_DISKLESS: {},
8156
    constants.DT_PLAIN: _compute(disks, 0),
8157
    # 128 MB are added for drbd metadata for each disk
8158
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8159
    constants.DT_FILE: {},
8160
    constants.DT_SHARED_FILE: {},
8161
  }
8162

    
8163
  if disk_template not in req_size_dict:
8164
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8165
                                 " is unknown" % disk_template)
8166

    
8167
  return req_size_dict[disk_template]
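# Illustrative example (not part of the original code): for two disks of
# 10240 MiB and 20480 MiB, both with IDISK_VG set to "xenvg", the helper above
# returns {"xenvg": 30720} for DT_PLAIN and
# {"xenvg": 30720 + 2 * DRBD_META_SIZE} = {"xenvg": 30976} for DT_DRBD8.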

    
8169

    
8170
def _ComputeDiskSize(disk_template, disks):
8171
  """Compute disk size requirements in the volume group
8172

8173
  """
8174
  # Required free disk space as a function of disk and swap space
8175
  req_size_dict = {
8176
    constants.DT_DISKLESS: None,
8177
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8178
    # 128 MB are added for drbd metadata for each disk
8179
    constants.DT_DRBD8:
8180
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8181
    constants.DT_FILE: None,
8182
    constants.DT_SHARED_FILE: 0,
8183
    constants.DT_BLOCK: 0,
8184
  }
8185

    
8186
  if disk_template not in req_size_dict:
8187
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8188
                                 " is unknown" % disk_template)
8189

    
8190
  return req_size_dict[disk_template]
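# Illustrative example (not part of the original code): for the same two disks
# of 10240 MiB and 20480 MiB, _ComputeDiskSize returns 30720 for DT_PLAIN,
# 30720 + 2 * DRBD_META_SIZE = 30976 for DT_DRBD8, and None for DT_DISKLESS
# and DT_FILE.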

    
8192

    
8193
def _FilterVmNodes(lu, nodenames):
8194
  """Filters out non-vm_capable nodes from a list.
8195

8196
  @type lu: L{LogicalUnit}
8197
  @param lu: the logical unit for which we check
8198
  @type nodenames: list
8199
  @param nodenames: the list of nodes on which we should check
8200
  @rtype: list
8201
  @return: the list of vm-capable nodes
8202

8203
  """
8204
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8205
  return [name for name in nodenames if name not in vm_nodes]
8206

    
8207

    
8208
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8209
  """Hypervisor parameter validation.
8210

8211
  This function abstract the hypervisor parameter validation to be
8212
  used in both instance create and instance modify.
8213

8214
  @type lu: L{LogicalUnit}
8215
  @param lu: the logical unit for which we check
8216
  @type nodenames: list
8217
  @param nodenames: the list of nodes on which we should check
8218
  @type hvname: string
8219
  @param hvname: the name of the hypervisor we should use
8220
  @type hvparams: dict
8221
  @param hvparams: the parameters which we need to check
8222
  @raise errors.OpPrereqError: if the parameters are not valid
8223

8224
  """
8225
  nodenames = _FilterVmNodes(lu, nodenames)
8226
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8227
                                                  hvname,
8228
                                                  hvparams)
8229
  for node in nodenames:
8230
    info = hvinfo[node]
8231
    if info.offline:
8232
      continue
8233
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8234

    
8235

    
8236
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8237
  """OS parameters validation.
8238

8239
  @type lu: L{LogicalUnit}
8240
  @param lu: the logical unit for which we check
8241
  @type required: boolean
8242
  @param required: whether the validation should fail if the OS is not
8243
      found
8244
  @type nodenames: list
8245
  @param nodenames: the list of nodes on which we should check
8246
  @type osname: string
  @param osname: the name of the OS we should use
8248
  @type osparams: dict
8249
  @param osparams: the parameters which we need to check
8250
  @raise errors.OpPrereqError: if the parameters are not valid
8251

8252
  """
8253
  nodenames = _FilterVmNodes(lu, nodenames)
8254
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8255
                                   [constants.OS_VALIDATE_PARAMETERS],
8256
                                   osparams)
8257
  for node, nres in result.items():
8258
    # we don't check for offline cases since this should be run only
8259
    # against the master node and/or an instance's nodes
8260
    nres.Raise("OS Parameters validation failed on node %s" % node)
8261
    if not nres.payload:
8262
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8263
                 osname, node)
8264

    
8265

    
8266
class LUInstanceCreate(LogicalUnit):
8267
  """Create an instance.
8268

8269
  """
8270
  HPATH = "instance-add"
8271
  HTYPE = constants.HTYPE_INSTANCE
8272
  REQ_BGL = False
8273

    
8274
  def CheckArguments(self):
8275
    """Check arguments.
8276

8277
    """
8278
    # do not require name_check to ease forward/backward compatibility
8279
    # for tools
8280
    if self.op.no_install and self.op.start:
8281
      self.LogInfo("No-installation mode selected, disabling startup")
8282
      self.op.start = False
8283
    # validate/normalize the instance name
8284
    self.op.instance_name = \
8285
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8286

    
8287
    if self.op.ip_check and not self.op.name_check:
8288
      # TODO: make the ip check more flexible and not depend on the name check
8289
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8290
                                 " check", errors.ECODE_INVAL)
8291

    
8292
    # check nics' parameter names
8293
    for nic in self.op.nics:
8294
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8295

    
8296
    # check disks. parameter names and consistent adopt/no-adopt strategy
8297
    has_adopt = has_no_adopt = False
8298
    for disk in self.op.disks:
8299
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8300
      if constants.IDISK_ADOPT in disk:
8301
        has_adopt = True
8302
      else:
8303
        has_no_adopt = True
8304
    if has_adopt and has_no_adopt:
8305
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8306
                                 errors.ECODE_INVAL)
8307
    if has_adopt:
8308
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8309
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8310
                                   " '%s' disk template" %
8311
                                   self.op.disk_template,
8312
                                   errors.ECODE_INVAL)
8313
      if self.op.iallocator is not None:
8314
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8315
                                   " iallocator script", errors.ECODE_INVAL)
8316
      if self.op.mode == constants.INSTANCE_IMPORT:
8317
        raise errors.OpPrereqError("Disk adoption not allowed for"
8318
                                   " instance import", errors.ECODE_INVAL)
8319
    else:
8320
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8321
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8322
                                   " but no 'adopt' parameter given" %
8323
                                   self.op.disk_template,
8324
                                   errors.ECODE_INVAL)
8325

    
8326
    self.adopt_disks = has_adopt
8327

    
8328
    # instance name verification
8329
    if self.op.name_check:
8330
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8331
      self.op.instance_name = self.hostname1.name
8332
      # used in CheckPrereq for ip ping check
8333
      self.check_ip = self.hostname1.ip
8334
    else:
8335
      self.check_ip = None
8336

    
8337
    # file storage checks
8338
    if (self.op.file_driver and
8339
        not self.op.file_driver in constants.FILE_DRIVER):
8340
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8341
                                 self.op.file_driver, errors.ECODE_INVAL)
8342

    
8343
    if self.op.disk_template == constants.DT_FILE:
8344
      opcodes.RequireFileStorage()
8345
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8346
      opcodes.RequireSharedFileStorage()
8347

    
8348
    ### Node/iallocator related checks
8349
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8350

    
8351
    if self.op.pnode is not None:
8352
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8353
        if self.op.snode is None:
8354
          raise errors.OpPrereqError("The networked disk templates need"
8355
                                     " a mirror node", errors.ECODE_INVAL)
8356
      elif self.op.snode:
8357
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8358
                        " template")
8359
        self.op.snode = None
8360

    
8361
    self._cds = _GetClusterDomainSecret()
8362

    
8363
    if self.op.mode == constants.INSTANCE_IMPORT:
8364
      # On import force_variant must be True, because if we forced it at
8365
      # initial install, our only chance when importing it back is that it
8366
      # works again!
8367
      self.op.force_variant = True
8368

    
8369
      if self.op.no_install:
8370
        self.LogInfo("No-installation mode has no effect during import")
8371

    
8372
    elif self.op.mode == constants.INSTANCE_CREATE:
8373
      if self.op.os_type is None:
8374
        raise errors.OpPrereqError("No guest OS specified",
8375
                                   errors.ECODE_INVAL)
8376
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8377
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8378
                                   " installation" % self.op.os_type,
8379
                                   errors.ECODE_STATE)
8380
      if self.op.disk_template is None:
8381
        raise errors.OpPrereqError("No disk template specified",
8382
                                   errors.ECODE_INVAL)
8383

    
8384
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8385
      # Check handshake to ensure both clusters have the same domain secret
8386
      src_handshake = self.op.source_handshake
8387
      if not src_handshake:
8388
        raise errors.OpPrereqError("Missing source handshake",
8389
                                   errors.ECODE_INVAL)
8390

    
8391
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8392
                                                           src_handshake)
8393
      if errmsg:
8394
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8395
                                   errors.ECODE_INVAL)
8396

    
8397
      # Load and check source CA
8398
      self.source_x509_ca_pem = self.op.source_x509_ca
8399
      if not self.source_x509_ca_pem:
8400
        raise errors.OpPrereqError("Missing source X509 CA",
8401
                                   errors.ECODE_INVAL)
8402

    
8403
      try:
8404
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8405
                                                    self._cds)
8406
      except OpenSSL.crypto.Error, err:
8407
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8408
                                   (err, ), errors.ECODE_INVAL)
8409

    
8410
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8411
      if errcode is not None:
8412
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8413
                                   errors.ECODE_INVAL)
8414

    
8415
      self.source_x509_ca = cert
8416

    
8417
      src_instance_name = self.op.source_instance_name
8418
      if not src_instance_name:
8419
        raise errors.OpPrereqError("Missing source instance name",
8420
                                   errors.ECODE_INVAL)
8421

    
8422
      self.source_instance_name = \
8423
          netutils.GetHostname(name=src_instance_name).name
8424

    
8425
    else:
8426
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8427
                                 self.op.mode, errors.ECODE_INVAL)
8428

    
8429
  def ExpandNames(self):
8430
    """ExpandNames for CreateInstance.
8431

8432
    Figure out the right locks for instance creation.
8433

8434
    """
8435
    self.needed_locks = {}
8436

    
8437
    instance_name = self.op.instance_name
8438
    # this is just a preventive check, but someone might still add this
8439
    # instance in the meantime, and creation will fail at lock-add time
8440
    if instance_name in self.cfg.GetInstanceList():
8441
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8442
                                 instance_name, errors.ECODE_EXISTS)
8443

    
8444
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8445

    
8446
    if self.op.iallocator:
8447
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8448
    else:
8449
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8450
      nodelist = [self.op.pnode]
8451
      if self.op.snode is not None:
8452
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8453
        nodelist.append(self.op.snode)
8454
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8455

    
8456
    # in case of import lock the source node too
8457
    if self.op.mode == constants.INSTANCE_IMPORT:
8458
      src_node = self.op.src_node
8459
      src_path = self.op.src_path
8460

    
8461
      if src_path is None:
8462
        self.op.src_path = src_path = self.op.instance_name
8463

    
8464
      if src_node is None:
8465
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8466
        self.op.src_node = None
8467
        if os.path.isabs(src_path):
8468
          raise errors.OpPrereqError("Importing an instance from a path"
8469
                                     " requires a source node option",
8470
                                     errors.ECODE_INVAL)
8471
      else:
8472
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8473
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8474
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8475
        if not os.path.isabs(src_path):
8476
          self.op.src_path = src_path = \
8477
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8478

    
8479
  def _RunAllocator(self):
8480
    """Run the allocator based on input opcode.
8481

8482
    """
8483
    nics = [n.ToDict() for n in self.nics]
8484
    ial = IAllocator(self.cfg, self.rpc,
8485
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8486
                     name=self.op.instance_name,
8487
                     disk_template=self.op.disk_template,
8488
                     tags=self.op.tags,
8489
                     os=self.op.os_type,
8490
                     vcpus=self.be_full[constants.BE_VCPUS],
8491
                     memory=self.be_full[constants.BE_MEMORY],
8492
                     disks=self.disks,
8493
                     nics=nics,
8494
                     hypervisor=self.op.hypervisor,
8495
                     )
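    # the allocator run below must return exactly ial.required_nodes node
    # names, primary first and optional secondary second; any other result
    # is rejected as a faulty allocator answer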
8496

    
8497
    ial.Run(self.op.iallocator)
8498

    
8499
    if not ial.success:
8500
      raise errors.OpPrereqError("Can't compute nodes using"
8501
                                 " iallocator '%s': %s" %
8502
                                 (self.op.iallocator, ial.info),
8503
                                 errors.ECODE_NORES)
8504
    if len(ial.result) != ial.required_nodes:
8505
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8506
                                 " of nodes (%s), required %s" %
8507
                                 (self.op.iallocator, len(ial.result),
8508
                                  ial.required_nodes), errors.ECODE_FAULT)
8509
    self.op.pnode = ial.result[0]
8510
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8511
                 self.op.instance_name, self.op.iallocator,
8512
                 utils.CommaJoin(ial.result))
8513
    if ial.required_nodes == 2:
8514
      self.op.snode = ial.result[1]
8515

    
8516
  def BuildHooksEnv(self):
8517
    """Build hooks env.
8518

8519
    This runs on master, primary and secondary nodes of the instance.
8520

8521
    """
8522
    env = {
8523
      "ADD_MODE": self.op.mode,
8524
      }
8525
    if self.op.mode == constants.INSTANCE_IMPORT:
8526
      env["SRC_NODE"] = self.op.src_node
8527
      env["SRC_PATH"] = self.op.src_path
8528
      env["SRC_IMAGES"] = self.src_images
8529

    
8530
    env.update(_BuildInstanceHookEnv(
8531
      name=self.op.instance_name,
8532
      primary_node=self.op.pnode,
8533
      secondary_nodes=self.secondaries,
8534
      status=self.op.start,
8535
      os_type=self.op.os_type,
8536
      memory=self.be_full[constants.BE_MEMORY],
8537
      vcpus=self.be_full[constants.BE_VCPUS],
8538
      nics=_NICListToTuple(self, self.nics),
8539
      disk_template=self.op.disk_template,
8540
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8541
             for d in self.disks],
8542
      bep=self.be_full,
8543
      hvp=self.hv_full,
8544
      hypervisor_name=self.op.hypervisor,
8545
      tags=self.op.tags,
8546
    ))
8547

    
8548
    return env
8549

    
8550
  def BuildHooksNodes(self):
8551
    """Build hooks nodes.
8552

8553
    """
8554
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8555
    return nl, nl
8556

    
8557
  def _ReadExportInfo(self):
8558
    """Reads the export information from disk.
8559

8560
    It will override the opcode source node and path with the actual
8561
    information, if these two were not specified before.
8562

8563
    @return: the export information
8564

8565
    """
8566
    assert self.op.mode == constants.INSTANCE_IMPORT
8567

    
8568
    src_node = self.op.src_node
8569
    src_path = self.op.src_path
8570

    
8571
    if src_node is None:
8572
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8573
      exp_list = self.rpc.call_export_list(locked_nodes)
8574
      found = False
8575
      for node in exp_list:
8576
        if exp_list[node].fail_msg:
8577
          continue
8578
        if src_path in exp_list[node].payload:
8579
          found = True
8580
          self.op.src_node = src_node = node
8581
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8582
                                                       src_path)
8583
          break
8584
      if not found:
8585
        raise errors.OpPrereqError("No export found for relative path %s" %
8586
                                    src_path, errors.ECODE_INVAL)
8587

    
8588
    _CheckNodeOnline(self, src_node)
8589
    result = self.rpc.call_export_info(src_node, src_path)
8590
    result.Raise("No export or invalid export found in dir %s" % src_path)
8591

    
8592
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8593
    if not export_info.has_section(constants.INISECT_EXP):
8594
      raise errors.ProgrammerError("Corrupted export config",
8595
                                   errors.ECODE_ENVIRON)
8596

    
8597
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8598
    if (int(ei_version) != constants.EXPORT_VERSION):
8599
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8600
                                 (ei_version, constants.EXPORT_VERSION),
8601
                                 errors.ECODE_ENVIRON)
8602
    return export_info
8603

    
8604
  def _ReadExportParams(self, einfo):
8605
    """Use export parameters as defaults.
8606

8607
    In case the opcode doesn't specify (as in override) some instance
8608
    parameters, then try to use them from the export information, if
8609
    that declares them.
8610

8611
    """
8612
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8613

    
8614
    if self.op.disk_template is None:
8615
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8616
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8617
                                          "disk_template")
8618
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8619
          raise errors.OpPrereqError("Disk template specified in configuration"
8620
                                     " file is not one of the allowed values:"
8621
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8622
      else:
8623
        raise errors.OpPrereqError("No disk template specified and the export"
8624
                                   " is missing the disk_template information",
8625
                                   errors.ECODE_INVAL)
8626

    
8627
    if not self.op.disks:
8628
      disks = []
8629
      # TODO: import the disk iv_name too
8630
      for idx in range(constants.MAX_DISKS):
8631
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8632
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8633
          disks.append({constants.IDISK_SIZE: disk_sz})
8634
      self.op.disks = disks
8635
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8636
        raise errors.OpPrereqError("No disk info specified and the export"
8637
                                   " is missing the disk information",
8638
                                   errors.ECODE_INVAL)
8639

    
8640
    if not self.op.nics:
8641
      nics = []
8642
      for idx in range(constants.MAX_NICS):
8643
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8644
          ndict = {}
8645
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8646
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8647
            ndict[name] = v
8648
          nics.append(ndict)
8649
        else:
8650
          break
8651
      self.op.nics = nics
8652

    
8653
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8654
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8655

    
8656
    if (self.op.hypervisor is None and
8657
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8658
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8659

    
8660
    if einfo.has_section(constants.INISECT_HYP):
8661
      # use the export parameters but do not override the ones
8662
      # specified by the user
8663
      for name, value in einfo.items(constants.INISECT_HYP):
8664
        if name not in self.op.hvparams:
8665
          self.op.hvparams[name] = value
8666

    
8667
    if einfo.has_section(constants.INISECT_BEP):
8668
      # use the parameters, without overriding
8669
      for name, value in einfo.items(constants.INISECT_BEP):
8670
        if name not in self.op.beparams:
8671
          self.op.beparams[name] = value
8672
    else:
8673
      # try to read the parameters old style, from the main section
8674
      for name in constants.BES_PARAMETERS:
8675
        if (name not in self.op.beparams and
8676
            einfo.has_option(constants.INISECT_INS, name)):
8677
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8678

    
8679
    if einfo.has_section(constants.INISECT_OSP):
8680
      # use the parameters, without overriding
8681
      for name, value in einfo.items(constants.INISECT_OSP):
8682
        if name not in self.op.osparams:
8683
          self.op.osparams[name] = value
8684

    
8685
  def _RevertToDefaults(self, cluster):
8686
    """Revert the instance parameters to the default values.
8687

8688
    """
8689
    # hvparams
8690
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8691
    for name in self.op.hvparams.keys():
8692
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8693
        del self.op.hvparams[name]
8694
    # beparams
8695
    be_defs = cluster.SimpleFillBE({})
8696
    for name in self.op.beparams.keys():
8697
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8698
        del self.op.beparams[name]
8699
    # nic params
8700
    nic_defs = cluster.SimpleFillNIC({})
8701
    for nic in self.op.nics:
8702
      for name in constants.NICS_PARAMETERS:
8703
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8704
          del nic[name]
8705
    # osparams
8706
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8707
    for name in self.op.osparams.keys():
8708
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8709
        del self.op.osparams[name]
8710

    
8711
  def _CalculateFileStorageDir(self):
8712
    """Calculate final instance file storage dir.
8713

8714
    """
8715
    # file storage dir calculation/check
8716
    self.instance_file_storage_dir = None
8717
    if self.op.disk_template in constants.DTS_FILEBASED:
8718
      # build the full file storage dir path
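      # the final path is <cluster file storage dir>[/<user-supplied dir>]/
      # <instance name>, assembled piece by piece in joinargs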
8719
      joinargs = []
8720

    
8721
      if self.op.disk_template == constants.DT_SHARED_FILE:
8722
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8723
      else:
8724
        get_fsd_fn = self.cfg.GetFileStorageDir
8725

    
8726
      cfg_storagedir = get_fsd_fn()
8727
      if not cfg_storagedir:
8728
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8729
      joinargs.append(cfg_storagedir)
8730

    
8731
      if self.op.file_storage_dir is not None:
8732
        joinargs.append(self.op.file_storage_dir)
8733

    
8734
      joinargs.append(self.op.instance_name)
8735

    
8736
      # pylint: disable=W0142
8737
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8738

    
8739
  def CheckPrereq(self):
8740
    """Check prerequisites.
8741

8742
    """
8743
    self._CalculateFileStorageDir()
8744

    
8745
    if self.op.mode == constants.INSTANCE_IMPORT:
8746
      export_info = self._ReadExportInfo()
8747
      self._ReadExportParams(export_info)
8748

    
8749
    if (not self.cfg.GetVGName() and
8750
        self.op.disk_template not in constants.DTS_NOT_LVM):
8751
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8752
                                 " instances", errors.ECODE_STATE)
8753

    
8754
    if (self.op.hypervisor is None or
8755
        self.op.hypervisor == constants.VALUE_AUTO):
8756
      self.op.hypervisor = self.cfg.GetHypervisorType()
8757

    
8758
    cluster = self.cfg.GetClusterInfo()
8759
    enabled_hvs = cluster.enabled_hypervisors
8760
    if self.op.hypervisor not in enabled_hvs:
8761
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8762
                                 " cluster (%s)" % (self.op.hypervisor,
8763
                                  ",".join(enabled_hvs)),
8764
                                 errors.ECODE_STATE)
8765

    
8766
    # Check tag validity
8767
    for tag in self.op.tags:
8768
      objects.TaggableObject.ValidateTag(tag)
8769

    
8770
    # check hypervisor parameter syntax (locally)
8771
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8772
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8773
                                      self.op.hvparams)
8774
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8775
    hv_type.CheckParameterSyntax(filled_hvp)
8776
    self.hv_full = filled_hvp
8777
    # check that we don't specify global parameters on an instance
8778
    _CheckGlobalHvParams(self.op.hvparams)
8779

    
8780
    # fill and remember the beparams dict
8781
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8782
    for param, value in self.op.beparams.iteritems():
8783
      if value == constants.VALUE_AUTO:
8784
        self.op.beparams[param] = default_beparams[param]
8785
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8786
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8787

    
8788
    # build os parameters
8789
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8790

    
8791
    # now that hvp/bep are in final format, let's reset to defaults,
8792
    # if told to do so
8793
    if self.op.identify_defaults:
8794
      self._RevertToDefaults(cluster)
8795

    
8796
    # NIC buildup
8797
    self.nics = []
8798
    for idx, nic in enumerate(self.op.nics):
8799
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8800
      nic_mode = nic_mode_req
8801
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8802
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8803

    
8804
      # in routed mode, for the first nic, the default ip is 'auto'
8805
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8806
        default_ip_mode = constants.VALUE_AUTO
8807
      else:
8808
        default_ip_mode = constants.VALUE_NONE
8809

    
8810
      # ip validity checks
8811
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8812
      if ip is None or ip.lower() == constants.VALUE_NONE:
8813
        nic_ip = None
8814
      elif ip.lower() == constants.VALUE_AUTO:
8815
        if not self.op.name_check:
8816
          raise errors.OpPrereqError("IP address set to auto but name checks"
8817
                                     " have been skipped",
8818
                                     errors.ECODE_INVAL)
8819
        nic_ip = self.hostname1.ip
8820
      else:
8821
        if not netutils.IPAddress.IsValid(ip):
8822
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8823
                                     errors.ECODE_INVAL)
8824
        nic_ip = ip
8825

    
8826
      # TODO: check the ip address for uniqueness
8827
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8828
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8829
                                   errors.ECODE_INVAL)
8830

    
8831
      # MAC address verification
8832
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8833
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8834
        mac = utils.NormalizeAndValidateMac(mac)
8835

    
8836
        try:
8837
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8838
        except errors.ReservationError:
8839
          raise errors.OpPrereqError("MAC address %s already in use"
8840
                                     " in cluster" % mac,
8841
                                     errors.ECODE_NOTUNIQUE)
8842

    
8843
      #  Build nic parameters
8844
      link = nic.get(constants.INIC_LINK, None)
8845
      if link == constants.VALUE_AUTO:
8846
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8847
      nicparams = {}
8848
      if nic_mode_req:
8849
        nicparams[constants.NIC_MODE] = nic_mode
8850
      if link:
8851
        nicparams[constants.NIC_LINK] = link
8852

    
8853
      check_params = cluster.SimpleFillNIC(nicparams)
8854
      objects.NIC.CheckParameterSyntax(check_params)
8855
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8856

    
8857
    # disk checks/pre-build
8858
    default_vg = self.cfg.GetVGName()
8859
    self.disks = []
8860
    for disk in self.op.disks:
8861
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8862
      if mode not in constants.DISK_ACCESS_SET:
8863
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8864
                                   mode, errors.ECODE_INVAL)
8865
      size = disk.get(constants.IDISK_SIZE, None)
8866
      if size is None:
8867
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8868
      try:
8869
        size = int(size)
8870
      except (TypeError, ValueError):
8871
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8872
                                   errors.ECODE_INVAL)
8873

    
8874
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8875
      new_disk = {
8876
        constants.IDISK_SIZE: size,
8877
        constants.IDISK_MODE: mode,
8878
        constants.IDISK_VG: data_vg,
8879
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8880
        }
8881
      if constants.IDISK_ADOPT in disk:
8882
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8883
      self.disks.append(new_disk)
8884

    
8885
    if self.op.mode == constants.INSTANCE_IMPORT:
8886
      disk_images = []
8887
      for idx in range(len(self.disks)):
8888
        option = "disk%d_dump" % idx
8889
        if export_info.has_option(constants.INISECT_INS, option):
8890
          # FIXME: are the old os-es, disk sizes, etc. useful?
8891
          export_name = export_info.get(constants.INISECT_INS, option)
8892
          image = utils.PathJoin(self.op.src_path, export_name)
8893
          disk_images.append(image)
8894
        else:
8895
          disk_images.append(False)
8896

    
8897
      self.src_images = disk_images
8898

    
8899
      old_name = export_info.get(constants.INISECT_INS, "name")
8900
      if self.op.instance_name == old_name:
8901
        for idx, nic in enumerate(self.nics):
8902
          if nic.mac == constants.VALUE_AUTO:
8903
            nic_mac_ini = "nic%d_mac" % idx
8904
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8905

    
8906
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8907

    
8908
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8909
    if self.op.ip_check:
8910
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8911
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8912
                                   (self.check_ip, self.op.instance_name),
8913
                                   errors.ECODE_NOTUNIQUE)
8914

    
8915
    #### mac address generation
8916
    # By generating here the mac address both the allocator and the hooks get
8917
    # the real final mac address rather than the 'auto' or 'generate' value.
8918
    # There is a race condition between the generation and the instance object
8919
    # creation, which means that we know the mac is valid now, but we're not
8920
    # sure it will be when we actually add the instance. If things go bad
8921
    # adding the instance will abort because of a duplicate mac, and the
8922
    # creation job will fail.
8923
    for nic in self.nics:
8924
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8925
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8926

    
8927
    #### allocator run
8928

    
8929
    if self.op.iallocator is not None:
8930
      self._RunAllocator()
8931

    
8932
    #### node related checks
8933

    
8934
    # check primary node
8935
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8936
    assert self.pnode is not None, \
8937
      "Cannot retrieve locked node %s" % self.op.pnode
8938
    if pnode.offline:
8939
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8940
                                 pnode.name, errors.ECODE_STATE)
8941
    if pnode.drained:
8942
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8943
                                 pnode.name, errors.ECODE_STATE)
8944
    if not pnode.vm_capable:
8945
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8946
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8947

    
8948
    self.secondaries = []
8949

    
8950
    # mirror node verification
8951
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8952
      if self.op.snode == pnode.name:
8953
        raise errors.OpPrereqError("The secondary node cannot be the"
8954
                                   " primary node", errors.ECODE_INVAL)
8955
      _CheckNodeOnline(self, self.op.snode)
8956
      _CheckNodeNotDrained(self, self.op.snode)
8957
      _CheckNodeVmCapable(self, self.op.snode)
8958
      self.secondaries.append(self.op.snode)
8959

    
8960
    nodenames = [pnode.name] + self.secondaries
8961

    
8962
    if not self.adopt_disks:
8963
      # Check lv size requirements, if not adopting
8964
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8965
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8966

    
8967
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8968
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8969
                                disk[constants.IDISK_ADOPT])
8970
                     for disk in self.disks])
8971
      if len(all_lvs) != len(self.disks):
8972
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8973
                                   errors.ECODE_INVAL)
8974
      for lv_name in all_lvs:
8975
        try:
8976
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8977
          # to ReserveLV uses the same syntax
8978
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8979
        except errors.ReservationError:
8980
          raise errors.OpPrereqError("LV named %s used by another instance" %
8981
                                     lv_name, errors.ECODE_NOTUNIQUE)
8982

    
8983
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8984
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8985

    
8986
      node_lvs = self.rpc.call_lv_list([pnode.name],
8987
                                       vg_names.payload.keys())[pnode.name]
8988
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8989
      node_lvs = node_lvs.payload
8990

    
8991
      delta = all_lvs.difference(node_lvs.keys())
8992
      if delta:
8993
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8994
                                   utils.CommaJoin(delta),
8995
                                   errors.ECODE_INVAL)
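      # in the lv_list payload, entry index 0 is the volume size and index 2
      # its online (in-use) flag; in-use volumes cannot be adopted safely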
8996
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8997
      if online_lvs:
8998
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8999
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9000
                                   errors.ECODE_STATE)
9001
      # update the size of disk based on what is found
9002
      for dsk in self.disks:
9003
        dsk[constants.IDISK_SIZE] = \
9004
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9005
                                        dsk[constants.IDISK_ADOPT])][0]))
9006

    
9007
    elif self.op.disk_template == constants.DT_BLOCK:
9008
      # Normalize and de-duplicate device paths
9009
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9010
                       for disk in self.disks])
9011
      if len(all_disks) != len(self.disks):
9012
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9013
                                   errors.ECODE_INVAL)
9014
      baddisks = [d for d in all_disks
9015
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9016
      if baddisks:
9017
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9018
                                   " cannot be adopted" %
9019
                                   (", ".join(baddisks),
9020
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9021
                                   errors.ECODE_INVAL)
9022

    
9023
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9024
                                            list(all_disks))[pnode.name]
9025
      node_disks.Raise("Cannot get block device information from node %s" %
9026
                       pnode.name)
9027
      node_disks = node_disks.payload
9028
      delta = all_disks.difference(node_disks.keys())
9029
      if delta:
9030
        raise errors.OpPrereqError("Missing block device(s): %s" %
9031
                                   utils.CommaJoin(delta),
9032
                                   errors.ECODE_INVAL)
9033
      for dsk in self.disks:
9034
        dsk[constants.IDISK_SIZE] = \
9035
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9036

    
9037
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9038

    
9039
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9040
    # check OS parameters (remotely)
9041
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9042

    
9043
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9044

    
9045
    # memory check on primary node
9046
    if self.op.start:
9047
      _CheckNodeFreeMemory(self, self.pnode.name,
9048
                           "creating instance %s" % self.op.instance_name,
9049
                           self.be_full[constants.BE_MEMORY],
9050
                           self.op.hypervisor)
9051

    
9052
    self.dry_run_result = list(nodenames)
9053

    
9054
  def Exec(self, feedback_fn):
9055
    """Create and add the instance to the cluster.
9056

9057
    """
9058
    instance = self.op.instance_name
9059
    pnode_name = self.pnode.name
9060

    
9061
    ht_kind = self.op.hypervisor
9062
    if ht_kind in constants.HTS_REQ_PORT:
9063
      network_port = self.cfg.AllocatePort()
9064
    else:
9065
      network_port = None
9066

    
9067
    disks = _GenerateDiskTemplate(self,
9068
                                  self.op.disk_template,
9069
                                  instance, pnode_name,
9070
                                  self.secondaries,
9071
                                  self.disks,
9072
                                  self.instance_file_storage_dir,
9073
                                  self.op.file_driver,
9074
                                  0,
9075
                                  feedback_fn)
9076

    
9077
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9078
                            primary_node=pnode_name,
9079
                            nics=self.nics, disks=disks,
9080
                            disk_template=self.op.disk_template,
9081
                            admin_up=False,
9082
                            network_port=network_port,
9083
                            beparams=self.op.beparams,
9084
                            hvparams=self.op.hvparams,
9085
                            hypervisor=self.op.hypervisor,
9086
                            osparams=self.op.osparams,
9087
                            )
9088

    
9089
    if self.op.tags:
9090
      for tag in self.op.tags:
9091
        iobj.AddTag(tag)
9092

    
9093
    if self.adopt_disks:
9094
      if self.op.disk_template == constants.DT_PLAIN:
9095
        # rename LVs to the newly-generated names; we need to construct
9096
        # 'fake' LV disks with the old data, plus the new unique_id
9097
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9098
        rename_to = []
9099
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9100
          rename_to.append(t_dsk.logical_id)
9101
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9102
          self.cfg.SetDiskID(t_dsk, pnode_name)
9103
        result = self.rpc.call_blockdev_rename(pnode_name,
9104
                                               zip(tmp_disks, rename_to))
9105
        result.Raise("Failed to rename adoped LVs")
9106
    else:
9107
      feedback_fn("* creating instance disks...")
9108
      try:
9109
        _CreateDisks(self, iobj)
9110
      except errors.OpExecError:
9111
        self.LogWarning("Device creation failed, reverting...")
9112
        try:
9113
          _RemoveDisks(self, iobj)
9114
        finally:
9115
          self.cfg.ReleaseDRBDMinors(instance)
9116
          raise
9117

    
9118
    feedback_fn("adding instance %s to cluster config" % instance)
9119

    
9120
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9121

    
9122
    # Declare that we don't want to remove the instance lock anymore, as we've
9123
    # added the instance to the config
9124
    del self.remove_locks[locking.LEVEL_INSTANCE]
9125

    
9126
    if self.op.mode == constants.INSTANCE_IMPORT:
9127
      # Release unused nodes
9128
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9129
    else:
9130
      # Release all nodes
9131
      _ReleaseLocks(self, locking.LEVEL_NODE)
9132

    
9133
    disk_abort = False
9134
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9135
      feedback_fn("* wiping instance disks...")
9136
      try:
9137
        _WipeDisks(self, iobj)
9138
      except errors.OpExecError, err:
9139
        logging.exception("Wiping disks failed")
9140
        self.LogWarning("Wiping instance disks failed (%s)", err)
9141
        disk_abort = True
9142

    
9143
    if disk_abort:
9144
      # Something is already wrong with the disks, don't do anything else
9145
      pass
9146
    elif self.op.wait_for_sync:
9147
      disk_abort = not _WaitForSync(self, iobj)
9148
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9149
      # make sure the disks are not degraded (still sync-ing is ok)
9150
      feedback_fn("* checking mirrors status")
9151
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9152
    else:
9153
      disk_abort = False
9154

    
9155
    if disk_abort:
9156
      _RemoveDisks(self, iobj)
9157
      self.cfg.RemoveInstance(iobj.name)
9158
      # Make sure the instance lock gets removed
9159
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9160
      raise errors.OpExecError("There are some degraded disks for"
9161
                               " this instance")
9162

    
9163
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9164
      if self.op.mode == constants.INSTANCE_CREATE:
9165
        if not self.op.no_install:
9166
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9167
                        not self.op.wait_for_sync)
9168
          if pause_sync:
9169
            feedback_fn("* pausing disk sync to install instance OS")
9170
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9171
                                                              iobj.disks, True)
9172
            for idx, success in enumerate(result.payload):
9173
              if not success:
9174
                logging.warn("pause-sync of instance %s for disk %d failed",
9175
                             instance, idx)
9176

    
9177
          feedback_fn("* running the instance OS create scripts...")
9178
          # FIXME: pass debug option from opcode to backend
9179
          os_add_result = \
9180
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9181
                                          self.op.debug_level)
9182
          if pause_sync:
9183
            feedback_fn("* resuming disk sync")
9184
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9185
                                                              iobj.disks, False)
9186
            for idx, success in enumerate(result.payload):
9187
              if not success:
9188
                logging.warn("resume-sync of instance %s for disk %d failed",
9189
                             instance, idx)
9190

    
9191
          os_add_result.Raise("Could not add os for instance %s"
9192
                              " on node %s" % (instance, pnode_name))
9193

    
9194
      elif self.op.mode == constants.INSTANCE_IMPORT:
9195
        feedback_fn("* running the instance OS import scripts...")
9196

    
9197
        transfers = []
9198

    
9199
        for idx, image in enumerate(self.src_images):
9200
          if not image:
9201
            continue
9202

    
9203
          # FIXME: pass debug option from opcode to backend
9204
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9205
                                             constants.IEIO_FILE, (image, ),
9206
                                             constants.IEIO_SCRIPT,
9207
                                             (iobj.disks[idx], idx),
9208
                                             None)
9209
          transfers.append(dt)
9210

    
9211
        import_result = \
9212
          masterd.instance.TransferInstanceData(self, feedback_fn,
9213
                                                self.op.src_node, pnode_name,
9214
                                                self.pnode.secondary_ip,
9215
                                                iobj, transfers)
9216
        if not compat.all(import_result):
9217
          self.LogWarning("Some disks for instance %s on node %s were not"
9218
                          " imported successfully" % (instance, pnode_name))
9219

    
9220
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9221
        feedback_fn("* preparing remote import...")
9222
        # The source cluster will stop the instance before attempting to make a
9223
        # connection. In some cases stopping an instance can take a long time,
9224
        # hence the shutdown timeout is added to the connection timeout.
9225
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9226
                           self.op.source_shutdown_timeout)
9227
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9228

    
9229
        assert iobj.primary_node == self.pnode.name
9230
        disk_results = \
9231
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9232
                                        self.source_x509_ca,
9233
                                        self._cds, timeouts)
9234
        if not compat.all(disk_results):
9235
          # TODO: Should the instance still be started, even if some disks
9236
          # failed to import (valid for local imports, too)?
9237
          self.LogWarning("Some disks for instance %s on node %s were not"
9238
                          " imported successfully" % (instance, pnode_name))
9239

    
9240
        # Run rename script on newly imported instance
9241
        assert iobj.name == instance
9242
        feedback_fn("Running rename script for %s" % instance)
9243
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9244
                                                   self.source_instance_name,
9245
                                                   self.op.debug_level)
9246
        if result.fail_msg:
9247
          self.LogWarning("Failed to run rename script for %s on node"
9248
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9249

    
9250
      else:
9251
        # also checked in the prereq part
9252
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9253
                                     % self.op.mode)
9254

    
9255
    if self.op.start:
9256
      iobj.admin_up = True
9257
      self.cfg.Update(iobj, feedback_fn)
9258
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9259
      feedback_fn("* starting instance...")
9260
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9261
                                            False)
9262
      result.Raise("Could not start instance")
9263

    
9264
    return list(iobj.all_nodes)
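

# Illustrative sketch with hypothetical values; it is not the authoritative
# opcode definition. LUInstanceCreate is driven by an instance-creation
# opcode, and a client request for a plain LVM-backed instance would carry
# fields along these lines (the keys mirror the attributes this LU reads
# from self.op).
def _DemoInstanceCreateRequest():
  """Returns an example request description for a plain-disk instance."""
  return {
    "instance_name": "instance1.example.com",
    "mode": constants.INSTANCE_CREATE,
    "disk_template": constants.DT_PLAIN,
    "disks": [{constants.IDISK_SIZE: 10240}],  # one 10 GiB disk
    "nics": [{}],                              # one NIC, cluster defaults
    "os_type": "debian-image",                 # hypothetical OS name
    "pnode": "node1.example.com",              # or let an iallocator choose
    "start": True,
    }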
9265

    
9266

    
9267
class LUInstanceConsole(NoHooksLU):
9268
  """Connect to an instance's console.
9269

9270
  This is somewhat special in that it returns the command line that
9271
  you need to run on the master node in order to connect to the
9272
  console.
9273

9274
  """
9275
  REQ_BGL = False
9276

    
9277
  def ExpandNames(self):
9278
    self._ExpandAndLockInstance()
9279

    
9280
  def CheckPrereq(self):
9281
    """Check prerequisites.
9282

9283
    This checks that the instance is in the cluster.
9284

9285
    """
9286
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9287
    assert self.instance is not None, \
9288
      "Cannot retrieve locked instance %s" % self.op.instance_name
9289
    _CheckNodeOnline(self, self.instance.primary_node)
9290

    
9291
  def Exec(self, feedback_fn):
9292
    """Connect to the console of an instance
9293

9294
    """
9295
    instance = self.instance
9296
    node = instance.primary_node
9297

    
9298
    node_insts = self.rpc.call_instance_list([node],
9299
                                             [instance.hypervisor])[node]
9300
    node_insts.Raise("Can't get node information from %s" % node)
9301

    
9302
    if instance.name not in node_insts.payload:
9303
      if instance.admin_up:
9304
        state = constants.INSTST_ERRORDOWN
9305
      else:
9306
        state = constants.INSTST_ADMINDOWN
9307
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9308
                               (instance.name, state))
9309

    
9310
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9311

    
9312
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9313

    
9314

    
9315
def _GetInstanceConsole(cluster, instance):
9316
  """Returns console information for an instance.
9317

9318
  @type cluster: L{objects.Cluster}
9319
  @type instance: L{objects.Instance}
9320
  @rtype: dict
  @return: connection information for the instance's console
9321

9322
  """
9323
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9324
  # beparams and hvparams are passed separately, to avoid editing the
9325
  # instance and then saving the defaults in the instance itself.
9326
  hvparams = cluster.FillHV(instance)
9327
  beparams = cluster.FillBE(instance)
9328
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9329

    
9330
  assert console.instance == instance.name
9331
  assert console.Validate()
9332

    
9333
  return console.ToDict()
9334

    
9335

    
9336
class LUInstanceReplaceDisks(LogicalUnit):
9337
  """Replace the disks of an instance.
9338

9339
  """
9340
  HPATH = "mirrors-replace"
9341
  HTYPE = constants.HTYPE_INSTANCE
9342
  REQ_BGL = False
9343

    
9344
  def CheckArguments(self):
9345
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9346
                                  self.op.iallocator)
9347

    
9348
  def ExpandNames(self):
9349
    self._ExpandAndLockInstance()
9350

    
9351
    assert locking.LEVEL_NODE not in self.needed_locks
9352
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9353

    
9354
    assert self.op.iallocator is None or self.op.remote_node is None, \
9355
      "Conflicting options"
9356

    
9357
    if self.op.remote_node is not None:
9358
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9359

    
9360
      # Warning: do not remove the locking of the new secondary here
9361
      # unless DRBD8.AddChildren is changed to work in parallel;
9362
      # currently it doesn't since parallel invocations of
9363
      # FindUnusedMinor will conflict
9364
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9365
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9366
    else:
9367
      self.needed_locks[locking.LEVEL_NODE] = []
9368
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9369

    
9370
      if self.op.iallocator is not None:
9371
        # iallocator will select a new node in the same group
9372
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9373

    
9374
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9375
                                   self.op.iallocator, self.op.remote_node,
9376
                                   self.op.disks, False, self.op.early_release)
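    # the hard-coded False is the tasklet's delay_iallocator flag: for an
    # explicit replace-disks request the allocator, if any, runs during
    # CheckPrereq instead of being deferred to Exec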
9377

    
9378
    self.tasklets = [self.replacer]
9379

    
9380
  def DeclareLocks(self, level):
9381
    if level == locking.LEVEL_NODEGROUP:
9382
      assert self.op.remote_node is None
9383
      assert self.op.iallocator is not None
9384
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9385

    
9386
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9387
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9388
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9389

    
9390
    elif level == locking.LEVEL_NODE:
9391
      if self.op.iallocator is not None:
9392
        assert self.op.remote_node is None
9393
        assert not self.needed_locks[locking.LEVEL_NODE]
9394

    
9395
        # Lock member nodes of all locked groups
9396
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9397
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9398
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9399
      else:
9400
        self._LockInstancesNodes()
9401

    
9402
  def BuildHooksEnv(self):
9403
    """Build hooks env.
9404

9405
    This runs on the master, the primary and all the secondaries.
9406

9407
    """
9408
    instance = self.replacer.instance
9409
    env = {
9410
      "MODE": self.op.mode,
9411
      "NEW_SECONDARY": self.op.remote_node,
9412
      "OLD_SECONDARY": instance.secondary_nodes[0],
9413
      }
9414
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9415
    return env
9416

    
9417
  def BuildHooksNodes(self):
9418
    """Build hooks nodes.
9419

9420
    """
9421
    instance = self.replacer.instance
9422
    nl = [
9423
      self.cfg.GetMasterNode(),
9424
      instance.primary_node,
9425
      ]
9426
    if self.op.remote_node is not None:
9427
      nl.append(self.op.remote_node)
9428
    return nl, nl
9429

    
9430
  def CheckPrereq(self):
9431
    """Check prerequisites.
9432

9433
    """
9434
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9435
            self.op.iallocator is None)
9436

    
9437
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9438
    if owned_groups:
9439
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9440

    
9441
    return LogicalUnit.CheckPrereq(self)
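

# Illustrative sketch with a hypothetical class name; it is not part of the
# Ganeti code base. LUs that delegate their work to tasklets, as
# LUInstanceReplaceDisks does via self.tasklets above, only need to supply
# tasklet objects; the base LogicalUnit CheckPrereq/Exec then drive each
# tasklet in order. A minimal tasklet looks like this:
class _DemoNoopTasklet(Tasklet):
  """Tasklet that performs no work; shows the minimal interface."""
  def CheckPrereq(self):
    """Nothing to verify for this demo tasklet."""
    pass

  def Exec(self, feedback_fn):
    """Reports a single feedback line and returns."""
    feedback_fn("demo tasklet executed")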
9442

    
9443

    
9444
class TLReplaceDisks(Tasklet):
9445
  """Replaces disks for an instance.
9446

9447
  Note: Locking is not within the scope of this class.
9448

9449
  """
9450
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9451
               disks, delay_iallocator, early_release):
9452
    """Initializes this class.
9453

9454
    """
9455
    Tasklet.__init__(self, lu)
9456

    
9457
    # Parameters
9458
    self.instance_name = instance_name
9459
    self.mode = mode
9460
    self.iallocator_name = iallocator_name
9461
    self.remote_node = remote_node
9462
    self.disks = disks
9463
    self.delay_iallocator = delay_iallocator
9464
    self.early_release = early_release
9465

    
9466
    # Runtime data
9467
    self.instance = None
9468
    self.new_node = None
9469
    self.target_node = None
9470
    self.other_node = None
9471
    self.remote_node_info = None
9472
    self.node_secondary_ip = None
9473

    
9474
  @staticmethod
9475
  def CheckArguments(mode, remote_node, iallocator):
9476
    """Helper function for users of this class.
9477

9478
    """
9479
    # check for valid parameter combination
9480
    if mode == constants.REPLACE_DISK_CHG:
9481
      if remote_node is None and iallocator is None:
9482
        raise errors.OpPrereqError("When changing the secondary either an"
9483
                                   " iallocator script must be used or the"
9484
                                   " new node given", errors.ECODE_INVAL)
9485

    
9486
      if remote_node is not None and iallocator is not None:
9487
        raise errors.OpPrereqError("Give either the iallocator or the new"
9488
                                   " secondary, not both", errors.ECODE_INVAL)
9489

    
9490
    elif remote_node is not None or iallocator is not None:
9491
      # Not replacing the secondary
9492
      raise errors.OpPrereqError("The iallocator and new node options can"
9493
                                 " only be used when changing the"
9494
                                 " secondary node", errors.ECODE_INVAL)
9495

    
9496
  @staticmethod
9497
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9498
    """Compute a new secondary node using an IAllocator.
9499

9500
    """
9501
    ial = IAllocator(lu.cfg, lu.rpc,
9502
                     mode=constants.IALLOCATOR_MODE_RELOC,
9503
                     name=instance_name,
9504
                     relocate_from=list(relocate_from))
9505

    
9506
    ial.Run(iallocator_name)
9507

    
9508
    if not ial.success:
9509
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9510
                                 " %s" % (iallocator_name, ial.info),
9511
                                 errors.ECODE_NORES)
9512

    
9513
    if len(ial.result) != ial.required_nodes:
9514
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9515
                                 " of nodes (%s), required %s" %
9516
                                 (iallocator_name,
9517
                                  len(ial.result), ial.required_nodes),
9518
                                 errors.ECODE_FAULT)
9519

    
9520
    remote_node_name = ial.result[0]
9521

    
9522
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9523
               instance_name, remote_node_name)
9524

    
9525
    return remote_node_name
9526

    
9527
  def _FindFaultyDisks(self, node_name):
9528
    """Wrapper for L{_FindFaultyInstanceDisks}.
9529

9530
    """
9531
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9532
                                    node_name, True)
9533

    
9534
  def _CheckDisksActivated(self, instance):
9535
    """Checks if the instance disks are activated.
9536

9537
    @param instance: The instance to check disks
9538
    @return: True if they are activated, False otherwise
9539

9540
    """
9541
    nodes = instance.all_nodes
9542

    
9543
    for idx, dev in enumerate(instance.disks):
9544
      for node in nodes:
9545
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9546
        self.cfg.SetDiskID(dev, node)
9547

    
9548
        result = self.rpc.call_blockdev_find(node, dev)
9549

    
9550
        if result.offline:
9551
          continue
9552
        elif result.fail_msg or not result.payload:
9553
          return False
9554

    
9555
    return True
9556

    
9557
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated out
    and is now called from Exec because, during node evacuation, the
    iallocator was previously called with an unmodified cluster model, not
    taking planned changes into account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
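    # (node name -> secondary IP); used later by the drbd_disconnect_net and
    # drbd_attach_net RPC calls in _ExecDrbd8Secondary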
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

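      # the generated names are unique per LV and end in the suffixes above,
      # e.g. (hypothetical) "<unique-id>.disk0_data" and "<unique-id>.disk0_meta"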
      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

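      # ren_fn example with hypothetical names: ("xenvg", "aa12.disk0_data")
      # becomes ("xenvg", "aa12.disk0_data_replaced-1360000000")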
      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in-memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new LVs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
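      # e.g. with hypothetical values, a logical_id of
      # ("node1", "node2", 11000, 0, 3, "secret") with node1 as primary yields
      # new_alone_id ("node1", <new node>, None, 0, <new minor>, "secret") and
      # new_net_id with the original port 11000 instead of None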
      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
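      # Build one job per instance, each consisting of a single replace-disks
      # opcode moving the secondary to the requested node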
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
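  # Opcodes without an early_release slot are left unchanged; the assert below
  # only documents that OpInstanceReplaceDisks is expected to have the slot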
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  This handles the results of the iallocator modes
  L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result
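  # moved is a list of (name, group, nodes) tuples, failed a list of
  # (name, reason) tuples and jobs a list of lists of serialized opcodes,
  # which are deserialized below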
  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed on node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed on node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
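    # for DRBD devices logical_id is (node_a, node_b, port, minor_a, minor_b,
    # secret), so the secondary node is whichever of the first two entries is
    # not the primary node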
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
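    # self.op.disks is a list of (operation, parameters) pairs, where the
    # operation is either DDM_ADD, DDM_REMOVE or the integer index of an
    # existing disk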
    disk_addremove = 0
10761
    for disk_op, disk_dict in self.op.disks:
10762
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10763
      if disk_op == constants.DDM_REMOVE:
10764
        disk_addremove += 1
10765
        continue
10766
      elif disk_op == constants.DDM_ADD:
10767
        disk_addremove += 1
10768
      else:
10769
        if not isinstance(disk_op, int):
10770
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10771
        if not isinstance(disk_dict, dict):
10772
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10773
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10774

    
10775
      if disk_op == constants.DDM_ADD:
10776
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10777
        if mode not in constants.DISK_ACCESS_SET:
10778
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10779
                                     errors.ECODE_INVAL)
10780
        size = disk_dict.get(constants.IDISK_SIZE, None)
10781
        if size is None:
10782
          raise errors.OpPrereqError("Required disk parameter size missing",
10783
                                     errors.ECODE_INVAL)
10784
        try:
10785
          size = int(size)
10786
        except (TypeError, ValueError), err:
10787
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10788
                                     str(err), errors.ECODE_INVAL)
10789
        disk_dict[constants.IDISK_SIZE] = size
10790
      else:
10791
        # modification of disk
10792
        if constants.IDISK_SIZE in disk_dict:
10793
          raise errors.OpPrereqError("Disk size change not possible, use"
10794
                                     " grow-disk", errors.ECODE_INVAL)
10795

    
10796
    if disk_addremove > 1:
10797
      raise errors.OpPrereqError("Only one disk add or remove operation"
10798
                                 " supported at a time", errors.ECODE_INVAL)
10799

    
10800
    if self.op.disks and self.op.disk_template is not None:
10801
      raise errors.OpPrereqError("Disk template conversion and other disk"
10802
                                 " changes not supported at the same time",
10803
                                 errors.ECODE_INVAL)
10804

    
10805
    if (self.op.disk_template and
10806
        self.op.disk_template in constants.DTS_INT_MIRROR and
10807
        self.op.remote_node is None):
10808
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10809
                                 " one requires specifying a secondary node",
10810
                                 errors.ECODE_INVAL)
10811

    
10812
    # NIC validation
10813
    nic_addremove = 0
10814
    for nic_op, nic_dict in self.op.nics:
10815
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10816
      if nic_op == constants.DDM_REMOVE:
10817
        nic_addremove += 1
10818
        continue
10819
      elif nic_op == constants.DDM_ADD:
10820
        nic_addremove += 1
10821
      else:
10822
        if not isinstance(nic_op, int):
10823
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10824
        if not isinstance(nic_dict, dict):
10825
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10826
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10827

    
10828
      # nic_dict should be a dict
10829
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10830
      if nic_ip is not None:
10831
        if nic_ip.lower() == constants.VALUE_NONE:
10832
          nic_dict[constants.INIC_IP] = None
10833
        else:
10834
          if not netutils.IPAddress.IsValid(nic_ip):
10835
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10836
                                       errors.ECODE_INVAL)
10837

    
10838
      nic_bridge = nic_dict.get("bridge", None)
10839
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10840
      if nic_bridge and nic_link:
10841
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10842
                                   " at the same time", errors.ECODE_INVAL)
10843
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10844
        nic_dict["bridge"] = None
10845
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10846
        nic_dict[constants.INIC_LINK] = None
10847

    
10848
      if nic_op == constants.DDM_ADD:
10849
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10850
        if nic_mac is None:
10851
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10852

    
10853
      if constants.INIC_MAC in nic_dict:
10854
        nic_mac = nic_dict[constants.INIC_MAC]
10855
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10856
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10857

    
10858
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10859
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10860
                                     " modifying an existing nic",
10861
                                     errors.ECODE_INVAL)
10862

    
10863
    if nic_addremove > 1:
10864
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10865
                                 " supported at a time", errors.ECODE_INVAL)
10866

    
10867
  def ExpandNames(self):
10868
    self._ExpandAndLockInstance()
10869
    self.needed_locks[locking.LEVEL_NODE] = []
10870
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10871

    
10872
  def DeclareLocks(self, level):
10873
    if level == locking.LEVEL_NODE:
10874
      self._LockInstancesNodes()
10875
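      # A disk template conversion to a mirrored template also needs the new
      # secondary node locked, so expand its name and add it to the node
      # locks here.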
      if self.op.disk_template and self.op.remote_node:
10876
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10877
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10878

    
10879
  def BuildHooksEnv(self):
10880
    """Build hooks env.
10881

10882
    This runs on the master, primary and secondaries.
10883

10884
    """
10885
    args = dict()
10886
    if constants.BE_MEMORY in self.be_new:
10887
      args["memory"] = self.be_new[constants.BE_MEMORY]
10888
    if constants.BE_VCPUS in self.be_new:
10889
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10890
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10891
    # information at all.
10892
    if self.op.nics:
10893
      args["nics"] = []
10894
      nic_override = dict(self.op.nics)
10895
      for idx, nic in enumerate(self.instance.nics):
10896
        if idx in nic_override:
10897
          this_nic_override = nic_override[idx]
10898
        else:
10899
          this_nic_override = {}
10900
        if constants.INIC_IP in this_nic_override:
10901
          ip = this_nic_override[constants.INIC_IP]
10902
        else:
10903
          ip = nic.ip
10904
        if constants.INIC_MAC in this_nic_override:
10905
          mac = this_nic_override[constants.INIC_MAC]
10906
        else:
10907
          mac = nic.mac
10908
        if idx in self.nic_pnew:
10909
          nicparams = self.nic_pnew[idx]
10910
        else:
10911
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10912
        mode = nicparams[constants.NIC_MODE]
10913
        link = nicparams[constants.NIC_LINK]
10914
        args["nics"].append((ip, mac, mode, link))
10915
      if constants.DDM_ADD in nic_override:
10916
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10917
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10918
        nicparams = self.nic_pnew[constants.DDM_ADD]
10919
        mode = nicparams[constants.NIC_MODE]
10920
        link = nicparams[constants.NIC_LINK]
10921
        args["nics"].append((ip, mac, mode, link))
10922
      elif constants.DDM_REMOVE in nic_override:
10923
        del args["nics"][-1]
10924

    
10925
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10926
    if self.op.disk_template:
10927
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10928

    
10929
    return env
10930

    
10931
  def BuildHooksNodes(self):
10932
    """Build hooks nodes.
10933

10934
    """
10935
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10936
    return (nl, nl)
10937

    
10938
  def CheckPrereq(self):
10939
    """Check prerequisites.
10940

10941
    This checks the requested parameter changes against the current instance
    and cluster configuration.
10942

10943
    """
10944
    # checking the new params on the primary/secondary nodes
10945

    
10946
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10947
    cluster = self.cluster = self.cfg.GetClusterInfo()
10948
    assert self.instance is not None, \
10949
      "Cannot retrieve locked instance %s" % self.op.instance_name
10950
    pnode = instance.primary_node
10951
    nodelist = list(instance.all_nodes)
10952

    
10953
    # OS change
10954
    if self.op.os_name and not self.op.force:
10955
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10956
                      self.op.force_variant)
10957
      instance_os = self.op.os_name
10958
    else:
10959
      instance_os = instance.os
10960

    
10961
    if self.op.disk_template:
10962
      if instance.disk_template == self.op.disk_template:
10963
        raise errors.OpPrereqError("Instance already has disk template %s" %
10964
                                   instance.disk_template, errors.ECODE_INVAL)
10965

    
10966
      if (instance.disk_template,
10967
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10968
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10969
                                   " %s to %s" % (instance.disk_template,
10970
                                                  self.op.disk_template),
10971
                                   errors.ECODE_INVAL)
10972
      _CheckInstanceDown(self, instance, "cannot change disk template")
10973
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10974
        if self.op.remote_node == pnode:
10975
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10976
                                     " as the primary node of the instance" %
10977
                                     self.op.remote_node, errors.ECODE_STATE)
10978
        _CheckNodeOnline(self, self.op.remote_node)
10979
        _CheckNodeNotDrained(self, self.op.remote_node)
10980
        # FIXME: here we assume that the old instance type is DT_PLAIN
10981
        assert instance.disk_template == constants.DT_PLAIN
10982
        disks = [{constants.IDISK_SIZE: d.size,
10983
                  constants.IDISK_VG: d.logical_id[0]}
10984
                 for d in instance.disks]
10985
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10986
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10987

    
10988
    # hvparams processing
10989
    if self.op.hvparams:
10990
      hv_type = instance.hypervisor
10991
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10992
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10993
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10994

    
10995
      # local check
10996
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10997
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10998
      self.hv_proposed = self.hv_new = hv_new # the new actual values
10999
      self.hv_inst = i_hvdict # the new dict (without defaults)
11000
    else:
11001
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11002
                                              instance.hvparams)
11003
      self.hv_new = self.hv_inst = {}
11004

    
11005
    # beparams processing
11006
    if self.op.beparams:
11007
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11008
                                   use_none=True)
11009
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11010
      be_new = cluster.SimpleFillBE(i_bedict)
11011
      self.be_proposed = self.be_new = be_new # the new actual values
11012
      self.be_inst = i_bedict # the new dict (without defaults)
11013
    else:
11014
      self.be_new = self.be_inst = {}
11015
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11016
    be_old = cluster.FillBE(instance)
11017

    
11018
    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
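    # Illustrative example (the mask syntax itself is an assumption here): a
    # multi-entry mask such as "0-1:2-3" yields two per-vCPU entries and is
    # therefore only accepted together with BE_VCPUS == 2, while a
    # single-entry mask applies to all vCPUs and skips the length check.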
    if (constants.BE_VCPUS in self.be_proposed and
11022
        constants.HV_CPU_MASK in self.hv_proposed):
11023
      cpu_list = \
11024
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11025
      # Verify mask is consistent with number of vCPUs. Can skip this
11026
      # test if only 1 entry in the CPU mask, which means same mask
11027
      # is applied to all vCPUs.
11028
      if (len(cpu_list) > 1 and
11029
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11030
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11031
                                   " CPU mask [%s]" %
11032
                                   (self.be_proposed[constants.BE_VCPUS],
11033
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11034
                                   errors.ECODE_INVAL)
11035

    
11036
      # Only perform this test if a new CPU mask is given
11037
      if constants.HV_CPU_MASK in self.hv_new:
11038
        # Calculate the largest CPU number requested
11039
        max_requested_cpu = max(map(max, cpu_list))
11040
        # Check that all of the instance's nodes have enough physical CPUs to
11041
        # satisfy the requested CPU mask
11042
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11043
                                max_requested_cpu + 1, instance.hypervisor)
11044

    
11045
    # osparams processing
11046
    if self.op.osparams:
11047
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11048
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11049
      self.os_inst = i_osdict # the new dict (without defaults)
11050
    else:
11051
      self.os_inst = {}
11052

    
11053
    self.warn = []
11054

    
11055
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11056
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11057
      mem_check_list = [pnode]
11058
      if be_new[constants.BE_AUTO_BALANCE]:
11059
        # either we changed auto_balance to yes or it was from before
11060
        mem_check_list.extend(instance.secondary_nodes)
11061
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11062
                                                  instance.hypervisor)
11063
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11064
                                         instance.hypervisor)
11065
      pninfo = nodeinfo[pnode]
11066
      msg = pninfo.fail_msg
11067
      if msg:
11068
        # Assume the primary node is unreachable and go ahead
11069
        self.warn.append("Can't get info from primary node %s: %s" %
11070
                         (pnode, msg))
11071
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11072
        self.warn.append("Node data from primary node %s doesn't contain"
11073
                         " free memory information" % pnode)
11074
      elif instance_info.fail_msg:
11075
        self.warn.append("Can't get instance runtime information: %s" %
11076
                        instance_info.fail_msg)
11077
      else:
11078
        if instance_info.payload:
11079
          current_mem = int(instance_info.payload["memory"])
11080
        else:
11081
          # Assume instance not running
11082
          # (there is a slight race condition here, but it's not very probable,
11083
          # and we have no other way to check)
11084
          current_mem = 0
11085
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11086
                    pninfo.payload["memory_free"])
11087
        if miss_mem > 0:
11088
          raise errors.OpPrereqError("This change will prevent the instance"
11089
                                     " from starting, due to %d MB of memory"
11090
                                     " missing on its primary node" % miss_mem,
11091
                                     errors.ECODE_NORES)
11092

    
11093
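      # Worked example for the check above (illustrative): raising BE_MEMORY
      # to 2048 MB while the instance currently uses 512 MB and the primary
      # node reports 1024 MB free gives miss_mem = 2048 - 512 - 1024 = 512,
      # which is > 0 and therefore fails with ECODE_NORES (unless the check
      # was skipped via force).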
      if be_new[constants.BE_AUTO_BALANCE]:
11094
        for node, nres in nodeinfo.items():
11095
          if node not in instance.secondary_nodes:
11096
            continue
11097
          nres.Raise("Can't get info from secondary node %s" % node,
11098
                     prereq=True, ecode=errors.ECODE_STATE)
11099
          if not isinstance(nres.payload.get("memory_free", None), int):
11100
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11101
                                       " memory information" % node,
11102
                                       errors.ECODE_STATE)
11103
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11104
            raise errors.OpPrereqError("This change will prevent the instance"
11105
                                       " from failover to its secondary node"
11106
                                       " %s, due to not enough memory" % node,
11107
                                       errors.ECODE_STATE)
11108

    
11109
    # NIC processing
11110
    self.nic_pnew = {}
11111
    self.nic_pinst = {}
11112
    for nic_op, nic_dict in self.op.nics:
11113
      if nic_op == constants.DDM_REMOVE:
11114
        if not instance.nics:
11115
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11116
                                     errors.ECODE_INVAL)
11117
        continue
11118
      if nic_op != constants.DDM_ADD:
11119
        # an existing nic
11120
        if not instance.nics:
11121
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11122
                                     " no NICs" % nic_op,
11123
                                     errors.ECODE_INVAL)
11124
        if nic_op < 0 or nic_op >= len(instance.nics):
11125
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11126
                                     " are 0 to %d" %
11127
                                     (nic_op, len(instance.nics) - 1),
11128
                                     errors.ECODE_INVAL)
11129
        old_nic_params = instance.nics[nic_op].nicparams
11130
        old_nic_ip = instance.nics[nic_op].ip
11131
      else:
11132
        old_nic_params = {}
11133
        old_nic_ip = None
11134

    
11135
      update_params_dict = dict([(key, nic_dict[key])
11136
                                 for key in constants.NICS_PARAMETERS
11137
                                 if key in nic_dict])
11138

    
11139
      if "bridge" in nic_dict:
11140
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11141
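      # "bridge" is accepted above as an alias for the link parameter
      # (presumably kept for backward compatibility); CheckArguments already
      # rejects passing both "bridge" and "link" at the same time.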

    
11142
      new_nic_params = _GetUpdatedParams(old_nic_params,
11143
                                         update_params_dict)
11144
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11145
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11146
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11147
      self.nic_pinst[nic_op] = new_nic_params
11148
      self.nic_pnew[nic_op] = new_filled_nic_params
11149
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11150

    
11151
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11152
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11153
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11154
        if msg:
11155
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11156
          if self.op.force:
11157
            self.warn.append(msg)
11158
          else:
11159
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11160
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11161
        if constants.INIC_IP in nic_dict:
11162
          nic_ip = nic_dict[constants.INIC_IP]
11163
        else:
11164
          nic_ip = old_nic_ip
11165
        if nic_ip is None:
11166
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11167
                                     " on a routed nic", errors.ECODE_INVAL)
11168
      if constants.INIC_MAC in nic_dict:
11169
        nic_mac = nic_dict[constants.INIC_MAC]
11170
        if nic_mac is None:
11171
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11172
                                     errors.ECODE_INVAL)
11173
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11174
          # otherwise generate the mac
11175
          nic_dict[constants.INIC_MAC] = \
11176
            self.cfg.GenerateMAC(self.proc.GetECId())
11177
        else:
11178
          # or validate/reserve the current one
11179
          try:
11180
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11181
          except errors.ReservationError:
11182
            raise errors.OpPrereqError("MAC address %s already in use"
11183
                                       " in cluster" % nic_mac,
11184
                                       errors.ECODE_NOTUNIQUE)
11185

    
11186
    # DISK processing
11187
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11188
      raise errors.OpPrereqError("Disk operations not supported for"
11189
                                 " diskless instances",
11190
                                 errors.ECODE_INVAL)
11191
    for disk_op, _ in self.op.disks:
11192
      if disk_op == constants.DDM_REMOVE:
11193
        if len(instance.disks) == 1:
11194
          raise errors.OpPrereqError("Cannot remove the last disk of"
11195
                                     " an instance", errors.ECODE_INVAL)
11196
        _CheckInstanceDown(self, instance, "cannot remove disks")
11197

    
11198
      if (disk_op == constants.DDM_ADD and
11199
          len(instance.disks) >= constants.MAX_DISKS):
11200
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11201
                                   " add more" % constants.MAX_DISKS,
11202
                                   errors.ECODE_STATE)
11203
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11204
        # an existing disk
11205
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
11210

    
11211
    return
11212

    
11213
  def _ConvertPlainToDrbd(self, feedback_fn):
11214
    """Converts an instance from plain to drbd.
11215

11216
    """
11217
    feedback_fn("Converting template to drbd")
11218
    instance = self.instance
11219
    pnode = instance.primary_node
11220
    snode = self.op.remote_node
11221

    
11222
    # create a fake disk info for _GenerateDiskTemplate
11223
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11224
                  constants.IDISK_VG: d.logical_id[0]}
11225
                 for d in instance.disks]
11226
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11227
                                      instance.name, pnode, [snode],
11228
                                      disk_info, None, None, 0, feedback_fn)
11229
    info = _GetInstanceInfoText(instance)
11230
    feedback_fn("Creating aditional volumes...")
11231
    # first, create the missing data and meta devices
11232
    for disk in new_disks:
11233
      # unfortunately this is... not too nice
11234
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11235
                            info, True)
11236
      for child in disk.children:
11237
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11238
    # at this stage, all new LVs have been created, we can rename the
11239
    # old ones
11240
    feedback_fn("Renaming original volumes...")
11241
    rename_list = [(o, n.children[0].logical_id)
11242
                   for (o, n) in zip(instance.disks, new_disks)]
11243
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11244
    result.Raise("Failed to rename original LVs")
11245

    
11246
    feedback_fn("Initializing DRBD devices...")
11247
    # all child devices are in place, we can now create the DRBD devices
11248
    for disk in new_disks:
11249
      for node in [pnode, snode]:
11250
        f_create = node == pnode
11251
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11252

    
11253
    # at this point, the instance has been modified
11254
    instance.disk_template = constants.DT_DRBD8
11255
    instance.disks = new_disks
11256
    self.cfg.Update(instance, feedback_fn)
11257

    
11258
    # disks are created, waiting for sync
11259
    disk_abort = not _WaitForSync(self, instance,
11260
                                  oneshot=not self.op.wait_for_sync)
11261
    if disk_abort:
11262
      raise errors.OpExecError("There are some degraded disks for"
11263
                               " this instance, please cleanup manually")
11264

    
11265
  def _ConvertDrbdToPlain(self, feedback_fn):
11266
    """Converts an instance from drbd to plain.
11267

11268
    """
11269
    instance = self.instance
11270
    assert len(instance.secondary_nodes) == 1
11271
    pnode = instance.primary_node
11272
    snode = instance.secondary_nodes[0]
11273
    feedback_fn("Converting template to plain")
11274

    
11275
    old_disks = instance.disks
11276
    new_disks = [d.children[0] for d in old_disks]
11277
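    # Each DRBD8 disk is backed by a (data, metadata) pair of child LVs; the
    # data child becomes the new plain disk, while the volumes on the
    # secondary node and the metadata LVs on the primary are removed below.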

    
11278
    # copy over size and mode
11279
    for parent, child in zip(old_disks, new_disks):
11280
      child.size = parent.size
11281
      child.mode = parent.mode
11282

    
11283
    # update instance structure
11284
    instance.disks = new_disks
11285
    instance.disk_template = constants.DT_PLAIN
11286
    self.cfg.Update(instance, feedback_fn)
11287

    
11288
    feedback_fn("Removing volumes on the secondary node...")
11289
    for disk in old_disks:
11290
      self.cfg.SetDiskID(disk, snode)
11291
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11292
      if msg:
11293
        self.LogWarning("Could not remove block device %s on node %s,"
11294
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11295

    
11296
    feedback_fn("Removing unneeded volumes on the primary node...")
11297
    for idx, disk in enumerate(old_disks):
11298
      meta = disk.children[1]
11299
      self.cfg.SetDiskID(meta, pnode)
11300
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11301
      if msg:
11302
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11303
                        " continuing anyway: %s", idx, pnode, msg)
11304

    
11305
  def Exec(self, feedback_fn):
11306
    """Modifies an instance.
11307

11308
    All parameters take effect only at the next restart of the instance.
11309

11310
    """
11311
    # Process here the warnings from CheckPrereq, as we don't have a
11312
    # feedback_fn there.
11313
    for warn in self.warn:
11314
      feedback_fn("WARNING: %s" % warn)
11315

    
11316
    result = []
11317
    instance = self.instance
11318
    # disk changes
11319
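    # The loop below applies each requested disk change: DDM_REMOVE always
    # drops the last disk, DDM_ADD appends a new one, and an integer op
    # modifies the disk at that index in place. Every applied change is
    # recorded in 'result' as a (parameter, value) pair, e.g.
    # ("disk/1", "add:size=1024,mode=rw"), and returned at the end of Exec.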
    for disk_op, disk_dict in self.op.disks:
11320
      if disk_op == constants.DDM_REMOVE:
11321
        # remove the last disk
11322
        device = instance.disks.pop()
11323
        device_idx = len(instance.disks)
11324
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11325
          self.cfg.SetDiskID(disk, node)
11326
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11327
          if msg:
11328
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11329
                            " continuing anyway", device_idx, node, msg)
11330
        result.append(("disk/%d" % device_idx, "remove"))
11331
      elif disk_op == constants.DDM_ADD:
11332
        # add a new disk
11333
        if instance.disk_template in (constants.DT_FILE,
11334
                                        constants.DT_SHARED_FILE):
11335
          file_driver, file_path = instance.disks[0].logical_id
11336
          file_path = os.path.dirname(file_path)
11337
        else:
11338
          file_driver = file_path = None
11339
        disk_idx_base = len(instance.disks)
11340
        new_disk = _GenerateDiskTemplate(self,
11341
                                         instance.disk_template,
11342
                                         instance.name, instance.primary_node,
11343
                                         instance.secondary_nodes,
11344
                                         [disk_dict],
11345
                                         file_path,
11346
                                         file_driver,
11347
                                         disk_idx_base, feedback_fn)[0]
11348
        instance.disks.append(new_disk)
11349
        info = _GetInstanceInfoText(instance)
11350

    
11351
        logging.info("Creating volume %s for instance %s",
11352
                     new_disk.iv_name, instance.name)
11353
        # Note: this needs to be kept in sync with _CreateDisks
11354
        #HARDCODE
11355
        for node in instance.all_nodes:
11356
          f_create = node == instance.primary_node
11357
          try:
11358
            _CreateBlockDev(self, node, instance, new_disk,
11359
                            f_create, info, f_create)
11360
          except errors.OpExecError, err:
11361
            self.LogWarning("Failed to create volume %s (%s) on"
11362
                            " node %s: %s",
11363
                            new_disk.iv_name, new_disk, node, err)
11364
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11365
                       (new_disk.size, new_disk.mode)))
11366
      else:
11367
        # change a given disk
11368
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11369
        result.append(("disk.mode/%d" % disk_op,
11370
                       disk_dict[constants.IDISK_MODE]))
11371

    
11372
    if self.op.disk_template:
11373
      r_shut = _ShutdownInstanceDisks(self, instance)
11374
      if not r_shut:
11375
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11376
                                 " proceed with disk template conversion")
11377
      mode = (instance.disk_template, self.op.disk_template)
11378
      try:
11379
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11380
      except:
11381
        self.cfg.ReleaseDRBDMinors(instance.name)
11382
        raise
11383
      result.append(("disk_template", self.op.disk_template))
11384

    
11385
    # NIC changes
11386
    for nic_op, nic_dict in self.op.nics:
11387
      if nic_op == constants.DDM_REMOVE:
11388
        # remove the last nic
11389
        del instance.nics[-1]
11390
        result.append(("nic.%d" % len(instance.nics), "remove"))
11391
      elif nic_op == constants.DDM_ADD:
11392
        # mac and bridge should be set, by now
11393
        mac = nic_dict[constants.INIC_MAC]
11394
        ip = nic_dict.get(constants.INIC_IP, None)
11395
        nicparams = self.nic_pinst[constants.DDM_ADD]
11396
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11397
        instance.nics.append(new_nic)
11398
        result.append(("nic.%d" % (len(instance.nics) - 1),
11399
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11400
                       (new_nic.mac, new_nic.ip,
11401
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11402
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11403
                       )))
11404
      else:
11405
        for key in (constants.INIC_MAC, constants.INIC_IP):
11406
          if key in nic_dict:
11407
            setattr(instance.nics[nic_op], key, nic_dict[key])
11408
        if nic_op in self.nic_pinst:
11409
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11410
        for key, val in nic_dict.iteritems():
11411
          result.append(("nic.%s/%d" % (key, nic_op), val))
11412

    
11413
    # hvparams changes
11414
    if self.op.hvparams:
11415
      instance.hvparams = self.hv_inst
11416
      for key, val in self.op.hvparams.iteritems():
11417
        result.append(("hv/%s" % key, val))
11418

    
11419
    # beparams changes
11420
    if self.op.beparams:
11421
      instance.beparams = self.be_inst
11422
      for key, val in self.op.beparams.iteritems():
11423
        result.append(("be/%s" % key, val))
11424

    
11425
    # OS change
11426
    if self.op.os_name:
11427
      instance.os = self.op.os_name
11428

    
11429
    # osparams changes
11430
    if self.op.osparams:
11431
      instance.osparams = self.os_inst
11432
      for key, val in self.op.osparams.iteritems():
11433
        result.append(("os/%s" % key, val))
11434

    
11435
    self.cfg.Update(instance, feedback_fn)
11436

    
11437
    return result
11438

    
11439
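  # Dispatch table used by Exec() above: maps a (current, requested) disk
  # template pair to the conversion routine implementing it. Only the
  # plain<->drbd8 conversions listed here are supported; CheckPrereq rejects
  # anything else.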
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
11443

    
11444

    
11445
class LUInstanceChangeGroup(LogicalUnit):
11446
  HPATH = "instance-change-group"
11447
  HTYPE = constants.HTYPE_INSTANCE
11448
  REQ_BGL = False
11449

    
11450
  def ExpandNames(self):
11451
    self.share_locks = _ShareAll()
11452
    self.needed_locks = {
11453
      locking.LEVEL_NODEGROUP: [],
11454
      locking.LEVEL_NODE: [],
11455
      }
11456

    
11457
    self._ExpandAndLockInstance()
11458

    
11459
    if self.op.target_groups:
11460
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11461
                                  self.op.target_groups)
11462
    else:
11463
      self.req_target_uuids = None
11464
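    # req_target_uuids is either the list of group UUIDs resolved from the
    # user-supplied names, or None, in which case every group except the
    # ones currently used by the instance becomes a candidate (see
    # CheckPrereq below).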

    
11465
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11466

    
11467
  def DeclareLocks(self, level):
11468
    if level == locking.LEVEL_NODEGROUP:
11469
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11470

    
11471
      if self.req_target_uuids:
11472
        lock_groups = set(self.req_target_uuids)
11473

    
11474
        # Lock all groups used by instance optimistically; this requires going
11475
        # via the node before it's locked, requiring verification later on
11476
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11477
        lock_groups.update(instance_groups)
11478
      else:
11479
        # No target groups, need to lock all of them
11480
        lock_groups = locking.ALL_SET
11481

    
11482
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11483

    
11484
    elif level == locking.LEVEL_NODE:
11485
      if self.req_target_uuids:
11486
        # Lock all nodes used by instances
11487
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11488
        self._LockInstancesNodes()
11489

    
11490
        # Lock all nodes in all potential target groups
11491
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11492
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11493
        member_nodes = [node_name
11494
                        for group in lock_groups
11495
                        for node_name in self.cfg.GetNodeGroup(group).members]
11496
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11497
      else:
11498
        # Lock all nodes as all groups are potential targets
11499
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11500

    
11501
  def CheckPrereq(self):
11502
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11503
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11504
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11505

    
11506
    assert (self.req_target_uuids is None or
11507
            owned_groups.issuperset(self.req_target_uuids))
11508
    assert owned_instances == set([self.op.instance_name])
11509

    
11510
    # Get instance information
11511
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11512

    
11513
    # Check if node groups for locked instance are still correct
11514
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11515
      ("Instance %s's nodes changed while we kept the lock" %
11516
       self.op.instance_name)
11517

    
11518
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11519
                                           owned_groups)
11520

    
11521
    if self.req_target_uuids:
11522
      # User requested specific target groups
11523
      self.target_uuids = self.req_target_uuids
11524
    else:
11525
      # All groups except those used by the instance are potential targets
11526
      self.target_uuids = owned_groups - inst_groups
11527

    
11528
    conflicting_groups = self.target_uuids & inst_groups
11529
    if conflicting_groups:
11530
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11531
                                 " used by the instance '%s'" %
11532
                                 (utils.CommaJoin(conflicting_groups),
11533
                                  self.op.instance_name),
11534
                                 errors.ECODE_INVAL)
11535

    
11536
    if not self.target_uuids:
11537
      raise errors.OpPrereqError("There are no possible target groups",
11538
                                 errors.ECODE_INVAL)
11539

    
11540
  def BuildHooksEnv(self):
11541
    """Build hooks env.
11542

11543
    """
11544
    assert self.target_uuids
11545

    
11546
    env = {
11547
      "TARGET_GROUPS": " ".join(self.target_uuids),
11548
      }
11549

    
11550
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11551

    
11552
    return env
11553

    
11554
  def BuildHooksNodes(self):
11555
    """Build hooks nodes.
11556

11557
    """
11558
    mn = self.cfg.GetMasterNode()
11559
    return ([mn], [mn])
11560

    
11561
  def Exec(self, feedback_fn):
11562
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11563

    
11564
    assert instances == [self.op.instance_name], "Instance not locked"
11565

    
11566
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11567
                     instances=instances, target_groups=list(self.target_uuids))
11568

    
11569
    ial.Run(self.op.iallocator)
11570

    
11571
    if not ial.success:
11572
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11573
                                 " instance '%s' using iallocator '%s': %s" %
11574
                                 (self.op.instance_name, self.op.iallocator,
11575
                                  ial.info),
11576
                                 errors.ECODE_NORES)
11577

    
11578
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11579

    
11580
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11581
                 " instance '%s'", len(jobs), self.op.instance_name)
11582

    
11583
    return ResultWithJobs(jobs)
11584

    
11585

    
11586
class LUBackupQuery(NoHooksLU):
11587
  """Query the exports list
11588

11589
  """
11590
  REQ_BGL = False
11591

    
11592
  def ExpandNames(self):
11593
    self.needed_locks = {}
11594
    self.share_locks[locking.LEVEL_NODE] = 1
11595
    if not self.op.nodes:
11596
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11597
    else:
11598
      self.needed_locks[locking.LEVEL_NODE] = \
11599
        _GetWantedNodes(self, self.op.nodes)
11600

    
11601
  def Exec(self, feedback_fn):
11602
    """Compute the list of all the exported system images.
11603

11604
    @rtype: dict
11605
    @return: a dictionary with the structure node->(export-list)
11606
        where export-list is a list of the instances exported on
        that node, or C{False} if querying that node failed.
11608

11609
    """
11610
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11611
    rpcresult = self.rpc.call_export_list(self.nodes)
11612
    result = {}
11613
    for node in rpcresult:
11614
      if rpcresult[node].fail_msg:
11615
        result[node] = False
11616
      else:
11617
        result[node] = rpcresult[node].payload
11618

    
11619
    return result
11620

    
11621

    
11622
class LUBackupPrepare(NoHooksLU):
11623
  """Prepares an instance for an export and returns useful information.
11624

11625
  """
11626
  REQ_BGL = False
11627

    
11628
  def ExpandNames(self):
11629
    self._ExpandAndLockInstance()
11630

    
11631
  def CheckPrereq(self):
11632
    """Check prerequisites.
11633

11634
    """
11635
    instance_name = self.op.instance_name
11636

    
11637
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11638
    assert self.instance is not None, \
11639
          "Cannot retrieve locked instance %s" % self.op.instance_name
11640
    _CheckNodeOnline(self, self.instance.primary_node)
11641

    
11642
    self._cds = _GetClusterDomainSecret()
11643

    
11644
  def Exec(self, feedback_fn):
11645
    """Prepares an instance for an export.
11646

11647
    """
11648
    instance = self.instance
11649

    
11650
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11651
      salt = utils.GenerateSecret(8)
11652

    
11653
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11654
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11655
                                              constants.RIE_CERT_VALIDITY)
11656
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11657

    
11658
      (name, cert_pem) = result.payload
11659

    
11660
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11661
                                             cert_pem)
11662

    
11663
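      # The key name is HMAC-signed with the cluster domain secret and a
      # fresh salt, and the X509 CA is signed the same way, so that they can
      # later be verified against the same secret without trusting the
      # caller (the matching VerifySha1Hmac/LoadSignedX509Certificate checks
      # appear in LUBackupExport.CheckPrereq below).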
      return {
11664
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11665
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11666
                          salt),
11667
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11668
        }
11669

    
11670
    return None
11671

    
11672

    
11673
class LUBackupExport(LogicalUnit):
11674
  """Export an instance to an image in the cluster.
11675

11676
  """
11677
  HPATH = "instance-export"
11678
  HTYPE = constants.HTYPE_INSTANCE
11679
  REQ_BGL = False
11680

    
11681
  def CheckArguments(self):
11682
    """Check the arguments.
11683

11684
    """
11685
    self.x509_key_name = self.op.x509_key_name
11686
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11687

    
11688
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11689
      if not self.x509_key_name:
11690
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11691
                                   errors.ECODE_INVAL)
11692

    
11693
      if not self.dest_x509_ca_pem:
11694
        raise errors.OpPrereqError("Missing destination X509 CA",
11695
                                   errors.ECODE_INVAL)
11696

    
11697
  def ExpandNames(self):
11698
    self._ExpandAndLockInstance()
11699

    
11700
    # Lock all nodes for local exports
11701
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11702
      # FIXME: lock only instance primary and destination node
11703
      #
11704
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
11710
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11711

    
11712
  def DeclareLocks(self, level):
11713
    """Last minute lock declaration."""
11714
    # All nodes are locked anyway, so nothing to do here.
11715

    
11716
  def BuildHooksEnv(self):
11717
    """Build hooks env.
11718

11719
    This will run on the master, primary node and target node.
11720

11721
    """
11722
    env = {
11723
      "EXPORT_MODE": self.op.mode,
11724
      "EXPORT_NODE": self.op.target_node,
11725
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11726
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11727
      # TODO: Generic function for boolean env variables
11728
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11729
      }
11730

    
11731
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11732

    
11733
    return env
11734

    
11735
  def BuildHooksNodes(self):
11736
    """Build hooks nodes.
11737

11738
    """
11739
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11740

    
11741
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11742
      nl.append(self.op.target_node)
11743

    
11744
    return (nl, nl)
11745

    
11746
  def CheckPrereq(self):
11747
    """Check prerequisites.
11748

11749
    This checks that the instance and node names are valid.
11750

11751
    """
11752
    instance_name = self.op.instance_name
11753

    
11754
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11755
    assert self.instance is not None, \
11756
          "Cannot retrieve locked instance %s" % self.op.instance_name
11757
    _CheckNodeOnline(self, self.instance.primary_node)
11758

    
11759
    if (self.op.remove_instance and self.instance.admin_up and
11760
        not self.op.shutdown):
11761
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11762
                                 " down before")
11763

    
11764
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11765
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11766
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11767
      assert self.dst_node is not None
11768

    
11769
      _CheckNodeOnline(self, self.dst_node.name)
11770
      _CheckNodeNotDrained(self, self.dst_node.name)
11771

    
11772
      self._cds = None
11773
      self.dest_disk_info = None
11774
      self.dest_x509_ca = None
11775

    
11776
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11777
      self.dst_node = None
11778

    
11779
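      # In remote mode, target_node is not a node name but a list with one
      # destination entry per instance disk; each entry is decoded into a
      # (host, port, magic) tuple by CheckRemoteExportDiskInfo further down.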
      if len(self.op.target_node) != len(self.instance.disks):
11780
        raise errors.OpPrereqError(("Received destination information for %s"
11781
                                    " disks, but instance %s has %s disks") %
11782
                                   (len(self.op.target_node), instance_name,
11783
                                    len(self.instance.disks)),
11784
                                   errors.ECODE_INVAL)
11785

    
11786
      cds = _GetClusterDomainSecret()
11787

    
11788
      # Check X509 key name
11789
      try:
11790
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11791
      except (TypeError, ValueError), err:
11792
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11793

    
11794
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11795
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11796
                                   errors.ECODE_INVAL)
11797

    
11798
      # Load and verify CA
11799
      try:
11800
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11801
      except OpenSSL.crypto.Error, err:
11802
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11803
                                   (err, ), errors.ECODE_INVAL)
11804

    
11805
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11806
      if errcode is not None:
11807
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11808
                                   (msg, ), errors.ECODE_INVAL)
11809

    
11810
      self.dest_x509_ca = cert
11811

    
11812
      # Verify target information
11813
      disk_info = []
11814
      for idx, disk_data in enumerate(self.op.target_node):
11815
        try:
11816
          (host, port, magic) = \
11817
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11818
        except errors.GenericError, err:
11819
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11820
                                     (idx, err), errors.ECODE_INVAL)
11821

    
11822
        disk_info.append((host, port, magic))
11823

    
11824
      assert len(disk_info) == len(self.op.target_node)
11825
      self.dest_disk_info = disk_info
11826

    
11827
    else:
11828
      raise errors.ProgrammerError("Unhandled export mode %r" %
11829
                                   self.op.mode)
11830

    
11831
    # instance disk type verification
11832
    # TODO: Implement export support for file-based disks
11833
    for disk in self.instance.disks:
11834
      if disk.dev_type == constants.LD_FILE:
11835
        raise errors.OpPrereqError("Export not supported for instances with"
11836
                                   " file-based disks", errors.ECODE_INVAL)
11837

    
11838
  def _CleanupExports(self, feedback_fn):
11839
    """Removes exports of current instance from all other nodes.
11840

11841
    If an instance in a cluster with nodes A..D was exported to node C, its
11842
    exports will be removed from the nodes A, B and D.
11843

11844
    """
11845
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11846

    
11847
    nodelist = self.cfg.GetNodeList()
11848
    nodelist.remove(self.dst_node.name)
11849

    
11850
    # on one-node clusters nodelist will be empty after the removal
11851
    # if we proceed the backup would be removed because OpBackupQuery
11852
    # substitutes an empty list with the full cluster node list.
11853
    iname = self.instance.name
11854
    if nodelist:
11855
      feedback_fn("Removing old exports for instance %s" % iname)
11856
      exportlist = self.rpc.call_export_list(nodelist)
11857
      for node in exportlist:
11858
        if exportlist[node].fail_msg:
11859
          continue
11860
        if iname in exportlist[node].payload:
11861
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11862
          if msg:
11863
            self.LogWarning("Could not remove older export for instance %s"
11864
                            " on node %s: %s", iname, node, msg)
11865

    
11866
  def Exec(self, feedback_fn):
11867
    """Export an instance to an image in the cluster.
11868

11869
    """
11870
    assert self.op.mode in constants.EXPORT_MODES
11871

    
11872
    instance = self.instance
11873
    src_node = instance.primary_node
11874

    
11875
    if self.op.shutdown:
11876
      # shutdown the instance, but not the disks
11877
      feedback_fn("Shutting down instance %s" % instance.name)
11878
      result = self.rpc.call_instance_shutdown(src_node, instance,
11879
                                               self.op.shutdown_timeout)
11880
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11881
      result.Raise("Could not shutdown instance %s on"
11882
                   " node %s" % (instance.name, src_node))
11883

    
11884
    # set the disks ID correctly since call_instance_start needs the
11885
    # correct drbd minor to create the symlinks
11886
    for disk in instance.disks:
11887
      self.cfg.SetDiskID(disk, src_node)
11888

    
11889
    activate_disks = (not instance.admin_up)
11890

    
11891
    if activate_disks:
11892
      # Activate the instance disks if we're exporting a stopped instance
11893
      feedback_fn("Activating disks for %s" % instance.name)
11894
      _StartInstanceDisks(self, instance, None)
11895

    
11896
    try:
11897
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11898
                                                     instance)
11899

    
11900
      helper.CreateSnapshots()
11901
      try:
11902
        if (self.op.shutdown and instance.admin_up and
11903
            not self.op.remove_instance):
11904
          assert not activate_disks
11905
          feedback_fn("Starting instance %s" % instance.name)
11906
          result = self.rpc.call_instance_start(src_node,
11907
                                                (instance, None, None), False)
11908
          msg = result.fail_msg
11909
          if msg:
11910
            feedback_fn("Failed to start instance: %s" % msg)
11911
            _ShutdownInstanceDisks(self, instance)
11912
            raise errors.OpExecError("Could not start instance: %s" % msg)
11913

    
11914
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11915
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11916
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11917
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11918
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11919

    
11920
          (key_name, _, _) = self.x509_key_name
11921

    
11922
          dest_ca_pem = \
11923
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11924
                                            self.dest_x509_ca)
11925

    
11926
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11927
                                                     key_name, dest_ca_pem,
11928
                                                     timeouts)
11929
      finally:
11930
        helper.Cleanup()
11931

    
11932
      # Check for backwards compatibility
11933
      assert len(dresults) == len(instance.disks)
11934
      assert compat.all(isinstance(i, bool) for i in dresults), \
11935
             "Not all results are boolean: %r" % dresults
11936

    
11937
    finally:
11938
      if activate_disks:
11939
        feedback_fn("Deactivating disks for %s" % instance.name)
11940
        _ShutdownInstanceDisks(self, instance)
11941

    
11942
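    # fin_resu reflects the overall export finalization, while dresults holds
    # one boolean per instance disk; any False entry is reported below with
    # the indices of the disks whose export failed.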
    if not (compat.all(dresults) and fin_resu):
11943
      failures = []
11944
      if not fin_resu:
11945
        failures.append("export finalization")
11946
      if not compat.all(dresults):
11947
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11948
                               if not dsk)
11949
        failures.append("disk export: disk(s) %s" % fdsk)
11950

    
11951
      raise errors.OpExecError("Export failed, errors in %s" %
11952
                               utils.CommaJoin(failures))
11953

    
11954
    # At this point, the export was successful, we can cleanup/finish
11955

    
11956
    # Remove instance if requested
11957
    if self.op.remove_instance:
11958
      feedback_fn("Removing instance %s" % instance.name)
11959
      _RemoveInstance(self, feedback_fn, instance,
11960
                      self.op.ignore_remove_failures)
11961

    
11962
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11963
      self._CleanupExports(feedback_fn)
11964

    
11965
    return fin_resu, dresults
11966

    
11967

    
11968
class LUBackupRemove(NoHooksLU):
11969
  """Remove exports related to the named instance.
11970

11971
  """
11972
  REQ_BGL = False
11973

    
11974
  def ExpandNames(self):
11975
    self.needed_locks = {}
11976
    # We need all nodes to be locked in order for RemoveExport to work, but we
11977
    # don't need to lock the instance itself, as nothing will happen to it (and
11978
    # we can remove exports also for a removed instance)
11979
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11980

    
11981
  def Exec(self, feedback_fn):
11982
    """Remove any export.
11983

11984
    """
11985
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11986
    # If the instance was not found we'll try with the name that was passed in.
11987
    # This will only work if it was an FQDN, though.
11988
    fqdn_warn = False
11989
    if not instance_name:
11990
      fqdn_warn = True
11991
      instance_name = self.op.instance_name
11992

    
11993
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11994
    exportlist = self.rpc.call_export_list(locked_nodes)
11995
    found = False
11996
    for node in exportlist:
11997
      msg = exportlist[node].fail_msg
11998
      if msg:
11999
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12000
        continue
12001
      if instance_name in exportlist[node].payload:
12002
        found = True
12003
        result = self.rpc.call_export_remove(node, instance_name)
12004
        msg = result.fail_msg
12005
        if msg:
12006
          logging.error("Could not remove export for instance %s"
12007
                        " on node %s: %s", instance_name, node, msg)
12008

    
12009
    if fqdn_warn and not found:
12010
      feedback_fn("Export not found. If trying to remove an export belonging"
12011
                  " to a deleted instance please use its Fully Qualified"
12012
                  " Domain Name.")
12013

    
12014

    
12015
class LUGroupAdd(LogicalUnit):
12016
  """Logical unit for creating node groups.
12017

12018
  """
12019
  HPATH = "group-add"
12020
  HTYPE = constants.HTYPE_GROUP
12021
  REQ_BGL = False
12022

    
12023
  def ExpandNames(self):
12024
    # We need the new group's UUID here so that we can create and acquire the
12025
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12026
    # that it should not check whether the UUID exists in the configuration.
12027
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12028
    self.needed_locks = {}
12029
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12030

    
12031
  def CheckPrereq(self):
12032
    """Check prerequisites.
12033

12034
    This checks that the given group name is not an existing node group
12035
    already.
12036

12037
    """
12038
    try:
12039
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12040
    except errors.OpPrereqError:
12041
      pass
12042
    else:
12043
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12044
                                 " node group (UUID: %s)" %
12045
                                 (self.op.group_name, existing_uuid),
12046
                                 errors.ECODE_EXISTS)
12047

    
12048
    if self.op.ndparams:
12049
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12050

    
12051
  def BuildHooksEnv(self):
12052
    """Build hooks env.
12053

12054
    """
12055
    return {
12056
      "GROUP_NAME": self.op.group_name,
12057
      }
12058

    
12059
  def BuildHooksNodes(self):
12060
    """Build hooks nodes.
12061

12062
    """
12063
    mn = self.cfg.GetMasterNode()
12064
    return ([mn], [mn])
12065

    
12066
  def Exec(self, feedback_fn):
12067
    """Add the node group to the cluster.
12068

12069
    """
12070
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12071
                                  uuid=self.group_uuid,
12072
                                  alloc_policy=self.op.alloc_policy,
12073
                                  ndparams=self.op.ndparams)
12074

    
12075
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12076
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12077

    
12078

    
12079
class LUGroupAssignNodes(NoHooksLU):
12080
  """Logical unit for assigning nodes to groups.
12081

12082
  """
12083
  REQ_BGL = False
12084

    
12085
  def ExpandNames(self):
12086
    # These raise errors.OpPrereqError on their own:
12087
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12088
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12089

    
12090
    # We want to lock all the affected nodes and groups. We have readily
12091
    # available the list of nodes, and the *destination* group. To gather the
12092
    # list of "source" groups, we need to fetch node information later on.
12093
    self.needed_locks = {
12094
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12095
      locking.LEVEL_NODE: self.op.nodes,
12096
      }
12097

    
12098
  def DeclareLocks(self, level):
12099
    if level == locking.LEVEL_NODEGROUP:
12100
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12101

    
12102
      # Try to get all affected nodes' groups without having the group or node
12103
      # lock yet. Needs verification later in the code flow.
12104
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12105

    
12106
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12107

    
12108
  def CheckPrereq(self):
12109
    """Check prerequisites.
12110

12111
    """
12112
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12113
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12114
            frozenset(self.op.nodes))
12115

    
12116
    expected_locks = (set([self.group_uuid]) |
12117
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12118
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12119
    if actual_locks != expected_locks:
12120
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12121
                               " current groups are '%s', used to be '%s'" %
12122
                               (utils.CommaJoin(expected_locks),
12123
                                utils.CommaJoin(actual_locks)))
12124

    
12125
    self.node_data = self.cfg.GetAllNodesInfo()
12126
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12127
    instance_data = self.cfg.GetAllInstancesInfo()
12128

    
12129
    if self.group is None:
12130
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12131
                               (self.op.group_name, self.group_uuid))
12132

    
12133
    (new_splits, previous_splits) = \
12134
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12135
                                             for node in self.op.nodes],
12136
                                            self.node_data, instance_data)
12137

    
12138
    if new_splits:
12139
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12140

    
12141
      if not self.op.force:
12142
        raise errors.OpExecError("The following instances get split by this"
12143
                                 " change and --force was not given: %s" %
12144
                                 fmt_new_splits)
12145
      else:
12146
        self.LogWarning("This operation will split the following instances: %s",
12147
                        fmt_new_splits)
12148

    
12149
        if previous_splits:
12150
          self.LogWarning("In addition, these already-split instances continue"
12151
                          " to be split across groups: %s",
12152
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12153

    
12154
  def Exec(self, feedback_fn):
12155
    """Assign nodes to a new group.
12156

12157
    """
12158
    for node in self.op.nodes:
12159
      self.node_data[node].group = self.group_uuid
12160

    
12161
    # FIXME: Depends on side-effects of modifying the result of
12162
    # C{cfg.GetAllNodesInfo}
12163

    
12164
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12165

    
12166
  @staticmethod
12167
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12168
    """Check for split instances after a node assignment.
12169

12170
    This method considers a series of node assignments as an atomic operation,
12171
    and returns information about split instances after applying the set of
12172
    changes.
12173

12174
    In particular, it returns information about newly split instances, and
12175
    instances that were already split, and remain so after the change.
12176

12177
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12178
    considered.
12179

12180
    @type changes: list of (node_name, new_group_uuid) pairs.
12181
    @param changes: list of node assignments to consider.
12182
    @param node_data: a dict with data for all nodes
12183
    @param instance_data: a dict with all instances to consider
12184
    @rtype: a two-tuple
12185
    @return: a list of instances that were previously okay and result split as a
12186
      consequence of this change, and a list of instances that were previously
12187
      split and this change does not fix.
12188

12189
    """
12190
    changed_nodes = dict((node, group) for node, group in changes
12191
                         if node_data[node].group != group)
12192

    
12193
    all_split_instances = set()
12194
    previously_split_instances = set()
12195

    
12196
    def InstanceNodes(instance):
12197
      return [instance.primary_node] + list(instance.secondary_nodes)
12198

    
12199
    for inst in instance_data.values():
12200
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12201
        continue
12202

    
12203
      instance_nodes = InstanceNodes(inst)
12204

    
12205
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12206
        previously_split_instances.add(inst.name)
12207

    
12208
      if len(set(changed_nodes.get(node, node_data[node].group)
12209
                 for node in instance_nodes)) > 1:
12210
        all_split_instances.add(inst.name)
12211

    
12212
    return (list(all_split_instances - previously_split_instances),
12213
            list(previously_split_instances & all_split_instances))
12214

    
12215

    
12216
class _GroupQuery(_QueryBase):
12217
  FIELDS = query.GROUP_FIELDS
12218

    
12219
  def ExpandNames(self, lu):
12220
    lu.needed_locks = {}
12221

    
12222
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12223
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12224

    
12225
    if not self.names:
12226
      self.wanted = [name_to_uuid[name]
12227
                     for name in utils.NiceSort(name_to_uuid.keys())]
12228
    else:
12229
      # Accept names to be either names or UUIDs.
12230
      missing = []
12231
      self.wanted = []
12232
      all_uuid = frozenset(self._all_groups.keys())
12233

    
12234
      for name in self.names:
12235
        if name in all_uuid:
12236
          self.wanted.append(name)
12237
        elif name in name_to_uuid:
12238
          self.wanted.append(name_to_uuid[name])
12239
        else:
12240
          missing.append(name)
12241

    
12242
      if missing:
12243
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12244
                                   utils.CommaJoin(missing),
12245
                                   errors.ECODE_NOENT)
12246

    
12247
  def DeclareLocks(self, lu, level):
12248
    pass
12249

    
12250
  def _GetQueryData(self, lu):
12251
    """Computes the list of node groups and their attributes.
12252

12253
    """
12254
    do_nodes = query.GQ_NODE in self.requested_data
12255
    do_instances = query.GQ_INST in self.requested_data
12256

    
12257
    group_to_nodes = None
12258
    group_to_instances = None
12259

    
12260
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12261
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12262
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12263
    # instance->node. Hence, we will need to process nodes even if we only need
12264
    # instance information.
12265
    if do_nodes or do_instances:
12266
      all_nodes = lu.cfg.GetAllNodesInfo()
12267
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12268
      node_to_group = {}
12269

    
12270
      for node in all_nodes.values():
12271
        if node.group in group_to_nodes:
12272
          group_to_nodes[node.group].append(node.name)
12273
          node_to_group[node.name] = node.group
12274

    
12275
      if do_instances:
12276
        all_instances = lu.cfg.GetAllInstancesInfo()
12277
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12278

    
12279
        for instance in all_instances.values():
12280
          node = instance.primary_node
12281
          if node in node_to_group:
12282
            group_to_instances[node_to_group[node]].append(instance.name)
12283

    
12284
        if not do_nodes:
12285
          # Do not pass on node information if it was not requested.
12286
          group_to_nodes = None
12287

    
12288
    return query.GroupQueryData([self._all_groups[uuid]
12289
                                 for uuid in self.wanted],
12290
                                group_to_nodes, group_to_instances)
12291

    
12292

    
12293
class LUGroupQuery(NoHooksLU):
12294
  """Logical unit for querying node groups.
12295

12296
  """
12297
  REQ_BGL = False
12298

    
12299
  def CheckArguments(self):
12300
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12301
                          self.op.output_fields, False)
12302

    
12303
  def ExpandNames(self):
12304
    self.gq.ExpandNames(self)
12305

    
12306
  def DeclareLocks(self, level):
12307
    self.gq.DeclareLocks(self, level)
12308

    
12309
  def Exec(self, feedback_fn):
12310
    return self.gq.OldStyleQuery(self)
12311

    
12312

    
12313
class LUGroupSetParams(LogicalUnit):
12314
  """Modifies the parameters of a node group.
12315

12316
  """
12317
  HPATH = "group-modify"
12318
  HTYPE = constants.HTYPE_GROUP
12319
  REQ_BGL = False
12320

    
12321
  def CheckArguments(self):
12322
    all_changes = [
12323
      self.op.ndparams,
12324
      self.op.alloc_policy,
12325
      ]
12326

    
12327
    if all_changes.count(None) == len(all_changes):
12328
      raise errors.OpPrereqError("Please pass at least one modification",
12329
                                 errors.ECODE_INVAL)
12330

    
12331
  def ExpandNames(self):
12332
    # This raises errors.OpPrereqError on its own:
12333
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12334

    
12335
    self.needed_locks = {
12336
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12337
      }
12338

    
12339
  def CheckPrereq(self):
12340
    """Check prerequisites.
12341

12342
    """
12343
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12344

    
12345
    if self.group is None:
12346
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12347
                               (self.op.group_name, self.group_uuid))
12348

    
12349
    if self.op.ndparams:
12350
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12351
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12352
      self.new_ndparams = new_ndparams
12353

    
12354
  def BuildHooksEnv(self):
12355
    """Build hooks env.
12356

12357
    """
12358
    return {
12359
      "GROUP_NAME": self.op.group_name,
12360
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12361
      }
12362

    
12363
  def BuildHooksNodes(self):
12364
    """Build hooks nodes.
12365

12366
    """
12367
    mn = self.cfg.GetMasterNode()
12368
    return ([mn], [mn])
12369

    
12370
  def Exec(self, feedback_fn):
12371
    """Modifies the node group.
12372

12373
    """
12374
    result = []
12375

    
12376
    if self.op.ndparams:
12377
      self.group.ndparams = self.new_ndparams
12378
      result.append(("ndparams", str(self.group.ndparams)))
12379

    
12380
    if self.op.alloc_policy:
12381
      self.group.alloc_policy = self.op.alloc_policy
12382

    
12383
    self.cfg.Update(self.group, feedback_fn)
12384
    return result
12385

    
12386

    
12387
class LUGroupRemove(LogicalUnit):
12388
  HPATH = "group-remove"
12389
  HTYPE = constants.HTYPE_GROUP
12390
  REQ_BGL = False
12391

    
12392
  def ExpandNames(self):
12393
    # This will raises errors.OpPrereqError on its own:
12394
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12395
    self.needed_locks = {
12396
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12397
      }
12398

    
12399
  def CheckPrereq(self):
12400
    """Check prerequisites.
12401

12402
    This checks that the given group name exists as a node group, that is
12403
    empty (i.e., contains no nodes), and that is not the last group of the
12404
    cluster.
12405

12406
    """
12407
    # Verify that the group is empty.
12408
    group_nodes = [node.name
12409
                   for node in self.cfg.GetAllNodesInfo().values()
12410
                   if node.group == self.group_uuid]
12411

    
12412
    if group_nodes:
12413
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12414
                                 " nodes: %s" %
12415
                                 (self.op.group_name,
12416
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12417
                                 errors.ECODE_STATE)
12418

    
12419
    # Verify the cluster would not be left group-less.
12420
    if len(self.cfg.GetNodeGroupList()) == 1:
12421
      raise errors.OpPrereqError("Group '%s' is the only group,"
12422
                                 " cannot be removed" %
12423
                                 self.op.group_name,
12424
                                 errors.ECODE_STATE)
12425

    
12426
  def BuildHooksEnv(self):
12427
    """Build hooks env.
12428

12429
    """
12430
    return {
12431
      "GROUP_NAME": self.op.group_name,
12432
      }
12433

    
12434
  def BuildHooksNodes(self):
12435
    """Build hooks nodes.
12436

12437
    """
12438
    mn = self.cfg.GetMasterNode()
12439
    return ([mn], [mn])
12440

    
12441
  def Exec(self, feedback_fn):
12442
    """Remove the node group.
12443

12444
    """
12445
    try:
12446
      self.cfg.RemoveNodeGroup(self.group_uuid)
12447
    except errors.ConfigurationError:
12448
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12449
                               (self.op.group_name, self.group_uuid))
12450

    
12451
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12452

    
12453

    
12454
class LUGroupRename(LogicalUnit):
12455
  HPATH = "group-rename"
12456
  HTYPE = constants.HTYPE_GROUP
12457
  REQ_BGL = False
12458

    
12459
  def ExpandNames(self):
12460
    # This raises errors.OpPrereqError on its own:
12461
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12462

    
12463
    self.needed_locks = {
12464
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12465
      }
12466

    
12467
  def CheckPrereq(self):
12468
    """Check prerequisites.
12469

12470
    Ensures requested new name is not yet used.
12471

12472
    """
12473
    try:
12474
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12475
    except errors.OpPrereqError:
12476
      pass
12477
    else:
12478
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12479
                                 " node group (UUID: %s)" %
12480
                                 (self.op.new_name, new_name_uuid),
12481
                                 errors.ECODE_EXISTS)
12482

    
12483
  def BuildHooksEnv(self):
12484
    """Build hooks env.
12485

12486
    """
12487
    return {
12488
      "OLD_NAME": self.op.group_name,
12489
      "NEW_NAME": self.op.new_name,
12490
      }
12491

    
12492
  def BuildHooksNodes(self):
12493
    """Build hooks nodes.
12494

12495
    """
12496
    mn = self.cfg.GetMasterNode()
12497

    
12498
    all_nodes = self.cfg.GetAllNodesInfo()
12499
    all_nodes.pop(mn, None)
12500

    
12501
    run_nodes = [mn]
12502
    run_nodes.extend(node.name for node in all_nodes.values()
12503
                     if node.group == self.group_uuid)
12504

    
12505
    return (run_nodes, run_nodes)
12506

    
12507
  def Exec(self, feedback_fn):
12508
    """Rename the node group.
12509

12510
    """
12511
    group = self.cfg.GetNodeGroup(self.group_uuid)
12512

    
12513
    if group is None:
12514
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12515
                               (self.op.group_name, self.group_uuid))
12516

    
12517
    group.name = self.op.new_name
12518
    self.cfg.Update(group, feedback_fn)
12519

    
12520
    return self.op.new_name
12521

    
12522

    
12523
class LUGroupEvacuate(LogicalUnit):
12524
  HPATH = "group-evacuate"
12525
  HTYPE = constants.HTYPE_GROUP
12526
  REQ_BGL = False
12527

    
12528
  def ExpandNames(self):
12529
    # This raises errors.OpPrereqError on its own:
12530
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12531

    
12532
    if self.op.target_groups:
12533
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12534
                                  self.op.target_groups)
12535
    else:
12536
      self.req_target_uuids = []
12537

    
12538
    if self.group_uuid in self.req_target_uuids:
12539
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12540
                                 " as a target group (targets are %s)" %
12541
                                 (self.group_uuid,
12542
                                  utils.CommaJoin(self.req_target_uuids)),
12543
                                 errors.ECODE_INVAL)
12544

    
12545
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12546

    
12547
    self.share_locks = _ShareAll()
12548
    self.needed_locks = {
12549
      locking.LEVEL_INSTANCE: [],
12550
      locking.LEVEL_NODEGROUP: [],
12551
      locking.LEVEL_NODE: [],
12552
      }
12553

    
12554
  def DeclareLocks(self, level):
12555
    if level == locking.LEVEL_INSTANCE:
12556
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12557

    
12558
      # Lock instances optimistically, needs verification once node and group
12559
      # locks have been acquired
12560
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12561
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12562

    
12563
    elif level == locking.LEVEL_NODEGROUP:
12564
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12565

    
12566
      if self.req_target_uuids:
12567
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12568

    
12569
        # Lock all groups used by instances optimistically; this requires going
12570
        # via the node before it's locked, requiring verification later on
12571
        lock_groups.update(group_uuid
12572
                           for instance_name in
12573
                             self.owned_locks(locking.LEVEL_INSTANCE)
12574
                           for group_uuid in
12575
                             self.cfg.GetInstanceNodeGroups(instance_name))
12576
      else:
12577
        # No target groups, need to lock all of them
12578
        lock_groups = locking.ALL_SET
12579

    
12580
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12581

    
12582
    elif level == locking.LEVEL_NODE:
12583
      # This will only lock the nodes in the group to be evacuated which
12584
      # contain actual instances
12585
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12586
      self._LockInstancesNodes()
12587

    
12588
      # Lock all nodes in group to be evacuated and target groups
12589
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12590
      assert self.group_uuid in owned_groups
12591
      member_nodes = [node_name
12592
                      for group in owned_groups
12593
                      for node_name in self.cfg.GetNodeGroup(group).members]
12594
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12595

    
12596
  def CheckPrereq(self):
12597
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12598
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12599
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12600

    
12601
    assert owned_groups.issuperset(self.req_target_uuids)
12602
    assert self.group_uuid in owned_groups
12603

    
12604
    # Check if locked instances are still correct
12605
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12606

    
12607
    # Get instance information
12608
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12609

    
12610
    # Check if node groups for locked instances are still correct
12611
    for instance_name in owned_instances:
12612
      inst = self.instances[instance_name]
12613
      assert owned_nodes.issuperset(inst.all_nodes), \
12614
        "Instance %s's nodes changed while we kept the lock" % instance_name
12615

    
12616
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12617
                                             owned_groups)
12618

    
12619
      assert self.group_uuid in inst_groups, \
12620
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12621

    
12622
    if self.req_target_uuids:
12623
      # User requested specific target groups
12624
      self.target_uuids = self.req_target_uuids
12625
    else:
12626
      # All groups except the one to be evacuated are potential targets
12627
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12628
                           if group_uuid != self.group_uuid]
12629

    
12630
      if not self.target_uuids:
12631
        raise errors.OpPrereqError("There are no possible target groups",
12632
                                   errors.ECODE_INVAL)
12633

    
12634
  def BuildHooksEnv(self):
12635
    """Build hooks env.
12636

12637
    """
12638
    return {
12639
      "GROUP_NAME": self.op.group_name,
12640
      "TARGET_GROUPS": " ".join(self.target_uuids),
12641
      }
12642

    
12643
  def BuildHooksNodes(self):
12644
    """Build hooks nodes.
12645

12646
    """
12647
    mn = self.cfg.GetMasterNode()
12648

    
12649
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12650

    
12651
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12652

    
12653
    return (run_nodes, run_nodes)
12654

    
12655
  def Exec(self, feedback_fn):
12656
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12657

    
12658
    assert self.group_uuid not in self.target_uuids
12659

    
12660
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12661
                     instances=instances, target_groups=self.target_uuids)
12662

    
12663
    ial.Run(self.op.iallocator)
12664

    
12665
    if not ial.success:
12666
      raise errors.OpPrereqError("Can't compute group evacuation using"
12667
                                 " iallocator '%s': %s" %
12668
                                 (self.op.iallocator, ial.info),
12669
                                 errors.ECODE_NORES)
12670

    
12671
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12672

    
12673
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12674
                 len(jobs), self.op.group_name)
12675

    
12676
    return ResultWithJobs(jobs)
12677

    
12678

    
12679
class TagsLU(NoHooksLU): # pylint: disable=W0223
12680
  """Generic tags LU.
12681

12682
  This is an abstract class which is the parent of all the other tags LUs.
12683

12684
  """
12685
  def ExpandNames(self):
12686
    self.group_uuid = None
12687
    self.needed_locks = {}
12688
    if self.op.kind == constants.TAG_NODE:
12689
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12690
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12691
    elif self.op.kind == constants.TAG_INSTANCE:
12692
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12693
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12694
    elif self.op.kind == constants.TAG_NODEGROUP:
12695
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12696

    
12697
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12698
    # not possible to acquire the BGL based on opcode parameters)
12699

    
12700
  def CheckPrereq(self):
12701
    """Check prerequisites.
12702

12703
    """
12704
    if self.op.kind == constants.TAG_CLUSTER:
12705
      self.target = self.cfg.GetClusterInfo()
12706
    elif self.op.kind == constants.TAG_NODE:
12707
      self.target = self.cfg.GetNodeInfo(self.op.name)
12708
    elif self.op.kind == constants.TAG_INSTANCE:
12709
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12710
    elif self.op.kind == constants.TAG_NODEGROUP:
12711
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12712
    else:
12713
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12714
                                 str(self.op.kind), errors.ECODE_INVAL)
12715

    
12716

    
12717
class LUTagsGet(TagsLU):
12718
  """Returns the tags of a given object.
12719

12720
  """
12721
  REQ_BGL = False
12722

    
12723
  def ExpandNames(self):
12724
    TagsLU.ExpandNames(self)
12725

    
12726
    # Share locks as this is only a read operation
12727
    self.share_locks = _ShareAll()
12728

    
12729
  def Exec(self, feedback_fn):
12730
    """Returns the tag list.
12731

12732
    """
12733
    return list(self.target.GetTags())
12734

    
12735

    
12736
class LUTagsSearch(NoHooksLU):
12737
  """Searches the tags for a given pattern.
12738

12739
  """
12740
  REQ_BGL = False
12741

    
12742
  def ExpandNames(self):
12743
    self.needed_locks = {}
12744

    
12745
  def CheckPrereq(self):
12746
    """Check prerequisites.
12747

12748
    This checks the pattern passed for validity by compiling it.
12749

12750
    """
12751
    try:
12752
      self.re = re.compile(self.op.pattern)
12753
    except re.error, err:
12754
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12755
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12756

    
12757
  def Exec(self, feedback_fn):
12758
    """Returns the tag list.
12759

12760
    """
12761
    cfg = self.cfg
12762
    tgts = [("/cluster", cfg.GetClusterInfo())]
12763
    ilist = cfg.GetAllInstancesInfo().values()
12764
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12765
    nlist = cfg.GetAllNodesInfo().values()
12766
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12767
    tgts.extend(("/nodegroup/%s" % n.name, n)
12768
                for n in cfg.GetAllNodeGroupsInfo().values())
12769
    results = []
12770
    for path, target in tgts:
12771
      for tag in target.GetTags():
12772
        if self.re.search(tag):
12773
          results.append((path, tag))
12774
    return results
12775

    
12776

    
12777
class LUTagsSet(TagsLU):
12778
  """Sets a tag on a given object.
12779

12780
  """
12781
  REQ_BGL = False
12782

    
12783
  def CheckPrereq(self):
12784
    """Check prerequisites.
12785

12786
    This checks the type and length of the tag name and value.
12787

12788
    """
12789
    TagsLU.CheckPrereq(self)
12790
    for tag in self.op.tags:
12791
      objects.TaggableObject.ValidateTag(tag)
12792

    
12793
  def Exec(self, feedback_fn):
12794
    """Sets the tag.
12795

12796
    """
12797
    try:
12798
      for tag in self.op.tags:
12799
        self.target.AddTag(tag)
12800
    except errors.TagError, err:
12801
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12802
    self.cfg.Update(self.target, feedback_fn)
12803

    
12804

    
12805
class LUTagsDel(TagsLU):
12806
  """Delete a list of tags from a given object.
12807

12808
  """
12809
  REQ_BGL = False
12810

    
12811
  def CheckPrereq(self):
12812
    """Check prerequisites.
12813

12814
    This checks that we have the given tag.
12815

12816
    """
12817
    TagsLU.CheckPrereq(self)
12818
    for tag in self.op.tags:
12819
      objects.TaggableObject.ValidateTag(tag)
12820
    del_tags = frozenset(self.op.tags)
12821
    cur_tags = self.target.GetTags()
12822

    
12823
    diff_tags = del_tags - cur_tags
12824
    if diff_tags:
12825
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12826
      raise errors.OpPrereqError("Tag(s) %s not found" %
12827
                                 (utils.CommaJoin(diff_names), ),
12828
                                 errors.ECODE_NOENT)
12829

    
12830
  def Exec(self, feedback_fn):
12831
    """Remove the tag from the object.
12832

12833
    """
12834
    for tag in self.op.tags:
12835
      self.target.RemoveTag(tag)
12836
    self.cfg.Update(self.target, feedback_fn)
12837

    
12838

    
12839
class LUTestDelay(NoHooksLU):
12840
  """Sleep for a specified amount of time.
12841

12842
  This LU sleeps on the master and/or nodes for a specified amount of
12843
  time.
12844

12845
  """
12846
  REQ_BGL = False
12847

    
12848
  def ExpandNames(self):
12849
    """Expand names and set required locks.
12850

12851
    This expands the node list, if any.
12852

12853
    """
12854
    self.needed_locks = {}
12855
    if self.op.on_nodes:
12856
      # _GetWantedNodes can be used here, but is not always appropriate to use
12857
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12858
      # more information.
12859
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12860
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12861

    
12862
  def _TestDelay(self):
12863
    """Do the actual sleep.
12864

12865
    """
12866
    if self.op.on_master:
12867
      if not utils.TestDelay(self.op.duration):
12868
        raise errors.OpExecError("Error during master delay test")
12869
    if self.op.on_nodes:
12870
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12871
      for node, node_result in result.items():
12872
        node_result.Raise("Failure during rpc call to node %s" % node)
12873

    
12874
  def Exec(self, feedback_fn):
12875
    """Execute the test delay opcode, with the wanted repetitions.
12876

12877
    """
12878
    if self.op.repeat == 0:
12879
      self._TestDelay()
12880
    else:
12881
      top_value = self.op.repeat - 1
12882
      for i in range(self.op.repeat):
12883
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12884
        self._TestDelay()
12885

    
12886

    
12887
class LUTestJqueue(NoHooksLU):
12888
  """Utility LU to test some aspects of the job queue.
12889

12890
  """
12891
  REQ_BGL = False
12892

    
12893
  # Must be lower than default timeout for WaitForJobChange to see whether it
12894
  # notices changed jobs
12895
  _CLIENT_CONNECT_TIMEOUT = 20.0
12896
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12897

    
12898
  @classmethod
12899
  def _NotifyUsingSocket(cls, cb, errcls):
12900
    """Opens a Unix socket and waits for another program to connect.
12901

12902
    @type cb: callable
12903
    @param cb: Callback to send socket name to client
12904
    @type errcls: class
12905
    @param errcls: Exception class to use for errors
12906

12907
    """
12908
    # Using a temporary directory as there's no easy way to create temporary
12909
    # sockets without writing a custom loop around tempfile.mktemp and
12910
    # socket.bind
12911
    tmpdir = tempfile.mkdtemp()
12912
    try:
12913
      tmpsock = utils.PathJoin(tmpdir, "sock")
12914

    
12915
      logging.debug("Creating temporary socket at %s", tmpsock)
12916
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12917
      try:
12918
        sock.bind(tmpsock)
12919
        sock.listen(1)
12920

    
12921
        # Send details to client
12922
        cb(tmpsock)
12923

    
12924
        # Wait for client to connect before continuing
12925
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12926
        try:
12927
          (conn, _) = sock.accept()
12928
        except socket.error, err:
12929
          raise errcls("Client didn't connect in time (%s)" % err)
12930
      finally:
12931
        sock.close()
12932
    finally:
12933
      # Remove as soon as client is connected
12934
      shutil.rmtree(tmpdir)
12935

    
12936
    # Wait for client to close
12937
    try:
12938
      try:
12939
        # pylint: disable=E1101
12940
        # Instance of '_socketobject' has no ... member
12941
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12942
        conn.recv(1)
12943
      except socket.error, err:
12944
        raise errcls("Client failed to confirm notification (%s)" % err)
12945
    finally:
12946
      conn.close()
12947

    
12948
  def _SendNotification(self, test, arg, sockname):
12949
    """Sends a notification to the client.
12950

12951
    @type test: string
12952
    @param test: Test name
12953
    @param arg: Test argument (depends on test)
12954
    @type sockname: string
12955
    @param sockname: Socket path
12956

12957
    """
12958
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12959

    
12960
  def _Notify(self, prereq, test, arg):
12961
    """Notifies the client of a test.
12962

12963
    @type prereq: bool
12964
    @param prereq: Whether this is a prereq-phase test
12965
    @type test: string
12966
    @param test: Test name
12967
    @param arg: Test argument (depends on test)
12968

12969
    """
12970
    if prereq:
12971
      errcls = errors.OpPrereqError
12972
    else:
12973
      errcls = errors.OpExecError
12974

    
12975
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12976
                                                  test, arg),
12977
                                   errcls)
12978

    
12979
  def CheckArguments(self):
12980
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12981
    self.expandnames_calls = 0
12982

    
12983
  def ExpandNames(self):
12984
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12985
    if checkargs_calls < 1:
12986
      raise errors.ProgrammerError("CheckArguments was not called")
12987

    
12988
    self.expandnames_calls += 1
12989

    
12990
    if self.op.notify_waitlock:
12991
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12992

    
12993
    self.LogInfo("Expanding names")
12994

    
12995
    # Get lock on master node (just to get a lock, not for a particular reason)
12996
    self.needed_locks = {
12997
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12998
      }
12999

    
13000
  def Exec(self, feedback_fn):
13001
    if self.expandnames_calls < 1:
13002
      raise errors.ProgrammerError("ExpandNames was not called")
13003

    
13004
    if self.op.notify_exec:
13005
      self._Notify(False, constants.JQT_EXEC, None)
13006

    
13007
    self.LogInfo("Executing")
13008

    
13009
    if self.op.log_messages:
13010
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13011
      for idx, msg in enumerate(self.op.log_messages):
13012
        self.LogInfo("Sending log message %s", idx + 1)
13013
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13014
        # Report how many test messages have been sent
13015
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13016

    
13017
    if self.op.fail:
13018
      raise errors.OpExecError("Opcode failure was requested")
13019

    
13020
    return True
13021

    
13022

    
13023
class IAllocator(object):
13024
  """IAllocator framework.
13025

13026
  An IAllocator instance has three sets of attributes:
13027
    - cfg that is needed to query the cluster
13028
    - input data (all members of the _KEYS class attribute are required)
13029
    - four buffer attributes (in|out_data|text), that represent the
13030
      input (to the external script) in text and data structure format,
13031
      and the output from it, again in two formats
13032
    - the result variables from the script (success, info, nodes) for
13033
      easy usage
13034

13035
  """
13036
  # pylint: disable=R0902
13037
  # lots of instance attributes
13038

    
13039
  def __init__(self, cfg, rpc, mode, **kwargs):
13040
    self.cfg = cfg
13041
    self.rpc = rpc
13042
    # init buffer variables
13043
    self.in_text = self.out_text = self.in_data = self.out_data = None
13044
    # init all input fields so that pylint is happy
13045
    self.mode = mode
13046
    self.memory = self.disks = self.disk_template = None
13047
    self.os = self.tags = self.nics = self.vcpus = None
13048
    self.hypervisor = None
13049
    self.relocate_from = None
13050
    self.name = None
13051
    self.instances = None
13052
    self.evac_mode = None
13053
    self.target_groups = []
13054
    # computed fields
13055
    self.required_nodes = None
13056
    # init result fields
13057
    self.success = self.info = self.result = None
13058

    
13059
    try:
13060
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13061
    except KeyError:
13062
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13063
                                   " IAllocator" % self.mode)
13064

    
13065
    keyset = [n for (n, _) in keydata]
13066

    
13067
    for key in kwargs:
13068
      if key not in keyset:
13069
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13070
                                     " IAllocator" % key)
13071
      setattr(self, key, kwargs[key])
13072

    
13073
    for key in keyset:
13074
      if key not in kwargs:
13075
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13076
                                     " IAllocator" % key)
13077
    self._BuildInputData(compat.partial(fn, self), keydata)
13078

    
13079
  def _ComputeClusterData(self):
13080
    """Compute the generic allocator input data.
13081

13082
    This is the data that is independent of the actual operation.
13083

13084
    """
13085
    cfg = self.cfg
13086
    cluster_info = cfg.GetClusterInfo()
13087
    # cluster data
13088
    data = {
13089
      "version": constants.IALLOCATOR_VERSION,
13090
      "cluster_name": cfg.GetClusterName(),
13091
      "cluster_tags": list(cluster_info.GetTags()),
13092
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13093
      # we don't have job IDs
13094
      }
13095
    ninfo = cfg.GetAllNodesInfo()
13096
    iinfo = cfg.GetAllInstancesInfo().values()
13097
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13098

    
13099
    # node data
13100
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13101

    
13102
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13103
      hypervisor_name = self.hypervisor
13104
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13105
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13106
    else:
13107
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13108

    
13109
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13110
                                        hypervisor_name)
13111
    node_iinfo = \
13112
      self.rpc.call_all_instances_info(node_list,
13113
                                       cluster_info.enabled_hypervisors)
13114

    
13115
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13116

    
13117
    config_ndata = self._ComputeBasicNodeData(ninfo)
13118
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13119
                                                 i_list, config_ndata)
13120
    assert len(data["nodes"]) == len(ninfo), \
13121
        "Incomplete node data computed"
13122

    
13123
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13124

    
13125
    self.in_data = data
13126

    
13127
  @staticmethod
13128
  def _ComputeNodeGroupData(cfg):
13129
    """Compute node groups data.
13130

13131
    """
13132
    ng = dict((guuid, {
13133
      "name": gdata.name,
13134
      "alloc_policy": gdata.alloc_policy,
13135
      })
13136
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13137

    
13138
    return ng
13139

    
13140
  @staticmethod
13141
  def _ComputeBasicNodeData(node_cfg):
13142
    """Compute global node data.
13143

13144
    @rtype: dict
13145
    @returns: a dict of name: (node dict, node config)
13146

13147
    """
13148
    # fill in static (config-based) values
13149
    node_results = dict((ninfo.name, {
13150
      "tags": list(ninfo.GetTags()),
13151
      "primary_ip": ninfo.primary_ip,
13152
      "secondary_ip": ninfo.secondary_ip,
13153
      "offline": ninfo.offline,
13154
      "drained": ninfo.drained,
13155
      "master_candidate": ninfo.master_candidate,
13156
      "group": ninfo.group,
13157
      "master_capable": ninfo.master_capable,
13158
      "vm_capable": ninfo.vm_capable,
13159
      })
13160
      for ninfo in node_cfg.values())
13161

    
13162
    return node_results
13163

    
13164
  @staticmethod
13165
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13166
                              node_results):
13167
    """Compute global node data.
13168

13169
    @param node_results: the basic node structures as filled from the config
13170

13171
    """
13172
    # make a copy of the current dict
13173
    node_results = dict(node_results)
13174
    for nname, nresult in node_data.items():
13175
      assert nname in node_results, "Missing basic data for node %s" % nname
13176
      ninfo = node_cfg[nname]
13177

    
13178
      if not (ninfo.offline or ninfo.drained):
13179
        nresult.Raise("Can't get data for node %s" % nname)
13180
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13181
                                nname)
13182
        remote_info = nresult.payload
13183

    
13184
        for attr in ["memory_total", "memory_free", "memory_dom0",
13185
                     "vg_size", "vg_free", "cpu_total"]:
13186
          if attr not in remote_info:
13187
            raise errors.OpExecError("Node '%s' didn't return attribute"
13188
                                     " '%s'" % (nname, attr))
13189
          if not isinstance(remote_info[attr], int):
13190
            raise errors.OpExecError("Node '%s' returned invalid value"
13191
                                     " for '%s': %s" %
13192
                                     (nname, attr, remote_info[attr]))
13193
        # compute memory used by primary instances
13194
        i_p_mem = i_p_up_mem = 0
13195
        for iinfo, beinfo in i_list:
13196
          if iinfo.primary_node == nname:
13197
            i_p_mem += beinfo[constants.BE_MEMORY]
13198
            if iinfo.name not in node_iinfo[nname].payload:
13199
              i_used_mem = 0
13200
            else:
13201
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13202
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13203
            remote_info["memory_free"] -= max(0, i_mem_diff)
13204

    
13205
            if iinfo.admin_up:
13206
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13207

    
13208
        # compute memory used by instances
13209
        pnr_dyn = {
13210
          "total_memory": remote_info["memory_total"],
13211
          "reserved_memory": remote_info["memory_dom0"],
13212
          "free_memory": remote_info["memory_free"],
13213
          "total_disk": remote_info["vg_size"],
13214
          "free_disk": remote_info["vg_free"],
13215
          "total_cpus": remote_info["cpu_total"],
13216
          "i_pri_memory": i_p_mem,
13217
          "i_pri_up_memory": i_p_up_mem,
13218
          }
13219
        pnr_dyn.update(node_results[nname])
13220
        node_results[nname] = pnr_dyn
13221

    
13222
    return node_results
13223

    
13224
  @staticmethod
13225
  def _ComputeInstanceData(cluster_info, i_list):
13226
    """Compute global instance data.
13227

13228
    """
13229
    instance_data = {}
13230
    for iinfo, beinfo in i_list:
13231
      nic_data = []
13232
      for nic in iinfo.nics:
13233
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13234
        nic_dict = {
13235
          "mac": nic.mac,
13236
          "ip": nic.ip,
13237
          "mode": filled_params[constants.NIC_MODE],
13238
          "link": filled_params[constants.NIC_LINK],
13239
          }
13240
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13241
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13242
        nic_data.append(nic_dict)
13243
      pir = {
13244
        "tags": list(iinfo.GetTags()),
13245
        "admin_up": iinfo.admin_up,
13246
        "vcpus": beinfo[constants.BE_VCPUS],
13247
        "memory": beinfo[constants.BE_MEMORY],
13248
        "os": iinfo.os,
13249
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13250
        "nics": nic_data,
13251
        "disks": [{constants.IDISK_SIZE: dsk.size,
13252
                   constants.IDISK_MODE: dsk.mode}
13253
                  for dsk in iinfo.disks],
13254
        "disk_template": iinfo.disk_template,
13255
        "hypervisor": iinfo.hypervisor,
13256
        }
13257
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13258
                                                 pir["disks"])
13259
      instance_data[iinfo.name] = pir
13260

    
13261
    return instance_data
13262

    
13263
  def _AddNewInstance(self):
13264
    """Add new instance data to allocator structure.
13265

13266
    This in combination with _AllocatorGetClusterData will create the
13267
    correct structure needed as input for the allocator.
13268

13269
    The checks for the completeness of the opcode must have already been
13270
    done.
13271

13272
    """
13273
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13274

    
13275
    if self.disk_template in constants.DTS_INT_MIRROR:
13276
      self.required_nodes = 2
13277
    else:
13278
      self.required_nodes = 1
13279

    
13280
    request = {
13281
      "name": self.name,
13282
      "disk_template": self.disk_template,
13283
      "tags": self.tags,
13284
      "os": self.os,
13285
      "vcpus": self.vcpus,
13286
      "memory": self.memory,
13287
      "disks": self.disks,
13288
      "disk_space_total": disk_space,
13289
      "nics": self.nics,
13290
      "required_nodes": self.required_nodes,
13291
      "hypervisor": self.hypervisor,
13292
      }
13293

    
13294
    return request
13295

    
13296
  def _AddRelocateInstance(self):
13297
    """Add relocate instance data to allocator structure.
13298

13299
    This in combination with _IAllocatorGetClusterData will create the
13300
    correct structure needed as input for the allocator.
13301

13302
    The checks for the completeness of the opcode must have already been
13303
    done.
13304

13305
    """
13306
    instance = self.cfg.GetInstanceInfo(self.name)
13307
    if instance is None:
13308
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13309
                                   " IAllocator" % self.name)
13310

    
13311
    if instance.disk_template not in constants.DTS_MIRRORED:
13312
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13313
                                 errors.ECODE_INVAL)
13314

    
13315
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13316
        len(instance.secondary_nodes) != 1:
13317
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13318
                                 errors.ECODE_STATE)
13319

    
13320
    self.required_nodes = 1
13321
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13322
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13323

    
13324
    request = {
13325
      "name": self.name,
13326
      "disk_space_total": disk_space,
13327
      "required_nodes": self.required_nodes,
13328
      "relocate_from": self.relocate_from,
13329
      }
13330
    return request
13331

    
13332
  def _AddNodeEvacuate(self):
13333
    """Get data for node-evacuate requests.
13334

13335
    """
13336
    return {
13337
      "instances": self.instances,
13338
      "evac_mode": self.evac_mode,
13339
      }
13340

    
13341
  def _AddChangeGroup(self):
13342
    """Get data for node-evacuate requests.
13343

13344
    """
13345
    return {
13346
      "instances": self.instances,
13347
      "target_groups": self.target_groups,
13348
      }
13349

    
13350
  def _BuildInputData(self, fn, keydata):
13351
    """Build input data structures.
13352

13353
    """
13354
    self._ComputeClusterData()
13355

    
13356
    request = fn()
13357
    request["type"] = self.mode
13358
    for keyname, keytype in keydata:
13359
      if keyname not in request:
13360
        raise errors.ProgrammerError("Request parameter %s is missing" %
13361
                                     keyname)
13362
      val = request[keyname]
13363
      if not keytype(val):
13364
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13365
                                     " validation, value %s, expected"
13366
                                     " type %s" % (keyname, val, keytype))
13367
    self.in_data["request"] = request
13368

    
13369
    self.in_text = serializer.Dump(self.in_data)
13370

    
13371
  _STRING_LIST = ht.TListOf(ht.TString)
13372
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13373
     # pylint: disable=E1101
13374
     # Class '...' has no 'OP_ID' member
13375
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13376
                          opcodes.OpInstanceMigrate.OP_ID,
13377
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13378
     })))
13379

    
13380
  _NEVAC_MOVED = \
13381
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13382
                       ht.TItems([ht.TNonEmptyString,
13383
                                  ht.TNonEmptyString,
13384
                                  ht.TListOf(ht.TNonEmptyString),
13385
                                 ])))
13386
  _NEVAC_FAILED = \
13387
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13388
                       ht.TItems([ht.TNonEmptyString,
13389
                                  ht.TMaybeString,
13390
                                 ])))
13391
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13392
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13393

    
13394
  _MODE_DATA = {
13395
    constants.IALLOCATOR_MODE_ALLOC:
13396
      (_AddNewInstance,
13397
       [
13398
        ("name", ht.TString),
13399
        ("memory", ht.TInt),
13400
        ("disks", ht.TListOf(ht.TDict)),
13401
        ("disk_template", ht.TString),
13402
        ("os", ht.TString),
13403
        ("tags", _STRING_LIST),
13404
        ("nics", ht.TListOf(ht.TDict)),
13405
        ("vcpus", ht.TInt),
13406
        ("hypervisor", ht.TString),
13407
        ], ht.TList),
13408
    constants.IALLOCATOR_MODE_RELOC:
13409
      (_AddRelocateInstance,
13410
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13411
       ht.TList),
13412
     constants.IALLOCATOR_MODE_NODE_EVAC:
13413
      (_AddNodeEvacuate, [
13414
        ("instances", _STRING_LIST),
13415
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13416
        ], _NEVAC_RESULT),
13417
     constants.IALLOCATOR_MODE_CHG_GROUP:
13418
      (_AddChangeGroup, [
13419
        ("instances", _STRING_LIST),
13420
        ("target_groups", _STRING_LIST),
13421
        ], _NEVAC_RESULT),
13422
    }
13423

    
13424
  def Run(self, name, validate=True, call_fn=None):
13425
    """Run an instance allocator and return the results.
13426

13427
    """
13428
    if call_fn is None:
13429
      call_fn = self.rpc.call_iallocator_runner
13430

    
13431
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13432
    result.Raise("Failure while running the iallocator script")
13433

    
13434
    self.out_text = result.payload
13435
    if validate:
13436
      self._ValidateResult()
13437

    
13438
  def _ValidateResult(self):
13439
    """Process the allocator results.
13440

13441
    This will process and if successful save the result in
13442
    self.out_data and the other parameters.
13443

13444
    """
13445
    try:
13446
      rdict = serializer.Load(self.out_text)
13447
    except Exception, err:
13448
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13449

    
13450
    if not isinstance(rdict, dict):
13451
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13452

    
13453
    # TODO: remove backwards compatiblity in later versions
13454
    if "nodes" in rdict and "result" not in rdict:
13455
      rdict["result"] = rdict["nodes"]
13456
      del rdict["nodes"]
13457

    
13458
    for key in "success", "info", "result":
13459
      if key not in rdict:
13460
        raise errors.OpExecError("Can't parse iallocator results:"
13461
                                 " missing key '%s'" % key)
13462
      setattr(self, key, rdict[key])
13463

    
13464
    if not self._result_check(self.result):
13465
      raise errors.OpExecError("Iallocator returned invalid result,"
13466
                               " expected %s, got %s" %
13467
                               (self._result_check, self.result),
13468
                               errors.ECODE_INVAL)
13469

    
13470
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13471
      assert self.relocate_from is not None
13472
      assert self.required_nodes == 1
13473

    
13474
      node2group = dict((name, ndata["group"])
13475
                        for (name, ndata) in self.in_data["nodes"].items())
13476

    
13477
      fn = compat.partial(self._NodesToGroups, node2group,
13478
                          self.in_data["nodegroups"])
13479

    
13480
      instance = self.cfg.GetInstanceInfo(self.name)
13481
      request_groups = fn(self.relocate_from + [instance.primary_node])
13482
      result_groups = fn(rdict["result"] + [instance.primary_node])
13483

    
13484
      if self.success and not set(result_groups).issubset(request_groups):
13485
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13486
                                 " differ from original groups (%s)" %
13487
                                 (utils.CommaJoin(result_groups),
13488
                                  utils.CommaJoin(request_groups)))
13489

    
13490
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13491
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13492

    
13493
    self.out_data = rdict
13494

    
13495
  @staticmethod
13496
  def _NodesToGroups(node2group, groups, nodes):
13497
    """Returns a list of unique group names for a list of nodes.
13498

13499
    @type node2group: dict
13500
    @param node2group: Map from node name to group UUID
13501
    @type groups: dict
13502
    @param groups: Group information
13503
    @type nodes: list
13504
    @param nodes: Node names
13505

13506
    """
13507
    result = set()
13508

    
13509
    for node in nodes:
13510
      try:
13511
        group_uuid = node2group[node]
13512
      except KeyError:
13513
        # Ignore unknown node
13514
        pass
13515
      else:
13516
        try:
13517
          group = groups[group_uuid]
13518
        except KeyError:
13519
          # Can't find group, let's use UUID
13520
          group_name = group_uuid
13521
        else:
13522
          group_name = group["name"]
13523

    
13524
        result.add(group_name)
13525

    
13526
    return sorted(result)
13527

    
13528

    
13529
class LUTestAllocator(NoHooksLU):
13530
  """Run allocator tests.
13531

13532
  This LU runs the allocator tests
13533

13534
  """
13535
  def CheckPrereq(self):
13536
    """Check prerequisites.
13537

13538
    This checks the opcode parameters depending on the director and mode test.
13539

13540
    """
13541
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

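    # DIR_OUT actually invokes the named allocator, so its name is required;
    # DIR_IN only builds and returns the request text (see Exec below).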
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

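    # For DIR_IN, return the generated request document without running any
    # allocator; for DIR_OUT, run the requested allocator without validating
    # its output (this LU exists for testing) and return the raw response.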
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
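  # For example, _GetQueryImplementation(constants.QR_NODE) returns the
  # _NodeQuery class registered in _QUERY_IMPL above.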
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)