lib/cmdlib.py @ revision 033684dd


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
#: Size of DRBD meta block device
67
DRBD_META_SIZE = 128
68

    
69

    
70
class ResultWithJobs:
71
  """Data container for LU results with jobs.
72

73
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
74
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
75
  contained in the C{jobs} attribute and include the job IDs in the opcode
76
  result.
77

78
  """
79
  def __init__(self, jobs, **kwargs):
80
    """Initializes this class.
81

82
    Additional return values can be specified as keyword arguments.
83

84
    @type jobs: list of lists of L{opcode.OpCode}
85
    @param jobs: A list of lists of opcode objects
86

87
    """
88
    self.jobs = jobs
89
    self.other = kwargs
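
# Example (illustrative, hypothetical opcode values): an LU whose Exec wants
# two follow-up jobs submitted on its behalf would return something like
#
#   return ResultWithJobs([[opcodes.OpClusterVerifyConfig()],
#                          [opcodes.OpClusterVerifyGroup(group_name=g)
#                           for g in group_names]],
#                         custom_key="extra result value")
#
# mcpu.Processor._ProcessResult then submits the two jobs and includes their
# job IDs, together with the extra keyword values, in the opcode result.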
90

    
91

    
92
class LogicalUnit(object):
93
  """Logical Unit base class.
94

95
  Subclasses must follow these rules:
96
    - implement ExpandNames
97
    - implement CheckPrereq (except when tasklets are used)
98
    - implement Exec (except when tasklets are used)
99
    - implement BuildHooksEnv
100
    - implement BuildHooksNodes
101
    - redefine HPATH and HTYPE
102
    - optionally redefine their run requirements:
103
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
104

105
  Note that all commands require root permissions.
106

107
  @ivar dry_run_result: the value (if any) that will be returned to the caller
108
      in dry-run mode (signalled by opcode dry_run parameter)
109

110
  """
111
  HPATH = None
112
  HTYPE = None
113
  REQ_BGL = True
114

    
115
  def __init__(self, processor, op, context, rpc):
116
    """Constructor for LogicalUnit.
117

118
    This needs to be overridden in derived classes in order to check op
119
    validity.
120

121
    """
122
    self.proc = processor
123
    self.op = op
124
    self.cfg = context.cfg
125
    self.glm = context.glm
126
    # readability alias
127
    self.owned_locks = context.glm.list_owned
128
    self.context = context
129
    self.rpc = rpc
130
    # Dicts used to declare locking needs to mcpu
131
    self.needed_locks = None
132
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
133
    self.add_locks = {}
134
    self.remove_locks = {}
135
    # Used to force good behavior when calling helper functions
136
    self.recalculate_locks = {}
137
    # logging
138
    self.Log = processor.Log # pylint: disable=C0103
139
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
140
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
141
    self.LogStep = processor.LogStep # pylint: disable=C0103
142
    # support for dry-run
143
    self.dry_run_result = None
144
    # support for generic debug attribute
145
    if (not hasattr(self.op, "debug_level") or
146
        not isinstance(self.op.debug_level, int)):
147
      self.op.debug_level = 0
148

    
149
    # Tasklets
150
    self.tasklets = None
151

    
152
    # Validate opcode parameters and set defaults
153
    self.op.Validate(True)
154

    
155
    self.CheckArguments()
156

    
157
  def CheckArguments(self):
158
    """Check syntactic validity for the opcode arguments.
159

160
    This method is for doing a simple syntactic check and ensuring
161
    validity of opcode parameters, without any cluster-related
162
    checks. While the same can be accomplished in ExpandNames and/or
163
    CheckPrereq, doing these separately is better because:
164

165
      - ExpandNames is left as purely a lock-related function
166
      - CheckPrereq is run after we have acquired locks (and possibly
167
        waited for them)
168

169
    The function is allowed to change the self.op attribute so that
170
    later methods need no longer worry about missing parameters.
171

172
    """
173
    pass
174

    
175
  def ExpandNames(self):
176
    """Expand names for this LU.
177

178
    This method is called before starting to execute the opcode, and it should
179
    update all the parameters of the opcode to their canonical form (e.g. a
180
    short node name must be fully expanded after this method has successfully
181
    completed). This way locking, hooks, logging, etc. can work correctly.
182

183
    LUs which implement this method must also populate the self.needed_locks
184
    member, as a dict with lock levels as keys, and a list of needed lock names
185
    as values. Rules:
186

187
      - use an empty dict if you don't need any lock
188
      - if you don't need any lock at a particular level omit that level
189
      - don't put anything for the BGL level
190
      - if you want all locks at a level use locking.ALL_SET as a value
191

192
    If you need to share locks (rather than acquire them exclusively) at one
193
    level you can modify self.share_locks, setting a true value (usually 1) for
194
    that level. By default locks are not shared.
195

196
    This function can also define a list of tasklets, which then will be
197
    executed in order instead of the usual LU-level CheckPrereq and Exec
198
    functions, if those are not defined by the LU.
199

200
    Examples::
201

202
      # Acquire all nodes and one instance
203
      self.needed_locks = {
204
        locking.LEVEL_NODE: locking.ALL_SET,
205
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
206
      }
207
      # Acquire just two nodes
208
      self.needed_locks = {
209
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
210
      }
211
      # Acquire no locks
212
      self.needed_locks = {} # No, you can't leave it to the default value None
213

214
    """
215
    # The implementation of this method is mandatory only if the new LU is
216
    # concurrent, so that old LUs don't need to be changed all at the same
217
    # time.
218
    if self.REQ_BGL:
219
      self.needed_locks = {} # Exclusive LUs don't need locks.
220
    else:
221
      raise NotImplementedError
222

    
223
  def DeclareLocks(self, level):
224
    """Declare LU locking needs for a level
225

226
    While most LUs can just declare their locking needs at ExpandNames time,
227
    sometimes there's the need to calculate some locks after having acquired
228
    the ones before. This function is called just before acquiring locks at a
229
    particular level, but after acquiring the ones at lower levels, and permits
230
    such calculations. It can be used to modify self.needed_locks, and by
231
    default it does nothing.
232

233
    This function is only called if you have something already set in
234
    self.needed_locks for the level.
235

236
    @param level: Locking level which is going to be locked
237
    @type level: member of ganeti.locking.LEVELS
238

239
    """
240

    
241
  def CheckPrereq(self):
242
    """Check prerequisites for this LU.
243

244
    This method should check that the prerequisites for the execution
245
    of this LU are fulfilled. It can do internode communication, but
246
    it should be idempotent - no cluster or system changes are
247
    allowed.
248

249
    The method should raise errors.OpPrereqError in case something is
250
    not fulfilled. Its return value is ignored.
251

252
    This method should also update all the parameters of the opcode to
253
    their canonical form if it hasn't been done by ExpandNames before.
254

255
    """
256
    if self.tasklets is not None:
257
      for (idx, tl) in enumerate(self.tasklets):
258
        logging.debug("Checking prerequisites for tasklet %s/%s",
259
                      idx + 1, len(self.tasklets))
260
        tl.CheckPrereq()
261
    else:
262
      pass
263

    
264
  def Exec(self, feedback_fn):
265
    """Execute the LU.
266

267
    This method should implement the actual work. It should raise
268
    errors.OpExecError for failures that are somewhat dealt with in
269
    code, or expected.
270

271
    """
272
    if self.tasklets is not None:
273
      for (idx, tl) in enumerate(self.tasklets):
274
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
275
        tl.Exec(feedback_fn)
276
    else:
277
      raise NotImplementedError
278

    
279
  def BuildHooksEnv(self):
280
    """Build hooks environment for this LU.
281

282
    @rtype: dict
283
    @return: Dictionary containing the environment that will be used for
284
      running the hooks for this LU. The keys of the dict must not be prefixed
285
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
286
      will extend the environment with additional variables. If no environment
287
      should be defined, an empty dictionary should be returned (not C{None}).
288
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
289
      will not be called.
290

291
    """
292
    raise NotImplementedError
293

    
294
  def BuildHooksNodes(self):
295
    """Build list of nodes to run LU's hooks.
296

297
    @rtype: tuple; (list, list)
298
    @return: Tuple containing a list of node names on which the hook
299
      should run before the execution and a list of node names on which the
300
      hook should run after the execution. No nodes should be returned as an
301
      empty list (and not None).
302
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
303
      will not be called.
304

305
    """
306
    raise NotImplementedError
307

    
308
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
309
    """Notify the LU about the results of its hooks.
310

311
    This method is called every time a hooks phase is executed, and notifies
312
    the Logical Unit about the hooks' result. The LU can then use it to alter
313
    its result based on the hooks.  By default the method does nothing and the
314
    previous result is passed back unchanged but any LU can define it if it
315
    wants to use the local cluster hook-scripts somehow.
316

317
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
318
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
319
    @param hook_results: the results of the multi-node hooks rpc call
320
    @param feedback_fn: function used to send feedback back to the caller
321
    @param lu_result: the previous Exec result this LU had, or None
322
        in the PRE phase
323
    @return: the new Exec result, based on the previous result
324
        and hook results
325

326
    """
327
    # API must be kept, thus we ignore the "unused argument" and "could
328
    # be a function" pylint warnings
329
    # pylint: disable=W0613,R0201
330
    return lu_result
331

    
332
  def _ExpandAndLockInstance(self):
333
    """Helper function to expand and lock an instance.
334

335
    Many LUs that work on an instance take its name in self.op.instance_name
336
    and need to expand it and then declare the expanded name for locking. This
337
    function does it, and then updates self.op.instance_name to the expanded
338
    name. It also initializes needed_locks as a dict, if this hasn't been done
339
    before.
340

341
    """
342
    if self.needed_locks is None:
343
      self.needed_locks = {}
344
    else:
345
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
346
        "_ExpandAndLockInstance called with instance-level locks set"
347
    self.op.instance_name = _ExpandInstanceName(self.cfg,
348
                                                self.op.instance_name)
349
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
350

    
351
  def _LockInstancesNodes(self, primary_only=False):
352
    """Helper function to declare instances' nodes for locking.
353

354
    This function should be called after locking one or more instances to lock
355
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
356
    with all primary or secondary nodes for instances already locked and
357
    present in self.needed_locks[locking.LEVEL_INSTANCE].
358

359
    It should be called from DeclareLocks, and for safety only works if
360
    self.recalculate_locks[locking.LEVEL_NODE] is set.
361

362
    In the future it may grow parameters to just lock some instance's nodes, or
363
    to just lock primaries or secondary nodes, if needed.
364

365
    It should be called in DeclareLocks in a way similar to::
366

367
      if level == locking.LEVEL_NODE:
368
        self._LockInstancesNodes()
369

370
    @type primary_only: boolean
371
    @param primary_only: only lock primary nodes of locked instances
372

373
    """
374
    assert locking.LEVEL_NODE in self.recalculate_locks, \
375
      "_LockInstancesNodes helper function called with no nodes to recalculate"
376

    
377
    # TODO: check if we've really been called with the instance locks held
378

    
379
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
380
    # future we might want to have different behaviors depending on the value
381
    # of self.recalculate_locks[locking.LEVEL_NODE]
382
    wanted_nodes = []
383
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
384
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
385
      wanted_nodes.append(instance.primary_node)
386
      if not primary_only:
387
        wanted_nodes.extend(instance.secondary_nodes)
388

    
389
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
390
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
391
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
392
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
393

    
394
    del self.recalculate_locks[locking.LEVEL_NODE]
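

# A minimal, hypothetical LU following the rules documented in LogicalUnit
# above. The class, its HPATH and the exact prerequisite checks are purely
# illustrative sketches (this is not part of the Ganeti opcode set); the
# locking pattern mirrors what _ExpandAndLockInstance and _LockInstancesNodes
# are documented to do.
class LUExampleInstanceNoop(LogicalUnit):
  """Illustrative no-op LU operating on a single instance.

  """
  HPATH = "instance-noop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    # Expand self.op.instance_name and declare the instance-level lock
    self._ExpandAndLockInstance()
    # Ask for the instance's nodes to be locked once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)
    _CheckNodeOnline(self, self.instance.primary_node)

  def BuildHooksEnv(self):
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return (nl, nl)

  def Exec(self, feedback_fn):
    feedback_fn("Doing nothing for instance %s" % self.instance.name)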
395

    
396

    
397
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
398
  """Simple LU which runs no hooks.
399

400
  This LU is intended as a parent for other LogicalUnits which will
401
  run no hooks, in order to reduce duplicate code.
402

403
  """
404
  HPATH = None
405
  HTYPE = None
406

    
407
  def BuildHooksEnv(self):
408
    """Empty BuildHooksEnv for NoHooksLu.
409

410
    This just raises an error.
411

412
    """
413
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
414

    
415
  def BuildHooksNodes(self):
416
    """Empty BuildHooksNodes for NoHooksLU.
417

418
    """
419
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
420

    
421

    
422
class Tasklet:
423
  """Tasklet base class.
424

425
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
426
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
427
  tasklets know nothing about locks.
428

429
  Subclasses must follow these rules:
430
    - Implement CheckPrereq
431
    - Implement Exec
432

433
  """
434
  def __init__(self, lu):
435
    self.lu = lu
436

    
437
    # Shortcuts
438
    self.cfg = lu.cfg
439
    self.rpc = lu.rpc
440

    
441
  def CheckPrereq(self):
442
    """Check prerequisites for this tasklets.
443

444
    This method should check whether the prerequisites for the execution of
445
    this tasklet are fulfilled. It can do internode communication, but it
446
    should be idempotent - no cluster or system changes are allowed.
447

448
    The method should raise errors.OpPrereqError in case something is not
449
    fulfilled. Its return value is ignored.
450

451
    This method should also update all parameters to their canonical form if it
452
    hasn't been done before.
453

454
    """
455
    pass
456

    
457
  def Exec(self, feedback_fn):
458
    """Execute the tasklet.
459

460
    This method should implement the actual work. It should raise
461
    errors.OpExecError for failures that are somewhat dealt with in code, or
462
    expected.
463

464
    """
465
    raise NotImplementedError
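

# Hypothetical sketch of the tasklet pattern described above: the tasklet
# carries the per-instance logic, while the owning LU (not shown) would do the
# locking and set "self.tasklets = [ExampleRestartTasklet(self, name)]" in its
# ExpandNames, after which LogicalUnit.CheckPrereq and Exec drive it. Names
# and the "restart" behaviour are illustrative only.
class ExampleRestartTasklet(Tasklet):
  """Illustrative tasklet that checks and "restarts" one instance.

  """
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self.instance_name = instance_name
    self.instance = None

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name, errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    feedback_fn("Would restart instance %s here" % self.instance.name)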
466

    
467

    
468
class _QueryBase:
469
  """Base for query utility classes.
470

471
  """
472
  #: Attribute holding field definitions
473
  FIELDS = None
474

    
475
  def __init__(self, qfilter, fields, use_locking):
476
    """Initializes this class.
477

478
    """
479
    self.use_locking = use_locking
480

    
481
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
482
                             namefield="name")
483
    self.requested_data = self.query.RequestedData()
484
    self.names = self.query.RequestedNames()
485

    
486
    # Sort only if no names were requested
487
    self.sort_by_name = not self.names
488

    
489
    self.do_locking = None
490
    self.wanted = None
491

    
492
  def _GetNames(self, lu, all_names, lock_level):
493
    """Helper function to determine names asked for in the query.
494

495
    """
496
    if self.do_locking:
497
      names = lu.owned_locks(lock_level)
498
    else:
499
      names = all_names
500

    
501
    if self.wanted == locking.ALL_SET:
502
      assert not self.names
503
      # caller didn't specify names, so ordering is not important
504
      return utils.NiceSort(names)
505

    
506
    # caller specified names and we must keep the same order
507
    assert self.names
508
    assert not self.do_locking or lu.glm.is_owned(lock_level)
509

    
510
    missing = set(self.wanted).difference(names)
511
    if missing:
512
      raise errors.OpExecError("Some items were removed before retrieving"
513
                               " their data: %s" % missing)
514

    
515
    # Return expanded names
516
    return self.wanted
517

    
518
  def ExpandNames(self, lu):
519
    """Expand names for this query.
520

521
    See L{LogicalUnit.ExpandNames}.
522

523
    """
524
    raise NotImplementedError()
525

    
526
  def DeclareLocks(self, lu, level):
527
    """Declare locks for this query.
528

529
    See L{LogicalUnit.DeclareLocks}.
530

531
    """
532
    raise NotImplementedError()
533

    
534
  def _GetQueryData(self, lu):
535
    """Collects all data for this query.
536

537
    @return: Query data object
538

539
    """
540
    raise NotImplementedError()
541

    
542
  def NewStyleQuery(self, lu):
543
    """Collect data and execute query.
544

545
    """
546
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
547
                                  sort_by_name=self.sort_by_name)
548

    
549
  def OldStyleQuery(self, lu):
550
    """Collect data and execute query.
551

552
    """
553
    return self.query.OldStyleQuery(self._GetQueryData(lu),
554
                                    sort_by_name=self.sort_by_name)
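

# Hypothetical sketch of a concrete query class built on _QueryBase. The field
# list (assumed to be query.NODE_FIELDS), the locking details and the shape of
# the returned data are illustrative; the concrete per-resource query classes
# later in this module follow the same general structure but return a proper
# query data container from _GetQueryData.
class _ExampleNodeNameQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS  # assumption: field list defined in ganeti.query

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    # Resolve the node names the caller asked for (honouring any locks held);
    # a real implementation would wrap the configuration data for these nodes
    # in the matching query data container and return that instead.
    names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
    return [lu.cfg.GetNodeInfo(name) for name in names]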
555

    
556

    
557
def _ShareAll():
558
  """Returns a dict declaring all lock levels shared.
559

560
  """
561
  return dict.fromkeys(locking.LEVELS, 1)
562

    
563

    
564
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
565
  """Checks if the owned node groups are still correct for an instance.
566

567
  @type cfg: L{config.ConfigWriter}
568
  @param cfg: The cluster configuration
569
  @type instance_name: string
570
  @param instance_name: Instance name
571
  @type owned_groups: set or frozenset
572
  @param owned_groups: List of currently owned node groups
573

574
  """
575
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
576

    
577
  if not owned_groups.issuperset(inst_groups):
578
    raise errors.OpPrereqError("Instance %s's node groups changed since"
579
                               " locks were acquired, current groups are"
580
                               " are '%s', owning groups '%s'; retry the"
581
                               " operation" %
582
                               (instance_name,
583
                                utils.CommaJoin(inst_groups),
584
                                utils.CommaJoin(owned_groups)),
585
                               errors.ECODE_STATE)
586

    
587
  return inst_groups
588

    
589

    
590
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
591
  """Checks if the instances in a node group are still correct.
592

593
  @type cfg: L{config.ConfigWriter}
594
  @param cfg: The cluster configuration
595
  @type group_uuid: string
596
  @param group_uuid: Node group UUID
597
  @type owned_instances: set or frozenset
598
  @param owned_instances: List of currently owned instances
599

600
  """
601
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
602
  if owned_instances != wanted_instances:
603
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
604
                               " locks were acquired, wanted '%s', have '%s';"
605
                               " retry the operation" %
606
                               (group_uuid,
607
                                utils.CommaJoin(wanted_instances),
608
                                utils.CommaJoin(owned_instances)),
609
                               errors.ECODE_STATE)
610

    
611
  return wanted_instances
612

    
613

    
614
def _SupportsOob(cfg, node):
615
  """Tells if node supports OOB.
616

617
  @type cfg: L{config.ConfigWriter}
618
  @param cfg: The cluster configuration
619
  @type node: L{objects.Node}
620
  @param node: The node
621
  @return: The OOB script if supported or an empty string otherwise
622

623
  """
624
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
625

    
626

    
627
def _GetWantedNodes(lu, nodes):
628
  """Returns list of checked and expanded node names.
629

630
  @type lu: L{LogicalUnit}
631
  @param lu: the logical unit on whose behalf we execute
632
  @type nodes: list
633
  @param nodes: list of node names or None for all nodes
634
  @rtype: list
635
  @return: the list of nodes, sorted
636
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
637

638
  """
639
  if nodes:
640
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
641

    
642
  return utils.NiceSort(lu.cfg.GetNodeList())
643

    
644

    
645
def _GetWantedInstances(lu, instances):
646
  """Returns list of checked and expanded instance names.
647

648
  @type lu: L{LogicalUnit}
649
  @param lu: the logical unit on whose behalf we execute
650
  @type instances: list
651
  @param instances: list of instance names or None for all instances
652
  @rtype: list
653
  @return: the list of instances, sorted
654
  @raise errors.OpPrereqError: if the instances parameter is wrong type
655
  @raise errors.OpPrereqError: if any of the passed instances is not found
656

657
  """
658
  if instances:
659
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
660
  else:
661
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
662
  return wanted
663

    
664

    
665
def _GetUpdatedParams(old_params, update_dict,
666
                      use_default=True, use_none=False):
667
  """Return the new version of a parameter dictionary.
668

669
  @type old_params: dict
670
  @param old_params: old parameters
671
  @type update_dict: dict
672
  @param update_dict: dict containing new parameter values, or
673
      constants.VALUE_DEFAULT to reset the parameter to its default
674
      value
675
  @type use_default: boolean
676
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
677
      values as 'to be deleted' values
678
  @type use_none: boolean
679
  @param use_none: whether to recognise C{None} values as 'to be
680
      deleted' values
681
  @rtype: dict
682
  @return: the new parameter dictionary
683

684
  """
685
  params_copy = copy.deepcopy(old_params)
686
  for key, val in update_dict.iteritems():
687
    if ((use_default and val == constants.VALUE_DEFAULT) or
688
        (use_none and val is None)):
689
      try:
690
        del params_copy[key]
691
      except KeyError:
692
        pass
693
    else:
694
      params_copy[key] = val
695
  return params_copy
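
# Worked example (illustrative values):
#   _GetUpdatedParams({"vnc_bind_address": "0.0.0.0", "acpi": True},
#                     {"acpi": constants.VALUE_DEFAULT, "pae": False})
# returns {"vnc_bind_address": "0.0.0.0", "pae": False}: "acpi" is dropped
# (reset to its default), "pae" is added, and untouched keys are kept.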
696

    
697

    
698
def _ReleaseLocks(lu, level, names=None, keep=None):
699
  """Releases locks owned by an LU.
700

701
  @type lu: L{LogicalUnit}
702
  @param lu: Logical unit owning the locks
  @type level: member of ganeti.locking.LEVELS
  @param level: Lock level
703
  @type names: list or None
704
  @param names: Names of locks to release
705
  @type keep: list or None
706
  @param keep: Names of locks to retain
707

708
  """
709
  assert not (keep is not None and names is not None), \
710
         "Only one of the 'names' and the 'keep' parameters can be given"
711

    
712
  if names is not None:
713
    should_release = names.__contains__
714
  elif keep:
715
    should_release = lambda name: name not in keep
716
  else:
717
    should_release = None
718

    
719
  if should_release:
720
    retain = []
721
    release = []
722

    
723
    # Determine which locks to release
724
    for name in lu.owned_locks(level):
725
      if should_release(name):
726
        release.append(name)
727
      else:
728
        retain.append(name)
729

    
730
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
731

    
732
    # Release just some locks
733
    lu.glm.release(level, names=release)
734

    
735
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
736
  else:
737
    # Release everything
738
    lu.glm.release(level)
739

    
740
    assert not lu.glm.is_owned(level), "No locks should be owned"
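
# Typical usage (illustrative): once an LU has narrowed down which nodes it
# really needs, it can drop the remaining node locks it acquired earlier:
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node] +
#                      list(self.instance.secondary_nodes))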
741

    
742

    
743
def _MapInstanceDisksToNodes(instances):
744
  """Creates a map from (node, volume) to instance name.
745

746
  @type instances: list of L{objects.Instance}
747
  @rtype: dict; (node name, volume name) tuples as keys, instance names
      as values
748

749
  """
750
  return dict(((node, vol), inst.name)
751
              for inst in instances
752
              for (node, vols) in inst.MapLVsByNode().items()
753
              for vol in vols)
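
# Example result (illustrative names): for an instance "inst1" whose logical
# volume "xenvg/disk0" is present on "node1" and "node2", the map contains
#   {("node1", "xenvg/disk0"): "inst1", ("node2", "xenvg/disk0"): "inst1"}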
754

    
755

    
756
def _RunPostHook(lu, node_name):
757
  """Runs the post-hook for an opcode on a single node.
758

759
  """
760
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
761
  try:
762
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
763
  except:
764
    # pylint: disable=W0702
765
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
766

    
767

    
768
def _CheckOutputFields(static, dynamic, selected):
769
  """Checks whether all selected fields are valid.
770

771
  @type static: L{utils.FieldSet}
772
  @param static: static fields set
773
  @type dynamic: L{utils.FieldSet}
774
  @param dynamic: dynamic fields set
775

776
  """
777
  f = utils.FieldSet()
778
  f.Extend(static)
779
  f.Extend(dynamic)
780

    
781
  delta = f.NonMatching(selected)
782
  if delta:
783
    raise errors.OpPrereqError("Unknown output fields selected: %s"
784
                               % ",".join(delta), errors.ECODE_INVAL)
785

    
786

    
787
def _CheckGlobalHvParams(params):
788
  """Validates that given hypervisor params are not global ones.
789

790
  This will ensure that instances don't get customised versions of
791
  global params.
792

793
  """
794
  used_globals = constants.HVC_GLOBALS.intersection(params)
795
  if used_globals:
796
    msg = ("The following hypervisor parameters are global and cannot"
797
           " be customized at instance level, please modify them at"
798
           " cluster level: %s" % utils.CommaJoin(used_globals))
799
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
800

    
801

    
802
def _CheckNodeOnline(lu, node, msg=None):
803
  """Ensure that a given node is online.
804

805
  @param lu: the LU on behalf of which we make the check
806
  @param node: the node to check
807
  @param msg: if passed, should be a message to replace the default one
808
  @raise errors.OpPrereqError: if the node is offline
809

810
  """
811
  if msg is None:
812
    msg = "Can't use offline node"
813
  if lu.cfg.GetNodeInfo(node).offline:
814
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
815

    
816

    
817
def _CheckNodeNotDrained(lu, node):
818
  """Ensure that a given node is not drained.
819

820
  @param lu: the LU on behalf of which we make the check
821
  @param node: the node to check
822
  @raise errors.OpPrereqError: if the node is drained
823

824
  """
825
  if lu.cfg.GetNodeInfo(node).drained:
826
    raise errors.OpPrereqError("Can't use drained node %s" % node,
827
                               errors.ECODE_STATE)
828

    
829

    
830
def _CheckNodeVmCapable(lu, node):
831
  """Ensure that a given node is vm capable.
832

833
  @param lu: the LU on behalf of which we make the check
834
  @param node: the node to check
835
  @raise errors.OpPrereqError: if the node is not vm capable
836

837
  """
838
  if not lu.cfg.GetNodeInfo(node).vm_capable:
839
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
840
                               errors.ECODE_STATE)
841

    
842

    
843
def _CheckNodeHasOS(lu, node, os_name, force_variant):
844
  """Ensure that a node supports a given OS.
845

846
  @param lu: the LU on behalf of which we make the check
847
  @param node: the node to check
848
  @param os_name: the OS to query about
849
  @param force_variant: whether to ignore variant errors
850
  @raise errors.OpPrereqError: if the node is not supporting the OS
851

852
  """
853
  result = lu.rpc.call_os_get(node, os_name)
854
  result.Raise("OS '%s' not in supported OS list for node %s" %
855
               (os_name, node),
856
               prereq=True, ecode=errors.ECODE_INVAL)
857
  if not force_variant:
858
    _CheckOSVariant(result.payload, os_name)
859

    
860

    
861
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
862
  """Ensure that a node has the given secondary ip.
863

864
  @type lu: L{LogicalUnit}
865
  @param lu: the LU on behalf of which we make the check
866
  @type node: string
867
  @param node: the node to check
868
  @type secondary_ip: string
869
  @param secondary_ip: the ip to check
870
  @type prereq: boolean
871
  @param prereq: whether to throw a prerequisite or an execute error
872
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
873
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
874

875
  """
876
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
877
  result.Raise("Failure checking secondary ip on node %s" % node,
878
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
879
  if not result.payload:
880
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
881
           " please fix and re-run this command" % secondary_ip)
882
    if prereq:
883
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
884
    else:
885
      raise errors.OpExecError(msg)
886

    
887

    
888
def _GetClusterDomainSecret():
889
  """Reads the cluster domain secret.
890

891
  """
892
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
893
                               strict=True)
894

    
895

    
896
def _CheckInstanceDown(lu, instance, reason):
897
  """Ensure that an instance is not running."""
898
  if instance.admin_up:
899
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
900
                               (instance.name, reason), errors.ECODE_STATE)
901

    
902
  pnode = instance.primary_node
903
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
904
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
905
              prereq=True, ecode=errors.ECODE_ENVIRON)
906

    
907
  if instance.name in ins_l.payload:
908
    raise errors.OpPrereqError("Instance %s is running, %s" %
909
                               (instance.name, reason), errors.ECODE_STATE)
910

    
911

    
912
def _ExpandItemName(fn, name, kind):
913
  """Expand an item name.
914

915
  @param fn: the function to use for expansion
916
  @param name: requested item name
917
  @param kind: text description ('Node' or 'Instance')
918
  @return: the resolved (full) name
919
  @raise errors.OpPrereqError: if the item is not found
920

921
  """
922
  full_name = fn(name)
923
  if full_name is None:
924
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925
                               errors.ECODE_NOENT)
926
  return full_name
927

    
928

    
929
def _ExpandNodeName(cfg, name):
930
  """Wrapper over L{_ExpandItemName} for nodes."""
931
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
932

    
933

    
934
def _ExpandInstanceName(cfg, name):
935
  """Wrapper over L{_ExpandItemName} for instance."""
936
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
937

    
938

    
939
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
940
                          memory, vcpus, nics, disk_template, disks,
941
                          bep, hvp, hypervisor_name, tags):
942
  """Builds instance related env variables for hooks
943

944
  This builds the hook environment from individual variables.
945

946
  @type name: string
947
  @param name: the name of the instance
948
  @type primary_node: string
949
  @param primary_node: the name of the instance's primary node
950
  @type secondary_nodes: list
951
  @param secondary_nodes: list of secondary nodes as strings
952
  @type os_type: string
953
  @param os_type: the name of the instance's OS
954
  @type status: boolean
955
  @param status: the should_run status of the instance
956
  @type memory: string
957
  @param memory: the memory size of the instance
958
  @type vcpus: string
959
  @param vcpus: the count of VCPUs the instance has
960
  @type nics: list
961
  @param nics: list of tuples (ip, mac, mode, link) representing
962
      the NICs the instance has
963
  @type disk_template: string
964
  @param disk_template: the disk template of the instance
965
  @type disks: list
966
  @param disks: the list of (size, mode) pairs
967
  @type bep: dict
968
  @param bep: the backend parameters for the instance
969
  @type hvp: dict
970
  @param hvp: the hypervisor parameters for the instance
971
  @type hypervisor_name: string
972
  @param hypervisor_name: the hypervisor for the instance
973
  @type tags: list
974
  @param tags: list of instance tags as strings
975
  @rtype: dict
976
  @return: the hook environment for this instance
977

978
  """
979
  if status:
980
    str_status = "up"
981
  else:
982
    str_status = "down"
983
  env = {
984
    "OP_TARGET": name,
985
    "INSTANCE_NAME": name,
986
    "INSTANCE_PRIMARY": primary_node,
987
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
988
    "INSTANCE_OS_TYPE": os_type,
989
    "INSTANCE_STATUS": str_status,
990
    "INSTANCE_MEMORY": memory,
991
    "INSTANCE_VCPUS": vcpus,
992
    "INSTANCE_DISK_TEMPLATE": disk_template,
993
    "INSTANCE_HYPERVISOR": hypervisor_name,
994
  }
995

    
996
  if nics:
997
    nic_count = len(nics)
998
    for idx, (ip, mac, mode, link) in enumerate(nics):
999
      if ip is None:
1000
        ip = ""
1001
      env["INSTANCE_NIC%d_IP" % idx] = ip
1002
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1003
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1004
      env["INSTANCE_NIC%d_LINK" % idx] = link
1005
      if mode == constants.NIC_MODE_BRIDGED:
1006
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1007
  else:
1008
    nic_count = 0
1009

    
1010
  env["INSTANCE_NIC_COUNT"] = nic_count
1011

    
1012
  if disks:
1013
    disk_count = len(disks)
1014
    for idx, (size, mode) in enumerate(disks):
1015
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1016
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1017
  else:
1018
    disk_count = 0
1019

    
1020
  env["INSTANCE_DISK_COUNT"] = disk_count
1021

    
1022
  if not tags:
1023
    tags = []
1024

    
1025
  env["INSTANCE_TAGS"] = " ".join(tags)
1026

    
1027
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1028
    for key, value in source.items():
1029
      env["INSTANCE_%s_%s" % (kind, key)] = value
1030

    
1031
  return env
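
# For a hypothetical single-NIC, single-disk instance the resulting
# environment would contain entries along the lines of:
#   OP_TARGET=inst1.example.com   INSTANCE_NAME=inst1.example.com
#   INSTANCE_PRIMARY=node1        INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1          INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK_COUNT=1         INSTANCE_DISK0_SIZE=10240
# plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend and hypervisor
# parameter (the hooks runner later prefixes every key with "GANETI_").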
1032

    
1033

    
1034
def _NICListToTuple(lu, nics):
1035
  """Build a list of nic information tuples.
1036

1037
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1038
  value in LUInstanceQueryData.
1039

1040
  @type lu:  L{LogicalUnit}
1041
  @param lu: the logical unit on whose behalf we execute
1042
  @type nics: list of L{objects.NIC}
1043
  @param nics: list of nics to convert to hooks tuples
1044

1045
  """
1046
  hooks_nics = []
1047
  cluster = lu.cfg.GetClusterInfo()
1048
  for nic in nics:
1049
    ip = nic.ip
1050
    mac = nic.mac
1051
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1052
    mode = filled_params[constants.NIC_MODE]
1053
    link = filled_params[constants.NIC_LINK]
1054
    hooks_nics.append((ip, mac, mode, link))
1055
  return hooks_nics
1056

    
1057

    
1058
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1059
  """Builds instance related env variables for hooks from an object.
1060

1061
  @type lu: L{LogicalUnit}
1062
  @param lu: the logical unit on whose behalf we execute
1063
  @type instance: L{objects.Instance}
1064
  @param instance: the instance for which we should build the
1065
      environment
1066
  @type override: dict
1067
  @param override: dictionary with key/values that will override
1068
      our values
1069
  @rtype: dict
1070
  @return: the hook environment dictionary
1071

1072
  """
1073
  cluster = lu.cfg.GetClusterInfo()
1074
  bep = cluster.FillBE(instance)
1075
  hvp = cluster.FillHV(instance)
1076
  args = {
1077
    "name": instance.name,
1078
    "primary_node": instance.primary_node,
1079
    "secondary_nodes": instance.secondary_nodes,
1080
    "os_type": instance.os,
1081
    "status": instance.admin_up,
1082
    "memory": bep[constants.BE_MEMORY],
1083
    "vcpus": bep[constants.BE_VCPUS],
1084
    "nics": _NICListToTuple(lu, instance.nics),
1085
    "disk_template": instance.disk_template,
1086
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1087
    "bep": bep,
1088
    "hvp": hvp,
1089
    "hypervisor_name": instance.hypervisor,
1090
    "tags": instance.tags,
1091
  }
1092
  if override:
1093
    args.update(override)
1094
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1095

    
1096

    
1097
def _AdjustCandidatePool(lu, exceptions):
1098
  """Adjust the candidate pool after node operations.
1099

1100
  """
1101
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1102
  if mod_list:
1103
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1104
               utils.CommaJoin(node.name for node in mod_list))
1105
    for name in mod_list:
1106
      lu.context.ReaddNode(name)
1107
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1108
  if mc_now > mc_max:
1109
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1110
               (mc_now, mc_max))
1111

    
1112

    
1113
def _DecideSelfPromotion(lu, exceptions=None):
1114
  """Decide whether I should promote myself as a master candidate.
1115

1116
  """
1117
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1118
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1119
  # the new node will increase mc_max with one, so:
1120
  mc_should = min(mc_should + 1, cp_size)
1121
  return mc_now < mc_should
1122

    
1123

    
1124
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1125
  """Check that the brigdes needed by a list of nics exist.
1126

1127
  """
1128
  cluster = lu.cfg.GetClusterInfo()
1129
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1130
  brlist = [params[constants.NIC_LINK] for params in paramslist
1131
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1132
  if brlist:
1133
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1134
    result.Raise("Error checking bridges on destination node '%s'" %
1135
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1136

    
1137

    
1138
def _CheckInstanceBridgesExist(lu, instance, node=None):
1139
  """Check that the brigdes needed by an instance exist.
1140

1141
  """
1142
  if node is None:
1143
    node = instance.primary_node
1144
  _CheckNicsBridgesExist(lu, instance.nics, node)
1145

    
1146

    
1147
def _CheckOSVariant(os_obj, name):
1148
  """Check whether an OS name conforms to the os variants specification.
1149

1150
  @type os_obj: L{objects.OS}
1151
  @param os_obj: OS object to check
1152
  @type name: string
1153
  @param name: OS name passed by the user, to check for validity
1154

1155
  """
1156
  variant = objects.OS.GetVariant(name)
1157
  if not os_obj.supported_variants:
1158
    if variant:
1159
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1160
                                 " passed)" % (os_obj.name, variant),
1161
                                 errors.ECODE_INVAL)
1162
    return
1163
  if not variant:
1164
    raise errors.OpPrereqError("OS name must include a variant",
1165
                               errors.ECODE_INVAL)
1166

    
1167
  if variant not in os_obj.supported_variants:
1168
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1169

    
1170

    
1171
def _GetNodeInstancesInner(cfg, fn):
1172
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1173

    
1174

    
1175
def _GetNodeInstances(cfg, node_name):
1176
  """Returns a list of all primary and secondary instances on a node.
1177

1178
  """
1179

    
1180
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1181

    
1182

    
1183
def _GetNodePrimaryInstances(cfg, node_name):
1184
  """Returns primary instances on a node.
1185

1186
  """
1187
  return _GetNodeInstancesInner(cfg,
1188
                                lambda inst: node_name == inst.primary_node)
1189

    
1190

    
1191
def _GetNodeSecondaryInstances(cfg, node_name):
1192
  """Returns secondary instances on a node.
1193

1194
  """
1195
  return _GetNodeInstancesInner(cfg,
1196
                                lambda inst: node_name in inst.secondary_nodes)
1197

    
1198

    
1199
def _GetStorageTypeArgs(cfg, storage_type):
1200
  """Returns the arguments for a storage type.
1201

1202
  """
1203
  # Special case for file storage
1204
  if storage_type == constants.ST_FILE:
1205
    # storage.FileStorage wants a list of storage directories
1206
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207

    
1208
  return []
1209

    
1210

    
1211
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1212
  faulty = []
1213

    
1214
  for dev in instance.disks:
1215
    cfg.SetDiskID(dev, node_name)
1216

    
1217
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1218
  result.Raise("Failed to get disk status from node %s" % node_name,
1219
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1220

    
1221
  for idx, bdev_status in enumerate(result.payload):
1222
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1223
      faulty.append(idx)
1224

    
1225
  return faulty
1226

    
1227

    
1228
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1229
  """Check the sanity of iallocator and node arguments and use the
1230
  cluster-wide iallocator if appropriate.
1231

1232
  Check that at most one of (iallocator, node) is specified. If none is
1233
  specified, then the LU's opcode's iallocator slot is filled with the
1234
  cluster-wide default iallocator.
1235

1236
  @type iallocator_slot: string
1237
  @param iallocator_slot: the name of the opcode iallocator slot
1238
  @type node_slot: string
1239
  @param node_slot: the name of the opcode target node slot
1240

1241
  """
1242
  node = getattr(lu.op, node_slot, None)
1243
  iallocator = getattr(lu.op, iallocator_slot, None)
1244

    
1245
  if node is not None and iallocator is not None:
1246
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1247
                               errors.ECODE_INVAL)
1248
  elif node is None and iallocator is None:
1249
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1250
    if default_iallocator:
1251
      setattr(lu.op, iallocator_slot, default_iallocator)
1252
    else:
1253
      raise errors.OpPrereqError("No iallocator or node given and no"
1254
                                 " cluster-wide default iallocator found;"
1255
                                 " please specify either an iallocator or a"
1256
                                 " node, or set a cluster-wide default"
1257
                                 " iallocator")
1258

    
1259

    
1260
def _GetDefaultIAllocator(cfg, iallocator):
1261
  """Decides on which iallocator to use.
1262

1263
  @type cfg: L{config.ConfigWriter}
1264
  @param cfg: Cluster configuration object
1265
  @type iallocator: string or None
1266
  @param iallocator: Iallocator specified in opcode
1267
  @rtype: string
1268
  @return: Iallocator name
1269

1270
  """
1271
  if not iallocator:
1272
    # Use default iallocator
1273
    iallocator = cfg.GetDefaultIAllocator()
1274

    
1275
  if not iallocator:
1276
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1277
                               " opcode nor as a cluster-wide default",
1278
                               errors.ECODE_INVAL)
1279

    
1280
  return iallocator
1281

    
1282

    
1283
class LUClusterPostInit(LogicalUnit):
1284
  """Logical unit for running hooks after cluster initialization.
1285

1286
  """
1287
  HPATH = "cluster-init"
1288
  HTYPE = constants.HTYPE_CLUSTER
1289

    
1290
  def BuildHooksEnv(self):
1291
    """Build hooks env.
1292

1293
    """
1294
    return {
1295
      "OP_TARGET": self.cfg.GetClusterName(),
1296
      }
1297

    
1298
  def BuildHooksNodes(self):
1299
    """Build hooks nodes.
1300

1301
    """
1302
    return ([], [self.cfg.GetMasterNode()])
1303

    
1304
  def Exec(self, feedback_fn):
1305
    """Nothing to do.
1306

1307
    """
1308
    return True
1309

    
1310

    
1311
class LUClusterDestroy(LogicalUnit):
1312
  """Logical unit for destroying the cluster.
1313

1314
  """
1315
  HPATH = "cluster-destroy"
1316
  HTYPE = constants.HTYPE_CLUSTER
1317

    
1318
  def BuildHooksEnv(self):
1319
    """Build hooks env.
1320

1321
    """
1322
    return {
1323
      "OP_TARGET": self.cfg.GetClusterName(),
1324
      }
1325

    
1326
  def BuildHooksNodes(self):
1327
    """Build hooks nodes.
1328

1329
    """
1330
    return ([], [])
1331

    
1332
  def CheckPrereq(self):
1333
    """Check prerequisites.
1334

1335
    This checks whether the cluster is empty.
1336

1337
    Any errors are signaled by raising errors.OpPrereqError.
1338

1339
    """
1340
    master = self.cfg.GetMasterNode()
1341

    
1342
    nodelist = self.cfg.GetNodeList()
1343
    if len(nodelist) != 1 or nodelist[0] != master:
1344
      raise errors.OpPrereqError("There are still %d node(s) in"
1345
                                 " this cluster." % (len(nodelist) - 1),
1346
                                 errors.ECODE_INVAL)
1347
    instancelist = self.cfg.GetInstanceList()
1348
    if instancelist:
1349
      raise errors.OpPrereqError("There are still %d instance(s) in"
1350
                                 " this cluster." % len(instancelist),
1351
                                 errors.ECODE_INVAL)
1352

    
1353
  def Exec(self, feedback_fn):
1354
    """Destroys the cluster.
1355

1356
    """
1357
    master = self.cfg.GetMasterNode()
1358

    
1359
    # Run post hooks on master node before it's removed
1360
    _RunPostHook(self, master)
1361

    
1362
    result = self.rpc.call_node_deactivate_master_ip(master)
1363
    result.Raise("Could not disable the master role")
1364

    
1365
    return master
1366

    
1367

    
1368
def _VerifyCertificate(filename):
1369
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1370

1371
  @type filename: string
1372
  @param filename: Path to PEM file
1373

1374
  """
1375
  try:
1376
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1377
                                           utils.ReadFile(filename))
1378
  except Exception, err: # pylint: disable=W0703
1379
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1380
            "Failed to load X509 certificate %s: %s" % (filename, err))
1381

    
1382
  (errcode, msg) = \
1383
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1384
                                constants.SSL_CERT_EXPIRATION_ERROR)
1385

    
1386
  if msg:
1387
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1388
  else:
1389
    fnamemsg = None
1390

    
1391
  if errcode is None:
1392
    return (None, fnamemsg)
1393
  elif errcode == utils.CERT_WARNING:
1394
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1395
  elif errcode == utils.CERT_ERROR:
1396
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1397

    
1398
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1399

    
1400

    
1401
def _GetAllHypervisorParameters(cluster, instances):
1402
  """Compute the set of all hypervisor parameters.
1403

1404
  @type cluster: L{objects.Cluster}
1405
  @param cluster: the cluster object
1406
  @param instances: list of L{objects.Instance}
1407
  @param instances: additional instances from which to obtain parameters
1408
  @rtype: list of (origin, hypervisor, parameters)
1409
  @return: a list with all parameters found, indicating the hypervisor they
1410
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411

1412
  """
1413
  hvp_data = []
1414

    
1415
  for hv_name in cluster.enabled_hypervisors:
1416
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1417

    
1418
  for os_name, os_hvp in cluster.os_hvp.items():
1419
    for hv_name, hv_params in os_hvp.items():
1420
      if hv_params:
1421
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1422
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1423

    
1424
  # TODO: collapse identical parameter values in a single one
1425
  for instance in instances:
1426
    if instance.hvparams:
1427
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1428
                       cluster.FillHV(instance)))
1429

    
1430
  return hvp_data
1431

    
1432

    
1433
class _VerifyErrors(object):
1434
  """Mix-in for cluster/group verify LUs.
1435

1436
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1437
  self.op and self._feedback_fn to be available.)
1438

1439
  """
1440

    
1441
  ETYPE_FIELD = "code"
1442
  ETYPE_ERROR = "ERROR"
1443
  ETYPE_WARNING = "WARNING"
1444

    
1445
  def _Error(self, ecode, item, msg, *args, **kwargs):
1446
    """Format an error message.
1447

1448
    Based on the opcode's error_codes parameter, either format a
1449
    parseable error code, or a simpler error string.
1450

1451
    This must be called only from Exec and functions called from Exec.
1452

1453
    """
1454
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1455
    itype, etxt, _ = ecode
1456
    # first complete the msg
1457
    if args:
1458
      msg = msg % args
1459
    # then format the whole message
1460
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1461
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1462
    else:
1463
      if item:
1464
        item = " " + item
1465
      else:
1466
        item = ""
1467
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1468
    # and finally report it via the feedback_fn
1469
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1470

    
1471
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1472
    """Log an error message if the passed condition is True.
1473

1474
    """
1475
    cond = (bool(cond)
1476
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1477

    
1478
    # If the error code is in the list of ignored errors, demote the error to a
1479
    # warning
1480
    (_, etxt, _) = ecode
1481
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1482
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1483

    
1484
    if cond:
1485
      self._Error(ecode, *args, **kwargs)
1486

    
1487
    # do not mark the operation as failed for WARN cases only
1488
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1489
      self.bad = self.bad or cond
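
  # Typical usage inside a verification LU's Exec (illustrative condition):
  #   self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
  #                 "configuration check failed: %s", msg)
  # A true condition is reported via feedback_fn and, unless the error code
  # was demoted to a warning, marks the whole operation as bad.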
1490

    
1491

    
1492
class LUClusterVerify(NoHooksLU):
1493
  """Submits all jobs necessary to verify the cluster.
1494

1495
  """
1496
  REQ_BGL = False
1497

    
1498
  def ExpandNames(self):
1499
    self.needed_locks = {}
1500

    
1501
  def Exec(self, feedback_fn):
1502
    jobs = []
1503

    
1504
    if self.op.group_name:
1505
      groups = [self.op.group_name]
1506
      depends_fn = lambda: None
1507
    else:
1508
      groups = self.cfg.GetNodeGroupList()
1509

    
1510
      # Verify global configuration
1511
      jobs.append([
1512
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1513
        ])
1514

    
1515
      # Always depend on global verification
1516
      depends_fn = lambda: [(-len(jobs), [])]
1517

    
1518
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1519
                                            ignore_errors=self.op.ignore_errors,
1520
                                            depends=depends_fn())]
1521
                for group in groups)
1522

    
1523
    # Fix up all parameters
1524
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1525
      op.debug_simulate_errors = self.op.debug_simulate_errors
1526
      op.verbose = self.op.verbose
1527
      op.error_codes = self.op.error_codes
1528
      try:
1529
        op.skip_checks = self.op.skip_checks
1530
      except AttributeError:
1531
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1532

    
1533
    return ResultWithJobs(jobs)
1534

    
1535

    
1536
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of the cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

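    # Only report an error when the node's merged timestamp falls outside
    # the [start - skew, end + skew] window, so normal RPC latency between
    # the two time.time() samples does not trigger CV_ENODETIME.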
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
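      # n_img.sbp maps each primary node name to the instances that use
      # this node as a secondary; the check below therefore asks: if that
      # primary fails, does this node have enough free memory to start all
      # of its auto-balanced instances?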
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

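    # At this point fileinfo maps each filename to {checksum: set(nodes)};
    # for example (hypothetical values):
    #   fileinfo["/var/lib/ganeti/known_hosts"] = {"ab12...": set(["node1"])}
    # A healthy cluster has exactly one checksum per tracked file.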
    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

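  # Each nimg.oslist entry built above is a list of
  # (path, status, diagnose, variants, parameters, api_versions) tuples,
  # one per occurrence of the OS on the node; only the first entry is
  # authoritative, additional ones are flagged below as shadowed duplicates.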
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

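  # _SelectSshCheckNodes (below) consumes the per-group cycles returned by
  # _SshNodeSelector: as its docstring notes, every node in this group is
  # told to contact all nodes of its own group plus one node from each
  # other group, with the cycling spreading those cross-group probes over
  # different targets.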
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; their failure is
    logged in the verify output and makes the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

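    # Storage-related checks are only requested when they apply: the LVM
    # and DRBD keys below are added to node_verify_param only if a volume
    # group resp. a DRBD usermode helper is configured for the cluster.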
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

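    # vf_node_info/vf_nvinfo now also cover the master node and (when
    # available) one vm_capable node outside this group, so the file
    # consistency check can compare against the canonical copies even when
    # only part of the cluster is being verified.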
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

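    # extra_lv_nvinfo holds the LV lists fetched separately from secondary
    # nodes living in other groups (see CheckPrereq's extra_lv_nodes); fold
    # their volume data into node_image before the instance checks below.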
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
2933
      for node in inst_config.all_nodes:
2934
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2935
                 instance, "instance lives on ghost node %s", node)
2936
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2937
                 instance, "instance lives on non-vm_capable node %s", node)
2938

    
2939
    feedback_fn("* Verifying orphan volumes")
2940
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2941

    
2942
    # We will get spurious "unknown volume" warnings if any node of this group
2943
    # is secondary for an instance whose primary is in another group. To avoid
2944
    # them, we find these instances and add their volumes to node_vol_should.
2945
    for inst in self.all_inst_info.values():
2946
      for secondary in inst.secondary_nodes:
2947
        if (secondary in self.my_node_info
2948
            and inst.name not in self.my_inst_info):
2949
          inst.MapLVsByNode(node_vol_should)
2950
          break
2951

    
2952
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2953

    
2954
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2955
      feedback_fn("* Verifying N+1 Memory redundancy")
2956
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2957

    
2958
    feedback_fn("* Other Notes")
2959
    if i_non_redundant:
2960
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2961
                  % len(i_non_redundant))
2962

    
2963
    if i_non_a_balanced:
2964
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2965
                  % len(i_non_a_balanced))
2966

    
2967
    if n_offline:
2968
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2969

    
2970
    if n_drained:
2971
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2972

    
2973
    return not self.bad
2974

    
2975
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


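# Illustration for LUClusterVerifyDisks.Exec above (a sketch, assuming a
# cluster with two node groups): the result would look roughly like
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=<group 1>)],
#                   [opcodes.OpGroupVerifyDisks(group_name=<group 2>)]])
# i.e. one single-opcode job per owned node group, so each group is verified
# by an independent LUGroupVerifyDisks job (see the class below).
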
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(key)

    return (res_nodes, list(res_instances), res_missing)


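# Illustration of the LUGroupVerifyDisks.Exec result above (a sketch with
# hypothetical names): with one unreachable node, one instance whose LV is
# offline and one instance with a missing LV, the returned tuple would look
# roughly like
#   ({"node2": "Error enumerating LVs ..."},    # per-node errors
#    ["instance1"],                             # instances needing activate-disks
#    {"instance2": [("node3", "xenvg/disk0")]}) # missing (node, volume) pairs
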
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


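# Illustration for LUClusterRepairDiskSizes above (a sketch, not authoritative):
# blockdev_getsize reports sizes in bytes while the configuration stores MiB,
# hence the "size >> 20" conversion; e.g. a volume reported as 10737418240
# bytes becomes 10240 MiB. Every repaired disk is recorded in the returned
# "changed" list as an (instance_name, disk_index, new_size_in_mib) tuple.
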
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_activate_master_ip(master)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


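# Usage sketch for _ValidateNetmask above (assumptions, not from the original
# source): the netmask is a prefix length, so on a cluster whose primary IP
# family is IPv4 a call such as _ValidateNetmask(cfg, 24) would be expected to
# pass, while a prefix like 64 would only be accepted once the primary IP
# family is IPv6.
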
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

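    # Illustration (hypothetical OS name, not taken from the original code):
    # "mods" is a list of (action, os_name) pairs from the opcode, e.g.
    #   helper_os("hidden_os", [(constants.DDM_ADD, "debian-image")], "hidden")
    # appends the OS name to self.cluster.hidden_os unless already present.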
    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master = self.cfg.GetMasterNode()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master,
                                                        self.op.master_netmask)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        self.LogWarning(msg)
        feedback_fn(msg)
      else:
        self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_activate_master_ip(master)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


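# Note on _UploadHelper above: it silently skips files that do not exist on
# the master (the os.path.exists check) and only warns, rather than fails,
# when a copy to an individual node does not succeed, so callers such as
# LUClusterRename treat distribution as best-effort.
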
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


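# Illustration of the _ComputeAncillaryFiles return value above (a sketch with
# hypothetical paths, not authoritative): the four sets could look roughly like
#   files_all = set(["/var/lib/ganeti/known_hosts", ...])  # every node
#   files_opt = set(["/var/lib/ganeti/rapi/users"])        # may be absent
#   files_mc  = set(["/var/lib/ganeti/config.data"])       # only if redist=False
#   files_vm  = set([...])   # hypervisor-specific ancillary files
# _RedistributeAncillaryFiles below only ships files_all and files_vm.
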
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_activate_master_ip(master)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_deactivate_master_ip(master)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


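# Behaviour sketch for _WaitForSync above (derived from the code, simplified):
# RPC failures are tolerated up to 10 times, with a 6 second pause between
# attempts, before a RemoteError is raised; once every mirror stops reporting
# a sync_percent, the loop still re-polls up to degr_retries (10) times at one
# second intervals if a device looks degraded, so a transient degraded state
# does not immediately make the function return False.
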
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


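# Usage sketch for _CheckDiskConsistency above (illustrative only): for a DRBD
# disk of an instance, a call along the lines of
#   _CheckDiskConsistency(self, dev, node, on_primary=False, ldisk=True)
# asks "is the local storage on this secondary healthy", whereas the default
# ldisk=False asks about the overall is_degraded state; children are recursed
# into with the parent's on_primary value and the default ldisk.
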
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


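# Summary of the payload contract checked by LUOobCommand._CheckPayload above
# (restated from the code, not additional behaviour): "health" must return a
# list of (item, status) pairs with statuses from constants.OOB_STATUSES,
# "power-status" must return a dict (from which Exec reads the powered flag),
# and power-on/power-off/power-cycle must return no payload at all.
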
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
4315
  REQ_BGL = False
4316

    
4317
  @staticmethod
4318
  def _BuildFilter(fields, names):
4319
    """Builds a filter for querying OSes.
4320

4321
    """
4322
    name_filter = qlang.MakeSimpleFilter("name", names)
4323

    
4324
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4325
    # respective field is not requested
4326
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4327
                     for fname in ["hidden", "blacklisted"]
4328
                     if fname not in fields]
4329
    if "valid" not in fields:
4330
      status_filter.append([qlang.OP_TRUE, "valid"])
4331

    
4332
    if status_filter:
4333
      status_filter.insert(0, qlang.OP_AND)
4334
    else:
4335
      status_filter = None
4336

    
4337
    if name_filter and status_filter:
4338
      return [qlang.OP_AND, name_filter, status_filter]
4339
    elif name_filter:
4340
      return name_filter
4341
    else:
4342
      return status_filter
4343

    
4344
  def CheckArguments(self):
4345
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4346
                       self.op.output_fields, False)
4347

    
4348
  def ExpandNames(self):
4349
    self.oq.ExpandNames(self)
4350

    
4351
  def Exec(self, feedback_fn):
4352
    return self.oq.OldStyleQuery(self)
4353

    
4354

    
4355
class LUNodeRemove(LogicalUnit):
4356
  """Logical unit for removing a node.
4357

4358
  """
4359
  HPATH = "node-remove"
4360
  HTYPE = constants.HTYPE_NODE
4361

    
4362
  def BuildHooksEnv(self):
4363
    """Build hooks env.
4364

4365
    This doesn't run on the target node in the pre phase as a failed
4366
    node would then be impossible to remove.
4367

4368
    """
4369
    return {
4370
      "OP_TARGET": self.op.node_name,
4371
      "NODE_NAME": self.op.node_name,
4372
      }
4373

    
4374
  def BuildHooksNodes(self):
4375
    """Build hooks nodes.
4376

4377
    """
4378
    all_nodes = self.cfg.GetNodeList()
4379
    try:
4380
      all_nodes.remove(self.op.node_name)
4381
    except ValueError:
4382
      logging.warning("Node '%s', which is about to be removed, was not found"
4383
                      " in the list of all nodes", self.op.node_name)
4384
    return (all_nodes, all_nodes)
4385

    
4386
  def CheckPrereq(self):
4387
    """Check prerequisites.
4388

4389
    This checks:
4390
     - the node exists in the configuration
4391
     - it does not have primary or secondary instances
4392
     - it's not the master
4393

4394
    Any errors are signaled by raising errors.OpPrereqError.
4395

4396
    """
4397
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4398
    node = self.cfg.GetNodeInfo(self.op.node_name)
4399
    assert node is not None
4400

    
4401
    masternode = self.cfg.GetMasterNode()
4402
    if node.name == masternode:
4403
      raise errors.OpPrereqError("Node is the master node, failover to another"
4404
                                 " node is required", errors.ECODE_INVAL)
4405

    
4406
    for instance_name, instance in self.cfg.GetAllInstancesInfo():
4407
      if node.name in instance.all_nodes:
4408
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4409
                                   " please remove first" % instance_name,
4410
                                   errors.ECODE_INVAL)
4411
    self.op.node_name = node.name
4412
    self.node = node
4413

    
4414
  def Exec(self, feedback_fn):
4415
    """Removes the node from the cluster.
4416

4417
    """
4418
    node = self.node
4419
    logging.info("Stopping the node daemon and removing configs from node %s",
4420
                 node.name)
4421

    
4422
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4423

    
4424
    # Promote nodes to master candidate as needed
4425
    _AdjustCandidatePool(self, exceptions=[node.name])
4426
    self.context.RemoveNode(node.name)
4427

    
4428
    # Run post hooks on the node before it's removed
4429
    _RunPostHook(self, node.name)
4430

    
4431
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4432
    msg = result.fail_msg
4433
    if msg:
4434
      self.LogWarning("Errors encountered on the remote node while leaving"
4435
                      " the cluster: %s", msg)
4436

    
4437
    # Remove node from our /etc/hosts
4438
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4439
      master_node = self.cfg.GetMasterNode()
4440
      result = self.rpc.call_etc_hosts_modify(master_node,
4441
                                              constants.ETC_HOSTS_REMOVE,
4442
                                              node.name, None)
4443
      result.Raise("Can't update hosts file with new host data")
4444
      _RedistributeAncillaryFiles(self)
4445

    
4446

    
4447
class _NodeQuery(_QueryBase):
4448
  FIELDS = query.NODE_FIELDS
4449

    
4450
  def ExpandNames(self, lu):
4451
    lu.needed_locks = {}
4452
    lu.share_locks = _ShareAll()
4453

    
4454
    if self.names:
4455
      self.wanted = _GetWantedNodes(lu, self.names)
4456
    else:
4457
      self.wanted = locking.ALL_SET
4458

    
4459
    self.do_locking = (self.use_locking and
4460
                       query.NQ_LIVE in self.requested_data)
4461

    
4462
    if self.do_locking:
4463
      # If any non-static field is requested we need to lock the nodes
4464
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4465

    
4466
  def DeclareLocks(self, lu, level):
4467
    pass
4468

    
4469
  def _GetQueryData(self, lu):
4470
    """Computes the list of nodes and their attributes.
4471

4472
    """
4473
    all_info = lu.cfg.GetAllNodesInfo()
4474

    
4475
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4476

    
4477
    # Gather data as requested
4478
    if query.NQ_LIVE in self.requested_data:
4479
      # filter out non-vm_capable nodes
4480
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4481

    
4482
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4483
                                        lu.cfg.GetHypervisorType())
4484
      live_data = dict((name, nresult.payload)
4485
                       for (name, nresult) in node_data.items()
4486
                       if not nresult.fail_msg and nresult.payload)
4487
    else:
4488
      live_data = None
4489

    
4490
    if query.NQ_INST in self.requested_data:
4491
      node_to_primary = dict([(name, set()) for name in nodenames])
4492
      node_to_secondary = dict([(name, set()) for name in nodenames])
4493

    
4494
      inst_data = lu.cfg.GetAllInstancesInfo()
4495

    
4496
      for inst in inst_data.values():
4497
        if inst.primary_node in node_to_primary:
4498
          node_to_primary[inst.primary_node].add(inst.name)
4499
        for secnode in inst.secondary_nodes:
4500
          if secnode in node_to_secondary:
4501
            node_to_secondary[secnode].add(inst.name)
4502
    else:
4503
      node_to_primary = None
4504
      node_to_secondary = None
4505

    
4506
    if query.NQ_OOB in self.requested_data:
4507
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4508
                         for name, node in all_info.iteritems())
4509
    else:
4510
      oob_support = None
4511

    
4512
    if query.NQ_GROUP in self.requested_data:
4513
      groups = lu.cfg.GetAllNodeGroupsInfo()
4514
    else:
4515
      groups = {}
4516

    
4517
    return query.NodeQueryData([all_info[name] for name in nodenames],
4518
                               live_data, lu.cfg.GetMasterNode(),
4519
                               node_to_primary, node_to_secondary, groups,
4520
                               oob_support, lu.cfg.GetClusterInfo())
4521

    
4522

    
4523
class LUNodeQuery(NoHooksLU):
4524
  """Logical unit for querying nodes.
4525

4526
  """
4527
  # pylint: disable=W0142
4528
  REQ_BGL = False
4529

    
4530
  def CheckArguments(self):
4531
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4532
                         self.op.output_fields, self.op.use_locking)
4533

    
4534
  def ExpandNames(self):
4535
    self.nq.ExpandNames(self)
4536

    
4537
  def Exec(self, feedback_fn):
4538
    return self.nq.OldStyleQuery(self)
4539

    
4540

    
4541
class LUNodeQueryvols(NoHooksLU):
4542
  """Logical unit for getting volumes on node(s).
4543

4544
  """
4545
  REQ_BGL = False
4546
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4547
  _FIELDS_STATIC = utils.FieldSet("node")
4548

    
4549
  def CheckArguments(self):
4550
    _CheckOutputFields(static=self._FIELDS_STATIC,
4551
                       dynamic=self._FIELDS_DYNAMIC,
4552
                       selected=self.op.output_fields)
4553

    
4554
  def ExpandNames(self):
4555
    self.needed_locks = {}
4556
    self.share_locks[locking.LEVEL_NODE] = 1
4557
    if not self.op.nodes:
4558
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4559
    else:
4560
      self.needed_locks[locking.LEVEL_NODE] = \
4561
        _GetWantedNodes(self, self.op.nodes)
4562

    
4563
  def Exec(self, feedback_fn):
4564
    """Computes the list of nodes and their attributes.
4565

4566
    """
4567
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4568
    volumes = self.rpc.call_node_volumes(nodenames)
4569

    
4570
    ilist = self.cfg.GetAllInstancesInfo()
4571
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4572

    
4573
    output = []
4574
    for node in nodenames:
4575
      nresult = volumes[node]
4576
      if nresult.offline:
4577
        continue
4578
      msg = nresult.fail_msg
4579
      if msg:
4580
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4581
        continue
4582

    
4583
      node_vols = sorted(nresult.payload,
4584
                         key=operator.itemgetter("dev"))
4585

    
4586
      for vol in node_vols:
4587
        node_output = []
4588
        for field in self.op.output_fields:
4589
          if field == "node":
4590
            val = node
4591
          elif field == "phys":
4592
            val = vol["dev"]
4593
          elif field == "vg":
4594
            val = vol["vg"]
4595
          elif field == "name":
4596
            val = vol["name"]
4597
          elif field == "size":
4598
            val = int(float(vol["size"]))
4599
          elif field == "instance":
4600
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4601
          else:
4602
            raise errors.ParameterError(field)
4603
          node_output.append(str(val))
4604

    
4605
        output.append(node_output)
4606

    
4607
    return output
4608

    
4609

    
4610
class LUNodeQueryStorage(NoHooksLU):
4611
  """Logical unit for getting information on storage units on node(s).
4612

4613
  """
4614
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4615
  REQ_BGL = False
4616

    
4617
  def CheckArguments(self):
4618
    _CheckOutputFields(static=self._FIELDS_STATIC,
4619
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4620
                       selected=self.op.output_fields)
4621

    
4622
  def ExpandNames(self):
4623
    self.needed_locks = {}
4624
    self.share_locks[locking.LEVEL_NODE] = 1
4625

    
4626
    if self.op.nodes:
4627
      self.needed_locks[locking.LEVEL_NODE] = \
4628
        _GetWantedNodes(self, self.op.nodes)
4629
    else:
4630
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4631

    
4632
  def Exec(self, feedback_fn):
4633
    """Computes the list of nodes and their attributes.
4634

4635
    """
4636
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4637

    
4638
    # Always get name to sort by
4639
    if constants.SF_NAME in self.op.output_fields:
4640
      fields = self.op.output_fields[:]
4641
    else:
4642
      fields = [constants.SF_NAME] + self.op.output_fields
4643

    
4644
    # Never ask for node or type as it's only known to the LU
4645
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4646
      while extra in fields:
4647
        fields.remove(extra)
4648

    
4649
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4650
    name_idx = field_idx[constants.SF_NAME]
4651

    
4652
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4653
    data = self.rpc.call_storage_list(self.nodes,
4654
                                      self.op.storage_type, st_args,
4655
                                      self.op.name, fields)
4656

    
4657
    result = []
4658

    
4659
    for node in utils.NiceSort(self.nodes):
4660
      nresult = data[node]
4661
      if nresult.offline:
4662
        continue
4663

    
4664
      msg = nresult.fail_msg
4665
      if msg:
4666
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4667
        continue
4668

    
4669
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4670

    
4671
      for name in utils.NiceSort(rows.keys()):
4672
        row = rows[name]
4673

    
4674
        out = []
4675

    
4676
        for field in self.op.output_fields:
4677
          if field == constants.SF_NODE:
4678
            val = node
4679
          elif field == constants.SF_TYPE:
4680
            val = self.op.storage_type
4681
          elif field in field_idx:
4682
            val = row[field_idx[field]]
4683
          else:
4684
            raise errors.ParameterError(field)
4685

    
4686
          out.append(val)
4687

    
4688
        result.append(out)
4689

    
4690
    return result
4691

    
4692

    
4693
class _InstanceQuery(_QueryBase):
4694
  FIELDS = query.INSTANCE_FIELDS
4695

    
4696
  def ExpandNames(self, lu):
4697
    lu.needed_locks = {}
4698
    lu.share_locks = _ShareAll()
4699

    
4700
    if self.names:
4701
      self.wanted = _GetWantedInstances(lu, self.names)
4702
    else:
4703
      self.wanted = locking.ALL_SET
4704

    
4705
    self.do_locking = (self.use_locking and
4706
                       query.IQ_LIVE in self.requested_data)
4707
    if self.do_locking:
4708
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4709
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4710
      lu.needed_locks[locking.LEVEL_NODE] = []
4711
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4712

    
4713
    self.do_grouplocks = (self.do_locking and
4714
                          query.IQ_NODES in self.requested_data)
4715

    
4716
  def DeclareLocks(self, lu, level):
4717
    if self.do_locking:
4718
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4719
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4720

    
4721
        # Lock all groups used by instances optimistically; this requires going
4722
        # via the node before it's locked, requiring verification later on
4723
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4724
          set(group_uuid
4725
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4726
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4727
      elif level == locking.LEVEL_NODE:
4728
        lu._LockInstancesNodes() # pylint: disable=W0212
4729

    
4730
  @staticmethod
4731
  def _CheckGroupLocks(lu):
4732
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4733
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4734

    
4735
    # Check if node groups for locked instances are still correct
4736
    for instance_name in owned_instances:
4737
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4738

    
4739
  def _GetQueryData(self, lu):
4740
    """Computes the list of instances and their attributes.
4741

4742
    """
4743
    if self.do_grouplocks:
4744
      self._CheckGroupLocks(lu)
4745

    
4746
    cluster = lu.cfg.GetClusterInfo()
4747
    all_info = lu.cfg.GetAllInstancesInfo()
4748

    
4749
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4750

    
4751
    instance_list = [all_info[name] for name in instance_names]
4752
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4753
                                        for inst in instance_list)))
4754
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4755
    bad_nodes = []
4756
    offline_nodes = []
4757
    wrongnode_inst = set()
4758

    
4759
    # Gather data as requested
4760
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4761
      live_data = {}
4762
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4763
      for name in nodes:
4764
        result = node_data[name]
4765
        if result.offline:
4766
          # offline nodes will be in both lists
4767
          assert result.fail_msg
4768
          offline_nodes.append(name)
4769
        if result.fail_msg:
4770
          bad_nodes.append(name)
4771
        elif result.payload:
4772
          for inst in result.payload:
4773
            if inst in all_info:
4774
              if all_info[inst].primary_node == name:
4775
                live_data.update(result.payload)
4776
              else:
4777
                wrongnode_inst.add(inst)
4778
            else:
4779
              # orphan instance; we don't list it here as we don't
4780
              # handle this case yet in the output of instance listing
4781
              logging.warning("Orphan instance '%s' found on node %s",
4782
                              inst, name)
4783
        # else no instance is alive
4784
    else:
4785
      live_data = {}
4786

    
4787
    if query.IQ_DISKUSAGE in self.requested_data:
4788
      disk_usage = dict((inst.name,
4789
                         _ComputeDiskSize(inst.disk_template,
4790
                                          [{constants.IDISK_SIZE: disk.size}
4791
                                           for disk in inst.disks]))
4792
                        for inst in instance_list)
4793
    else:
4794
      disk_usage = None
4795

    
4796
    if query.IQ_CONSOLE in self.requested_data:
4797
      consinfo = {}
4798
      for inst in instance_list:
4799
        if inst.name in live_data:
4800
          # Instance is running
4801
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4802
        else:
4803
          consinfo[inst.name] = None
4804
      assert set(consinfo.keys()) == set(instance_names)
4805
    else:
4806
      consinfo = None
4807

    
4808
    if query.IQ_NODES in self.requested_data:
4809
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4810
                                            instance_list)))
4811
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4812
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4813
                    for uuid in set(map(operator.attrgetter("group"),
4814
                                        nodes.values())))
4815
    else:
4816
      nodes = None
4817
      groups = None
4818

    
4819
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4820
                                   disk_usage, offline_nodes, bad_nodes,
4821
                                   live_data, wrongnode_inst, consinfo,
4822
                                   nodes, groups)
4823

    
4824

    
4825
class LUQuery(NoHooksLU):
4826
  """Query for resources/items of a certain kind.
4827

4828
  """
4829
  # pylint: disable=W0142
4830
  REQ_BGL = False
4831

    
4832
  def CheckArguments(self):
4833
    qcls = _GetQueryImplementation(self.op.what)
4834

    
4835
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4836

    
4837
  def ExpandNames(self):
4838
    self.impl.ExpandNames(self)
4839

    
4840
  def DeclareLocks(self, level):
4841
    self.impl.DeclareLocks(self, level)
4842

    
4843
  def Exec(self, feedback_fn):
4844
    return self.impl.NewStyleQuery(self)
4845

    
4846

    
4847
class LUQueryFields(NoHooksLU):
4848
  """Query for resources/items of a certain kind.
4849

4850
  """
4851
  # pylint: disable=W0142
4852
  REQ_BGL = False
4853

    
4854
  def CheckArguments(self):
4855
    self.qcls = _GetQueryImplementation(self.op.what)
4856

    
4857
  def ExpandNames(self):
4858
    self.needed_locks = {}
4859

    
4860
  def Exec(self, feedback_fn):
4861
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4862

    
4863

    
4864
class LUNodeModifyStorage(NoHooksLU):
4865
  """Logical unit for modifying a storage volume on a node.
4866

4867
  """
4868
  REQ_BGL = False
4869

    
4870
  def CheckArguments(self):
4871
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4872

    
4873
    storage_type = self.op.storage_type
4874

    
4875
    try:
4876
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4877
    except KeyError:
4878
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4879
                                 " modified" % storage_type,
4880
                                 errors.ECODE_INVAL)
4881

    
4882
    diff = set(self.op.changes.keys()) - modifiable
4883
    if diff:
4884
      raise errors.OpPrereqError("The following fields can not be modified for"
4885
                                 " storage units of type '%s': %r" %
4886
                                 (storage_type, list(diff)),
4887
                                 errors.ECODE_INVAL)
4888

    
4889
  def ExpandNames(self):
4890
    self.needed_locks = {
4891
      locking.LEVEL_NODE: self.op.node_name,
4892
      }
4893

    
4894
  def Exec(self, feedback_fn):
4895
    """Computes the list of nodes and their attributes.
4896

4897
    """
4898
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4899
    result = self.rpc.call_storage_modify(self.op.node_name,
4900
                                          self.op.storage_type, st_args,
4901
                                          self.op.name, self.op.changes)
4902
    result.Raise("Failed to modify storage unit '%s' on %s" %
4903
                 (self.op.name, self.op.node_name))
4904

    
4905

    
4906
class LUNodeAdd(LogicalUnit):
4907
  """Logical unit for adding node to the cluster.
4908

4909
  """
4910
  HPATH = "node-add"
4911
  HTYPE = constants.HTYPE_NODE
4912
  _NFLAGS = ["master_capable", "vm_capable"]
4913

    
4914
  def CheckArguments(self):
4915
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4916
    # validate/normalize the node name
4917
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4918
                                         family=self.primary_ip_family)
4919
    self.op.node_name = self.hostname.name
4920

    
4921
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4922
      raise errors.OpPrereqError("Cannot readd the master node",
4923
                                 errors.ECODE_STATE)
4924

    
4925
    if self.op.readd and self.op.group:
4926
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4927
                                 " being readded", errors.ECODE_INVAL)
4928

    
4929
  def BuildHooksEnv(self):
4930
    """Build hooks env.
4931

4932
    This will run on all nodes before, and on all nodes + the new node after.
4933

4934
    """
4935
    return {
4936
      "OP_TARGET": self.op.node_name,
4937
      "NODE_NAME": self.op.node_name,
4938
      "NODE_PIP": self.op.primary_ip,
4939
      "NODE_SIP": self.op.secondary_ip,
4940
      "MASTER_CAPABLE": str(self.op.master_capable),
4941
      "VM_CAPABLE": str(self.op.vm_capable),
4942
      }
4943

    
4944
  def BuildHooksNodes(self):
4945
    """Build hooks nodes.
4946

4947
    """
4948
    # Exclude added node
4949
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4950
    post_nodes = pre_nodes + [self.op.node_name, ]
4951

    
4952
    return (pre_nodes, post_nodes)
4953

    
4954
  def CheckPrereq(self):
4955
    """Check prerequisites.
4956

4957
    This checks:
4958
     - the new node is not already in the config
4959
     - it is resolvable
4960
     - its parameters (single/dual homed) matches the cluster
4961

4962
    Any errors are signaled by raising errors.OpPrereqError.
4963

4964
    """
4965
    cfg = self.cfg
4966
    hostname = self.hostname
4967
    node = hostname.name
4968
    primary_ip = self.op.primary_ip = hostname.ip
4969
    if self.op.secondary_ip is None:
4970
      if self.primary_ip_family == netutils.IP6Address.family:
4971
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4972
                                   " IPv4 address must be given as secondary",
4973
                                   errors.ECODE_INVAL)
4974
      self.op.secondary_ip = primary_ip
4975

    
4976
    secondary_ip = self.op.secondary_ip
4977
    if not netutils.IP4Address.IsValid(secondary_ip):
4978
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4979
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4980

    
4981
    node_list = cfg.GetNodeList()
4982
    if not self.op.readd and node in node_list:
4983
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4984
                                 node, errors.ECODE_EXISTS)
4985
    elif self.op.readd and node not in node_list:
4986
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4987
                                 errors.ECODE_NOENT)
4988

    
4989
    self.changed_primary_ip = False
4990

    
4991
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4992
      if self.op.readd and node == existing_node_name:
4993
        if existing_node.secondary_ip != secondary_ip:
4994
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4995
                                     " address configuration as before",
4996
                                     errors.ECODE_INVAL)
4997
        if existing_node.primary_ip != primary_ip:
4998
          self.changed_primary_ip = True
4999

    
5000
        continue
5001

    
5002
      if (existing_node.primary_ip == primary_ip or
5003
          existing_node.secondary_ip == primary_ip or
5004
          existing_node.primary_ip == secondary_ip or
5005
          existing_node.secondary_ip == secondary_ip):
5006
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5007
                                   " existing node %s" % existing_node.name,
5008
                                   errors.ECODE_NOTUNIQUE)
5009

    
5010
    # After this 'if' block, None is no longer a valid value for the
5011
    # _capable op attributes
5012
    if self.op.readd:
5013
      old_node = self.cfg.GetNodeInfo(node)
5014
      assert old_node is not None, "Can't retrieve locked node %s" % node
5015
      for attr in self._NFLAGS:
5016
        if getattr(self.op, attr) is None:
5017
          setattr(self.op, attr, getattr(old_node, attr))
5018
    else:
5019
      for attr in self._NFLAGS:
5020
        if getattr(self.op, attr) is None:
5021
          setattr(self.op, attr, True)
5022

    
5023
    if self.op.readd and not self.op.vm_capable:
5024
      pri, sec = cfg.GetNodeInstances(node)
5025
      if pri or sec:
5026
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5027
                                   " flag set to false, but it already holds"
5028
                                   " instances" % node,
5029
                                   errors.ECODE_STATE)
5030

    
5031
    # check that the type of the node (single versus dual homed) is the
5032
    # same as for the master
5033
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5034
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5035
    newbie_singlehomed = secondary_ip == primary_ip
5036
    if master_singlehomed != newbie_singlehomed:
5037
      if master_singlehomed:
5038
        raise errors.OpPrereqError("The master has no secondary ip but the"
5039
                                   " new node has one",
5040
                                   errors.ECODE_INVAL)
5041
      else:
5042
        raise errors.OpPrereqError("The master has a secondary ip but the"
5043
                                   " new node doesn't have one",
5044
                                   errors.ECODE_INVAL)
5045

    
5046
    # checks reachability
5047
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5048
      raise errors.OpPrereqError("Node not reachable by ping",
5049
                                 errors.ECODE_ENVIRON)
5050

    
5051
    if not newbie_singlehomed:
5052
      # check reachability from my secondary ip to newbie's secondary ip
5053
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5054
                           source=myself.secondary_ip):
5055
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5056
                                   " based ping to node daemon port",
5057
                                   errors.ECODE_ENVIRON)
5058

    
5059
    if self.op.readd:
5060
      exceptions = [node]
5061
    else:
5062
      exceptions = []
5063

    
5064
    if self.op.master_capable:
5065
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5066
    else:
5067
      self.master_candidate = False
5068

    
5069
    if self.op.readd:
5070
      self.new_node = old_node
5071
    else:
5072
      node_group = cfg.LookupNodeGroup(self.op.group)
5073
      self.new_node = objects.Node(name=node,
5074
                                   primary_ip=primary_ip,
5075
                                   secondary_ip=secondary_ip,
5076
                                   master_candidate=self.master_candidate,
5077
                                   offline=False, drained=False,
5078
                                   group=node_group)
5079

    
5080
    if self.op.ndparams:
5081
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5082

    
5083
  def Exec(self, feedback_fn):
5084
    """Adds the new node to the cluster.
5085

5086
    """
5087
    new_node = self.new_node
5088
    node = new_node.name
5089

    
5090
    # We adding a new node so we assume it's powered
5091
    new_node.powered = True
5092

    
5093
    # for re-adds, reset the offline/drained/master-candidate flags;
5094
    # we need to reset here, otherwise offline would prevent RPC calls
5095
    # later in the procedure; this also means that if the re-add
5096
    # fails, we are left with a non-offlined, broken node
5097
    if self.op.readd:
5098
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5099
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5100
      # if we demote the node, we do cleanup later in the procedure
5101
      new_node.master_candidate = self.master_candidate
5102
      if self.changed_primary_ip:
5103
        new_node.primary_ip = self.op.primary_ip
5104

    
5105
    # copy the master/vm_capable flags
5106
    for attr in self._NFLAGS:
5107
      setattr(new_node, attr, getattr(self.op, attr))
5108

    
5109
    # notify the user about any possible mc promotion
5110
    if new_node.master_candidate:
5111
      self.LogInfo("Node will be a master candidate")
5112

    
5113
    if self.op.ndparams:
5114
      new_node.ndparams = self.op.ndparams
5115
    else:
5116
      new_node.ndparams = {}
5117

    
5118
    # check connectivity
5119
    result = self.rpc.call_version([node])[node]
5120
    result.Raise("Can't get version information from node %s" % node)
5121
    if constants.PROTOCOL_VERSION == result.payload:
5122
      logging.info("Communication to node %s fine, sw version %s match",
5123
                   node, result.payload)
5124
    else:
5125
      raise errors.OpExecError("Version mismatch master version %s,"
5126
                               " node version %s" %
5127
                               (constants.PROTOCOL_VERSION, result.payload))
5128

    
5129
    # Add node to our /etc/hosts, and add key to known_hosts
5130
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5131
      master_node = self.cfg.GetMasterNode()
5132
      result = self.rpc.call_etc_hosts_modify(master_node,
5133
                                              constants.ETC_HOSTS_ADD,
5134
                                              self.hostname.name,
5135
                                              self.hostname.ip)
5136
      result.Raise("Can't update hosts file with new host data")
5137

    
5138
    if new_node.secondary_ip != new_node.primary_ip:
5139
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5140
                               False)
5141

    
5142
    node_verify_list = [self.cfg.GetMasterNode()]
5143
    node_verify_param = {
5144
      constants.NV_NODELIST: ([node], {}),
5145
      # TODO: do a node-net-test as well?
5146
    }
5147

    
5148
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5149
                                       self.cfg.GetClusterName())
5150
    for verifier in node_verify_list:
5151
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5152
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5153
      if nl_payload:
5154
        for failed in nl_payload:
5155
          feedback_fn("ssh/hostname verification failed"
5156
                      " (checking from %s): %s" %
5157
                      (verifier, nl_payload[failed]))
5158
        raise errors.OpExecError("ssh/hostname verification failed")
5159

    
5160
    if self.op.readd:
5161
      _RedistributeAncillaryFiles(self)
5162
      self.context.ReaddNode(new_node)
5163
      # make sure we redistribute the config
5164
      self.cfg.Update(new_node, feedback_fn)
5165
      # and make sure the new node will not have old files around
5166
      if not new_node.master_candidate:
5167
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5168
        msg = result.fail_msg
5169
        if msg:
5170
          self.LogWarning("Node failed to demote itself from master"
5171
                          " candidate status: %s" % msg)
5172
    else:
5173
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5174
                                  additional_vm=self.op.vm_capable)
5175
      self.context.AddNode(new_node, self.proc.GetECId())
5176

    
5177

    
5178
class LUNodeSetParams(LogicalUnit):
5179
  """Modifies the parameters of a node.
5180

5181
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5182
      to the node role (as _ROLE_*)
5183
  @cvar _R2F: a dictionary from node role to tuples of flags
5184
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5185

5186
  """
5187
  HPATH = "node-modify"
5188
  HTYPE = constants.HTYPE_NODE
5189
  REQ_BGL = False
5190
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5191
  _F2R = {
5192
    (True, False, False): _ROLE_CANDIDATE,
5193
    (False, True, False): _ROLE_DRAINED,
5194
    (False, False, True): _ROLE_OFFLINE,
5195
    (False, False, False): _ROLE_REGULAR,
5196
    }
5197
  _R2F = dict((v, k) for k, v in _F2R.items())
5198
  _FLAGS = ["master_candidate", "drained", "offline"]
5199

    
5200
  def CheckArguments(self):
5201
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5202
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5203
                self.op.master_capable, self.op.vm_capable,
5204
                self.op.secondary_ip, self.op.ndparams]
5205
    if all_mods.count(None) == len(all_mods):
5206
      raise errors.OpPrereqError("Please pass at least one modification",
5207
                                 errors.ECODE_INVAL)
5208
    if all_mods.count(True) > 1:
5209
      raise errors.OpPrereqError("Can't set the node into more than one"
5210
                                 " state at the same time",
5211
                                 errors.ECODE_INVAL)
5212

    
5213
    # Boolean value that tells us whether we might be demoting from MC
5214
    self.might_demote = (self.op.master_candidate == False or
5215
                         self.op.offline == True or
5216
                         self.op.drained == True or
5217
                         self.op.master_capable == False)
5218

    
5219
    if self.op.secondary_ip:
5220
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5221
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5222
                                   " address" % self.op.secondary_ip,
5223
                                   errors.ECODE_INVAL)
5224

    
5225
    self.lock_all = self.op.auto_promote and self.might_demote
5226
    self.lock_instances = self.op.secondary_ip is not None
5227

    
5228
  def ExpandNames(self):
5229
    if self.lock_all:
5230
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5231
    else:
5232
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5233

    
5234
    if self.lock_instances:
5235
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5236

    
5237
  def DeclareLocks(self, level):
5238
    # If we have locked all instances, before waiting to lock nodes, release
5239
    # all the ones living on nodes unrelated to the current operation.
5240
    if level == locking.LEVEL_NODE and self.lock_instances:
5241
      self.affected_instances = []
5242
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5243
        instances_keep = []
5244

    
5245
        # Build list of instances to release
5246
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5247
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5248
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5249
              self.op.node_name in instance.all_nodes):
5250
            instances_keep.append(instance_name)
5251
            self.affected_instances.append(instance)
5252

    
5253
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5254

    
5255
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5256
                set(instances_keep))
5257

    
5258
  def BuildHooksEnv(self):
5259
    """Build hooks env.
5260

5261
    This runs on the master node.
5262

5263
    """
5264
    return {
5265
      "OP_TARGET": self.op.node_name,
5266
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5267
      "OFFLINE": str(self.op.offline),
5268
      "DRAINED": str(self.op.drained),
5269
      "MASTER_CAPABLE": str(self.op.master_capable),
5270
      "VM_CAPABLE": str(self.op.vm_capable),
5271
      }
5272

    
5273
  def BuildHooksNodes(self):
5274
    """Build hooks nodes.
5275

5276
    """
5277
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5278
    return (nl, nl)
5279

    
5280
  def CheckPrereq(self):
5281
    """Check prerequisites.
5282

5283
    This only checks the instance list against the existing names.
5284

5285
    """
5286
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5287

    
5288
    if (self.op.master_candidate is not None or
5289
        self.op.drained is not None or
5290
        self.op.offline is not None):
5291
      # we can't change the master's node flags
5292
      if self.op.node_name == self.cfg.GetMasterNode():
5293
        raise errors.OpPrereqError("The master role can be changed"
5294
                                   " only via master-failover",
5295
                                   errors.ECODE_INVAL)
5296

    
5297
    if self.op.master_candidate and not node.master_capable:
5298
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5299
                                 " it a master candidate" % node.name,
5300
                                 errors.ECODE_STATE)
5301

    
5302
    if self.op.vm_capable == False:
5303
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5304
      if ipri or isec:
5305
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5306
                                   " the vm_capable flag" % node.name,
5307
                                   errors.ECODE_STATE)
5308

    
5309
    if node.master_candidate and self.might_demote and not self.lock_all:
5310
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5311
      # check if after removing the current node, we're missing master
5312
      # candidates
5313
      (mc_remaining, mc_should, _) = \
5314
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5315
      if mc_remaining < mc_should:
5316
        raise errors.OpPrereqError("Not enough master candidates, please"
5317
                                   " pass auto promote option to allow"
5318
                                   " promotion", errors.ECODE_STATE)
5319

    
5320
    self.old_flags = old_flags = (node.master_candidate,
5321
                                  node.drained, node.offline)
5322
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5323
    self.old_role = old_role = self._F2R[old_flags]
5324

    
5325
    # Check for ineffective changes
5326
    for attr in self._FLAGS:
5327
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5328
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5329
        setattr(self.op, attr, None)
5330

    
5331
    # Past this point, any flag change to False means a transition
5332
    # away from the respective state, as only real changes are kept
5333

    
5334
    # TODO: We might query the real power state if it supports OOB
5335
    if _SupportsOob(self.cfg, node):
5336
      if self.op.offline is False and not (node.powered or
5337
                                           self.op.powered == True):
5338
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5339
                                    " offline status can be reset") %
5340
                                   self.op.node_name)
5341
    elif self.op.powered is not None:
5342
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5343
                                  " as it does not support out-of-band"
5344
                                  " handling") % self.op.node_name)
5345

    
5346
    # If we're being deofflined/drained, we'll MC ourself if needed
5347
    if (self.op.drained == False or self.op.offline == False or
5348
        (self.op.master_capable and not node.master_capable)):
5349
      if _DecideSelfPromotion(self):
5350
        self.op.master_candidate = True
5351
        self.LogInfo("Auto-promoting node to master candidate")
5352

    
5353
    # If we're no longer master capable, we'll demote ourselves from MC
5354
    if self.op.master_capable == False and node.master_candidate:
5355
      self.LogInfo("Demoting from master candidate")
5356
      self.op.master_candidate = False
5357

    
5358
    # Compute new role
5359
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5360
    if self.op.master_candidate:
5361
      new_role = self._ROLE_CANDIDATE
5362
    elif self.op.drained:
5363
      new_role = self._ROLE_DRAINED
5364
    elif self.op.offline:
5365
      new_role = self._ROLE_OFFLINE
5366
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5367
      # False is still in new flags, which means we're un-setting (the
5368
      # only) True flag
5369
      new_role = self._ROLE_REGULAR
5370
    else: # no new flags, nothing, keep old role
5371
      new_role = old_role
5372

    
5373
    self.new_role = new_role
5374

    
5375
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5376
      # Trying to transition out of offline status
5377
      result = self.rpc.call_version([node.name])[node.name]
5378
      if result.fail_msg:
5379
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5380
                                   " to report its version: %s" %
5381
                                   (node.name, result.fail_msg),
5382
                                   errors.ECODE_STATE)
5383
      else:
5384
        self.LogWarning("Transitioning node from offline to online state"
5385
                        " without using re-add. Please make sure the node"
5386
                        " is healthy!")
5387

    
5388
    if self.op.secondary_ip:
5389
      # Ok even without locking, because this can't be changed by any LU
5390
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5391
      master_singlehomed = master.secondary_ip == master.primary_ip
5392
      if master_singlehomed and self.op.secondary_ip:
5393
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5394
                                   " homed cluster", errors.ECODE_INVAL)
5395

    
5396
      if node.offline:
5397
        if self.affected_instances:
5398
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5399
                                     " node has instances (%s) configured"
5400
                                     " to use it" % self.affected_instances)
5401
      else:
5402
        # On online nodes, check that no instances are running, and that
5403
        # the node has the new ip and we can reach it.
5404
        for instance in self.affected_instances:
5405
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5406

    
5407
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5408
        if master.name != node.name:
5409
          # check reachability from master secondary ip to new secondary ip
5410
          if not netutils.TcpPing(self.op.secondary_ip,
5411
                                  constants.DEFAULT_NODED_PORT,
5412
                                  source=master.secondary_ip):
5413
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5414
                                       " based ping to node daemon port",
5415
                                       errors.ECODE_ENVIRON)
5416

    
5417
    if self.op.ndparams:
5418
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5419
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5420
      self.new_ndparams = new_ndparams
5421

    
5422
  def Exec(self, feedback_fn):
5423
    """Modifies a node.
5424

5425
    """
5426
    node = self.node
5427
    old_role = self.old_role
5428
    new_role = self.new_role
5429

    
5430
    result = []
5431

    
5432
    if self.op.ndparams:
5433
      node.ndparams = self.new_ndparams
5434

    
5435
    if self.op.powered is not None:
5436
      node.powered = self.op.powered
5437

    
5438
    for attr in ["master_capable", "vm_capable"]:
5439
      val = getattr(self.op, attr)
5440
      if val is not None:
5441
        setattr(node, attr, val)
5442
        result.append((attr, str(val)))
5443

    
5444
    if new_role != old_role:
5445
      # Tell the node to demote itself, if no longer MC and not offline
5446
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5447
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5448
        if msg:
5449
          self.LogWarning("Node failed to demote itself: %s", msg)
5450

    
5451
      new_flags = self._R2F[new_role]
5452
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5453
        if of != nf:
5454
          result.append((desc, str(nf)))
5455
      (node.master_candidate, node.drained, node.offline) = new_flags
5456

    
5457
      # we locked all nodes, we adjust the CP before updating this node
5458
      if self.lock_all:
5459
        _AdjustCandidatePool(self, [node.name])
5460

    
5461
    if self.op.secondary_ip:
5462
      node.secondary_ip = self.op.secondary_ip
5463
      result.append(("secondary_ip", self.op.secondary_ip))
5464

    
5465
    # this will trigger configuration file update, if needed
5466
    self.cfg.Update(node, feedback_fn)
5467

    
5468
    # this will trigger job queue propagation or cleanup if the mc
5469
    # flag changed
5470
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5471
      self.context.ReaddNode(node)
5472

    
5473
    return result
5474

    
5475

    
5476
class LUNodePowercycle(NoHooksLU):
5477
  """Powercycles a node.
5478

5479
  """
5480
  REQ_BGL = False
5481

    
5482
  def CheckArguments(self):
5483
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5484
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5485
      raise errors.OpPrereqError("The node is the master and the force"
5486
                                 " parameter was not set",
5487
                                 errors.ECODE_INVAL)
5488

    
5489
  def ExpandNames(self):
5490
    """Locking for PowercycleNode.
5491

5492
    This is a last-resort option and shouldn't block on other
5493
    jobs. Therefore, we grab no locks.
5494

5495
    """
5496
    self.needed_locks = {}
5497

    
5498
  def Exec(self, feedback_fn):
5499
    """Reboots a node.
5500

5501
    """
5502
    result = self.rpc.call_node_powercycle(self.op.node_name,
5503
                                           self.cfg.GetHypervisorType())
5504
    result.Raise("Failed to schedule the reboot")
5505
    return result.payload
5506

    
5507

    
5508
class LUClusterQuery(NoHooksLU):
5509
  """Query cluster configuration.
5510

5511
  """
5512
  REQ_BGL = False
5513

    
5514
  def ExpandNames(self):
5515
    self.needed_locks = {}
5516

    
5517
  def Exec(self, feedback_fn):
5518
    """Return cluster config.
5519

5520
    """
5521
    cluster = self.cfg.GetClusterInfo()
5522
    os_hvp = {}
5523

    
5524
    # Filter just for enabled hypervisors
5525
    for os_name, hv_dict in cluster.os_hvp.items():
5526
      os_hvp[os_name] = {}
5527
      for hv_name, hv_params in hv_dict.items():
5528
        if hv_name in cluster.enabled_hypervisors:
5529
          os_hvp[os_name][hv_name] = hv_params
5530

    
5531
    # Convert ip_family to ip_version
5532
    primary_ip_version = constants.IP4_VERSION
5533
    if cluster.primary_ip_family == netutils.IP6Address.family:
5534
      primary_ip_version = constants.IP6_VERSION
5535

    
5536
    result = {
5537
      "software_version": constants.RELEASE_VERSION,
5538
      "protocol_version": constants.PROTOCOL_VERSION,
5539
      "config_version": constants.CONFIG_VERSION,
5540
      "os_api_version": max(constants.OS_API_VERSIONS),
5541
      "export_version": constants.EXPORT_VERSION,
5542
      "architecture": (platform.architecture()[0], platform.machine()),
5543
      "name": cluster.cluster_name,
5544
      "master": cluster.master_node,
5545
      "default_hypervisor": cluster.enabled_hypervisors[0],
5546
      "enabled_hypervisors": cluster.enabled_hypervisors,
5547
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5548
                        for hypervisor_name in cluster.enabled_hypervisors]),
5549
      "os_hvp": os_hvp,
5550
      "beparams": cluster.beparams,
5551
      "osparams": cluster.osparams,
5552
      "nicparams": cluster.nicparams,
5553
      "ndparams": cluster.ndparams,
5554
      "candidate_pool_size": cluster.candidate_pool_size,
5555
      "master_netdev": cluster.master_netdev,
5556
      "master_netmask": cluster.master_netmask,
5557
      "volume_group_name": cluster.volume_group_name,
5558
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5559
      "file_storage_dir": cluster.file_storage_dir,
5560
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5561
      "maintain_node_health": cluster.maintain_node_health,
5562
      "ctime": cluster.ctime,
5563
      "mtime": cluster.mtime,
5564
      "uuid": cluster.uuid,
5565
      "tags": list(cluster.GetTags()),
5566
      "uid_pool": cluster.uid_pool,
5567
      "default_iallocator": cluster.default_iallocator,
5568
      "reserved_lvs": cluster.reserved_lvs,
5569
      "primary_ip_version": primary_ip_version,
5570
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5571
      "hidden_os": cluster.hidden_os,
5572
      "blacklisted_os": cluster.blacklisted_os,
5573
      }
5574

    
5575
    return result
5576

    
5577

    
5578
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


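# Example (hypothetical values) for LUClusterConfigQuery above: a request with
# output_fields=["cluster_name", "drain_flag"] returns one entry per requested
# field, in the same order, e.g. ["cluster.example.com", False].

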
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair (disks_ok, device_info); C{disks_ok} is False if the
      operation failed, and C{device_info} is a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


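# Sketch of the two-pass assembly above, assuming a DRBD disk with primary
# node "node1" and secondary "node2" (hypothetical names): pass 1 assembles
# the device on both nodes with is_primary=False so the DRBD peers can
# connect, then pass 2 re-assembles it only on node1 with is_primary=True.
# On success device_info holds one entry per disk, e.g.
# ("node1", "disk/0", "/dev/drbd0").

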
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node make the
  function return False; errors on offline secondary nodes are
  always ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


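# Note on the return value above: a shutdown failure on the primary node is
# only counted when ignore_primary is false, while failures on secondary
# nodes are counted unless the RPC result reports the node as offline.

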
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


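# Typical call, as used by LUInstanceStartup below: the requested value is the
# instance's BE_MEMORY in MiB, and the caller skips the check if the instance
# is already running on the node:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)

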
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


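# req_sizes maps a volume group name to the space required in that VG, e.g.
# (hypothetical values) {"xenvg": 10240} to require 10 GiB in VG "xenvg";
# every entry is checked on each node via _CheckNodesFreeDiskOnVG below.

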
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


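# Note: the hvparams/beparams accepted by LUInstanceStartup above are one-off
# overrides; they are validated against the cluster-filled parameters
# (FillHV plus CheckParameterSyntax) and handed to call_instance_start, but
# this LU does not write them back to the configuration.

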
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


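# Decision logic in LUInstanceReboot.Exec above, summarized: a soft or hard
# reboot of a running instance is delegated to the node daemon through
# call_instance_reboot; every other case (full reboot, or an instance that is
# not running) falls back to an explicit shutdown where needed, followed by
# disk deactivation/activation and a fresh call_instance_start.

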
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


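# For DRBD8 disks the logical_id rewritten above is a 6-tuple; with
# hypothetical values the change looks like
#   ("old-node-A", "old-node-B", 11000, 0, 1, "secret")
#     -> ("new-node-A", "new-node-B", 11000, new_minor_A, new_minor_B, "secret")
# i.e. the nodes and minors are replaced while the port and secret are kept.

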
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
7050
  """Migrate all instances from a node.
7051

7052
  """
7053
  HPATH = "node-migrate"
7054
  HTYPE = constants.HTYPE_NODE
7055
  REQ_BGL = False
7056

    
7057
  def CheckArguments(self):
7058
    pass
7059

    
7060
  def ExpandNames(self):
7061
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7062

    
7063
    self.share_locks = _ShareAll()
7064
    self.needed_locks = {
7065
      locking.LEVEL_NODE: [self.op.node_name],
7066
      }
7067

    
7068
  def BuildHooksEnv(self):
7069
    """Build hooks env.
7070

7071
    This runs on the master.
7072

7073
    """
7074
    return {
7075
      "NODE_NAME": self.op.node_name,
7076
      }
7077

    
7078
  def BuildHooksNodes(self):
7079
    """Build hooks nodes.
7080

7081
    """
7082
    nl = [self.cfg.GetMasterNode()]
7083
    return (nl, nl)
7084

    
7085
  def CheckPrereq(self):
7086
    pass
7087

    
7088
  def Exec(self, feedback_fn):
7089
    # Prepare jobs for migration instances
7090
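    # Note: each primary instance gets its own single-opcode job, so the
    # resulting migrations are submitted and tracked independently of each
    # other (see the ResultWithJobs return value below).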
    jobs = [
7091
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7092
                                 mode=self.op.mode,
7093
                                 live=self.op.live,
7094
                                 iallocator=self.op.iallocator,
7095
                                 target_node=self.op.target_node)]
7096
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7097
      ]
7098

    
7099
    # TODO: Run iallocator in this opcode and pass correct placement options to
7100
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7101
    # running the iallocator and the actual migration, a good consistency model
7102
    # will have to be found.
7103

    
7104
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7105
            frozenset([self.op.node_name]))
7106

    
7107
    return ResultWithJobs(jobs)
7108

    
7109

    
7110
class TLMigrateInstance(Tasklet):
7111
  """Tasklet class for instance migration.
7112

7113
  @type live: boolean
7114
  @ivar live: whether the migration will be done live or non-live;
7115
      this variable is initialized only after CheckPrereq has run
7116
  @type cleanup: boolean
7117
  @ivar cleanup: Whether we are cleaning up from a failed migration
7118
  @type iallocator: string
7119
  @ivar iallocator: The iallocator used to determine target_node
7120
  @type target_node: string
7121
  @ivar target_node: If given, the target_node to reallocate the instance to
7122
  @type failover: boolean
7123
  @ivar failover: Whether operation results in failover or migration
7124
  @type fallback: boolean
7125
  @ivar fallback: Whether fallback to failover is allowed if migration not
7126
                  possible
7127
  @type ignore_consistency: boolean
7128
  @ivar ignore_consistency: Whether we should ignore consistency between source
7129
                            and target node
7130
  @type shutdown_timeout: int
7131
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7132

7133
  """
7134
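  # Usage sketch (illustrative): the owning LogicalUnit typically creates the
  # tasklet while expanding names, e.g.
  #   self.tasklets = [TLMigrateInstance(self, self.op.instance_name,
  #                                      cleanup=self.op.cleanup)]
  # after which the LU framework drives its CheckPrereq() and Exec().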

    
7135
  # Constants
7136
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7137
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7138

    
7139
  def __init__(self, lu, instance_name, cleanup=False,
7140
               failover=False, fallback=False,
7141
               ignore_consistency=False,
7142
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7143
    """Initializes this class.
7144

7145
    """
7146
    Tasklet.__init__(self, lu)
7147

    
7148
    # Parameters
7149
    self.instance_name = instance_name
7150
    self.cleanup = cleanup
7151
    self.live = False # will be overridden later
7152
    self.failover = failover
7153
    self.fallback = fallback
7154
    self.ignore_consistency = ignore_consistency
7155
    self.shutdown_timeout = shutdown_timeout
7156

    
7157
  def CheckPrereq(self):
7158
    """Check prerequisites.
7159

7160
    This checks that the instance is in the cluster.
7161

7162
    """
7163
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7164
    instance = self.cfg.GetInstanceInfo(instance_name)
7165
    assert instance is not None
7166
    self.instance = instance
7167

    
7168
    if (not self.cleanup and not instance.admin_up and not self.failover and
7169
        self.fallback):
7170
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7171
                      " to failover")
7172
      self.failover = True
7173

    
7174
    if instance.disk_template not in constants.DTS_MIRRORED:
7175
      if self.failover:
7176
        text = "failovers"
7177
      else:
7178
        text = "migrations"
7179
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7180
                                 " %s" % (instance.disk_template, text),
7181
                                 errors.ECODE_STATE)
7182

    
7183
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7184
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7185

    
7186
      if self.lu.op.iallocator:
7187
        self._RunAllocator()
7188
      else:
7189
        # We set self.target_node as it is required by
7190
        # BuildHooksEnv
7191
        self.target_node = self.lu.op.target_node
7192

    
7193
      # self.target_node is already populated, either directly or by the
7194
      # iallocator run
7195
      target_node = self.target_node
7196
      if self.target_node == instance.primary_node:
7197
        raise errors.OpPrereqError("Cannot migrate instance %s"
7198
                                   " to its primary (%s)" %
7199
                                   (instance.name, instance.primary_node))
7200

    
7201
      if len(self.lu.tasklets) == 1:
7202
        # It is safe to release locks only when we're the only tasklet
7203
        # in the LU
7204
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7205
                      keep=[instance.primary_node, self.target_node])
7206

    
7207
    else:
7208
      secondary_nodes = instance.secondary_nodes
7209
      if not secondary_nodes:
7210
        raise errors.ConfigurationError("No secondary node but using"
7211
                                        " %s disk template" %
7212
                                        instance.disk_template)
7213
      target_node = secondary_nodes[0]
7214
      if self.lu.op.iallocator or (self.lu.op.target_node and
7215
                                   self.lu.op.target_node != target_node):
7216
        if self.failover:
7217
          text = "failed over"
7218
        else:
7219
          text = "migrated"
7220
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7221
                                   " be %s to arbitrary nodes"
7222
                                   " (neither an iallocator nor a target"
7223
                                   " node can be passed)" %
7224
                                   (instance.disk_template, text),
7225
                                   errors.ECODE_INVAL)
7226

    
7227
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7228

    
7229
    # check memory requirements on the secondary node
7230
    if not self.failover or instance.admin_up:
7231
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7232
                           instance.name, i_be[constants.BE_MEMORY],
7233
                           instance.hypervisor)
7234
    else:
7235
      self.lu.LogInfo("Not checking memory on the secondary node as"
7236
                      " instance will not be started")
7237

    
7238
    # check bridge existence
7239
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7240

    
7241
    if not self.cleanup:
7242
      _CheckNodeNotDrained(self.lu, target_node)
7243
      if not self.failover:
7244
        result = self.rpc.call_instance_migratable(instance.primary_node,
7245
                                                   instance)
7246
        if result.fail_msg and self.fallback:
7247
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7248
                          " failover")
7249
          self.failover = True
7250
        else:
7251
          result.Raise("Can't migrate, please use failover",
7252
                       prereq=True, ecode=errors.ECODE_STATE)
7253

    
7254
    assert not (self.failover and self.cleanup)
7255

    
7256
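    # Resolve the migration mode: the caller may pass either 'live' or 'mode'
    # (not both); 'live' is translated into the corresponding mode and then
    # cleared, and if neither is given the hypervisor's HV_MIGRATION_MODE
    # default is used. Failovers are never live.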
    if not self.failover:
7257
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7258
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7259
                                   " parameters are accepted",
7260
                                   errors.ECODE_INVAL)
7261
      if self.lu.op.live is not None:
7262
        if self.lu.op.live:
7263
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7264
        else:
7265
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7266
        # reset the 'live' parameter to None so that repeated
7267
        # invocations of CheckPrereq do not raise an exception
7268
        self.lu.op.live = None
7269
      elif self.lu.op.mode is None:
7270
        # read the default value from the hypervisor
7271
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7272
                                                skip_globals=False)
7273
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7274

    
7275
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7276
    else:
7277
      # Failover is never live
7278
      self.live = False
7279

    
7280
  def _RunAllocator(self):
7281
    """Run the allocator based on input opcode.
7282

7283
    """
7284
    ial = IAllocator(self.cfg, self.rpc,
7285
                     mode=constants.IALLOCATOR_MODE_RELOC,
7286
                     name=self.instance_name,
7287
                     # TODO See why hail breaks with a single node below
7288
                     relocate_from=[self.instance.primary_node,
7289
                                    self.instance.primary_node],
7290
                     )
7291

    
7292
    ial.Run(self.lu.op.iallocator)
7293

    
7294
    if not ial.success:
7295
      raise errors.OpPrereqError("Can't compute nodes using"
7296
                                 " iallocator '%s': %s" %
7297
                                 (self.lu.op.iallocator, ial.info),
7298
                                 errors.ECODE_NORES)
7299
    if len(ial.result) != ial.required_nodes:
7300
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7301
                                 " of nodes (%s), required %s" %
7302
                                 (self.lu.op.iallocator, len(ial.result),
7303
                                  ial.required_nodes), errors.ECODE_FAULT)
7304
    self.target_node = ial.result[0]
7305
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7306
                 self.instance_name, self.lu.op.iallocator,
7307
                 utils.CommaJoin(ial.result))
7308

    
7309
  def _WaitUntilSync(self):
7310
    """Poll with custom rpc for disk sync.
7311

7312
    This uses our own step-based rpc call.
7313

7314
    """
7315
    self.feedback_fn("* wait until resync is done")
7316
    all_done = False
7317
    while not all_done:
7318
      all_done = True
7319
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7320
                                            self.nodes_ip,
7321
                                            self.instance.disks)
7322
      min_percent = 100
7323
      for node, nres in result.items():
7324
        nres.Raise("Cannot resync disks on node %s" % node)
7325
        node_done, node_percent = nres.payload
7326
        all_done = all_done and node_done
7327
        if node_percent is not None:
7328
          min_percent = min(min_percent, node_percent)
7329
      if not all_done:
7330
        if min_percent < 100:
7331
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7332
        time.sleep(2)
7333

    
7334
  def _EnsureSecondary(self, node):
7335
    """Demote a node to secondary.
7336

7337
    """
7338
    self.feedback_fn("* switching node %s to secondary mode" % node)
7339

    
7340
    for dev in self.instance.disks:
7341
      self.cfg.SetDiskID(dev, node)
7342

    
7343
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7344
                                          self.instance.disks)
7345
    result.Raise("Cannot change disk to secondary on node %s" % node)
7346

    
7347
  def _GoStandalone(self):
7348
    """Disconnect from the network.
7349

7350
    """
7351
    self.feedback_fn("* changing into standalone mode")
7352
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7353
                                               self.instance.disks)
7354
    for node, nres in result.items():
7355
      nres.Raise("Cannot disconnect disks node %s" % node)
7356

    
7357
  def _GoReconnect(self, multimaster):
7358
    """Reconnect to the network.
7359

7360
    """
7361
    if multimaster:
7362
      msg = "dual-master"
7363
    else:
7364
      msg = "single-master"
7365
    self.feedback_fn("* changing disks into %s mode" % msg)
7366
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7367
                                           self.instance.disks,
7368
                                           self.instance.name, multimaster)
7369
    for node, nres in result.items():
7370
      nres.Raise("Cannot change disks config on node %s" % node)
7371

    
7372
  def _ExecCleanup(self):
7373
    """Try to cleanup after a failed migration.
7374

7375
    The cleanup is done by:
7376
      - check that the instance is running only on one node
7377
        (and update the config if needed)
7378
      - change disks on its secondary node to secondary
7379
      - wait until disks are fully synchronized
7380
      - disconnect from the network
7381
      - change disks into single-master mode
7382
      - wait again until disks are fully synchronized
7383

7384
    """
7385
    instance = self.instance
7386
    target_node = self.target_node
7387
    source_node = self.source_node
7388

    
7389
    # check running on only one node
7390
    self.feedback_fn("* checking where the instance actually runs"
7391
                     " (if this hangs, the hypervisor might be in"
7392
                     " a bad state)")
7393
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7394
    for node, result in ins_l.items():
7395
      result.Raise("Can't contact node %s" % node)
7396

    
7397
    runningon_source = instance.name in ins_l[source_node].payload
7398
    runningon_target = instance.name in ins_l[target_node].payload
7399

    
7400
    if runningon_source and runningon_target:
7401
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7402
                               " or the hypervisor is confused; you will have"
7403
                               " to ensure manually that it runs only on one"
7404
                               " and restart this operation")
7405

    
7406
    if not (runningon_source or runningon_target):
7407
      raise errors.OpExecError("Instance does not seem to be running at all;"
7408
                               " in this case it's safer to repair by"
7409
                               " running 'gnt-instance stop' to ensure disk"
7410
                               " shutdown, and then restarting it")
7411

    
7412
    if runningon_target:
7413
      # the migration has actually succeeded, we need to update the config
7414
      self.feedback_fn("* instance running on secondary node (%s),"
7415
                       " updating config" % target_node)
7416
      instance.primary_node = target_node
7417
      self.cfg.Update(instance, self.feedback_fn)
7418
      demoted_node = source_node
7419
    else:
7420
      self.feedback_fn("* instance confirmed to be running on its"
7421
                       " primary node (%s)" % source_node)
7422
      demoted_node = target_node
7423

    
7424
    if instance.disk_template in constants.DTS_INT_MIRROR:
7425
      self._EnsureSecondary(demoted_node)
7426
      try:
7427
        self._WaitUntilSync()
7428
      except errors.OpExecError:
7429
        # we ignore here errors, since if the device is standalone, it
7430
        # won't be able to sync
7431
        pass
7432
      self._GoStandalone()
7433
      self._GoReconnect(False)
7434
      self._WaitUntilSync()
7435

    
7436
    self.feedback_fn("* done")
7437

    
7438
  def _RevertDiskStatus(self):
7439
    """Try to revert the disk status after a failed migration.
7440

7441
    """
7442
    target_node = self.target_node
7443
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7444
      return
7445

    
7446
    try:
7447
      self._EnsureSecondary(target_node)
7448
      self._GoStandalone()
7449
      self._GoReconnect(False)
7450
      self._WaitUntilSync()
7451
    except errors.OpExecError, err:
7452
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7453
                         " please try to recover the instance manually;"
7454
                         " error '%s'" % str(err))
7455

    
7456
  def _AbortMigration(self):
7457
    """Call the hypervisor code to abort a started migration.
7458

7459
    """
7460
    instance = self.instance
7461
    target_node = self.target_node
7462
    source_node = self.source_node
7463
    migration_info = self.migration_info
7464

    
7465
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7466
                                                                 instance,
7467
                                                                 migration_info,
7468
                                                                 False)
7469
    abort_msg = abort_result.fail_msg
7470
    if abort_msg:
7471
      logging.error("Aborting migration failed on target node %s: %s",
7472
                    target_node, abort_msg)
7473
      # Don't raise an exception here, as we still have to try to revert the
7474
      # disk status, even if this step failed.
7475

    
7476
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7477
        instance, False, self.live)
7478
    abort_msg = abort_result.fail_msg
7479
    if abort_msg:
7480
      logging.error("Aborting migration failed on source node %s: %s",
7481
                    source_node, abort_msg)
7482

    
7483
  def _ExecMigration(self):
7484
    """Migrate an instance.
7485

7486
    The migration is done by:
7487
      - change the disks into dual-master mode
7488
      - wait until disks are fully synchronized again
7489
      - migrate the instance
7490
      - change disks on the new secondary node (the old primary) to secondary
7491
      - wait until disks are fully synchronized
7492
      - change disks into single-master mode
7493

7494
    """
7495
    instance = self.instance
7496
    target_node = self.target_node
7497
    source_node = self.source_node
7498

    
7499
    # Check for hypervisor version mismatch and warn the user.
7500
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7501
                                       None, self.instance.hypervisor)
7502
    src_info = nodeinfo[source_node]
7503
    dst_info = nodeinfo[target_node]
7504

    
7505
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7506
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7507
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7508
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7509
      if src_version != dst_version:
7510
        self.feedback_fn("* warning: hypervisor version mismatch between"
7511
                         " source (%s) and target (%s) node" %
7512
                         (src_version, dst_version))
7513

    
7514
    self.feedback_fn("* checking disk consistency between source and target")
7515
    for dev in instance.disks:
7516
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7517
        raise errors.OpExecError("Disk %s is degraded or not fully"
7518
                                 " synchronized on target node,"
7519
                                 " aborting migration" % dev.iv_name)
7520

    
7521
    # First get the migration information from the remote node
7522
    result = self.rpc.call_migration_info(source_node, instance)
7523
    msg = result.fail_msg
7524
    if msg:
7525
      log_err = ("Failed fetching source migration information from %s: %s" %
7526
                 (source_node, msg))
7527
      logging.error(log_err)
7528
      raise errors.OpExecError(log_err)
7529

    
7530
    self.migration_info = migration_info = result.payload
7531

    
7532
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7533
      # Then switch the disks to master/master mode
7534
      self._EnsureSecondary(target_node)
7535
      self._GoStandalone()
7536
      self._GoReconnect(True)
7537
      self._WaitUntilSync()
7538

    
7539
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7540
    result = self.rpc.call_accept_instance(target_node,
7541
                                           instance,
7542
                                           migration_info,
7543
                                           self.nodes_ip[target_node])
7544

    
7545
    msg = result.fail_msg
7546
    if msg:
7547
      logging.error("Instance pre-migration failed, trying to revert"
7548
                    " disk status: %s", msg)
7549
      self.feedback_fn("Pre-migration failed, aborting")
7550
      self._AbortMigration()
7551
      self._RevertDiskStatus()
7552
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7553
                               (instance.name, msg))
7554

    
7555
    self.feedback_fn("* migrating instance to %s" % target_node)
7556
    result = self.rpc.call_instance_migrate(source_node, instance,
7557
                                            self.nodes_ip[target_node],
7558
                                            self.live)
7559
    msg = result.fail_msg
7560
    if msg:
7561
      logging.error("Instance migration failed, trying to revert"
7562
                    " disk status: %s", msg)
7563
      self.feedback_fn("Migration failed, aborting")
7564
      self._AbortMigration()
7565
      self._RevertDiskStatus()
7566
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7567
                               (instance.name, msg))
7568

    
7569
    self.feedback_fn("* starting memory transfer")
7570
    last_feedback = time.time()
7571
    while True:
7572
      result = self.rpc.call_instance_get_migration_status(source_node,
7573
                                                           instance)
7574
      msg = result.fail_msg
7575
      ms = result.payload   # MigrationStatus instance
7576
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7577
        logging.error("Instance migration failed, trying to revert"
7578
                      " disk status: %s", msg)
7579
        self.feedback_fn("Migration failed, aborting")
7580
        self._AbortMigration()
7581
        self._RevertDiskStatus()
7582
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7583
                                 (instance.name, msg))
7584

    
7585
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7586
        self.feedback_fn("* memory transfer complete")
7587
        break
7588

    
7589
      if (utils.TimeoutExpired(last_feedback,
7590
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7591
          ms.transferred_ram is not None):
7592
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7593
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7594
        last_feedback = time.time()
7595

    
7596
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7597

    
7598
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7599
                                                           instance,
7600
                                                           True,
7601
                                                           self.live)
7602
    msg = result.fail_msg
7603
    if msg:
7604
      logging.error("Instance migration succeeded, but finalization failed"
7605
                    " on the source node: %s", msg)
7606
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7607
                               msg)
7608

    
7609
    instance.primary_node = target_node
7610

    
7611
    # distribute new instance config to the other nodes
7612
    self.cfg.Update(instance, self.feedback_fn)
7613

    
7614
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7615
                                                           instance,
7616
                                                           migration_info,
7617
                                                           True)
7618
    msg = result.fail_msg
7619
    if msg:
7620
      logging.error("Instance migration succeeded, but finalization failed"
7621
                    " on the target node: %s", msg)
7622
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7623
                               msg)
7624

    
7625
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7626
      self._EnsureSecondary(source_node)
7627
      self._WaitUntilSync()
7628
      self._GoStandalone()
7629
      self._GoReconnect(False)
7630
      self._WaitUntilSync()
7631

    
7632
    self.feedback_fn("* done")
7633

    
7634
  def _ExecFailover(self):
7635
    """Failover an instance.
7636

7637
    The failover is done by shutting it down on its present node and
7638
    starting it on the secondary.
7639

7640
    """
7641
    instance = self.instance
7642
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7643

    
7644
    source_node = instance.primary_node
7645
    target_node = self.target_node
7646

    
7647
    if instance.admin_up:
7648
      self.feedback_fn("* checking disk consistency between source and target")
7649
      for dev in instance.disks:
7650
        # for drbd, these are drbd over lvm
7651
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7652
          if primary_node.offline:
7653
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7654
                             " target node %s" %
7655
                             (primary_node.name, dev.iv_name, target_node))
7656
          elif not self.ignore_consistency:
7657
            raise errors.OpExecError("Disk %s is degraded on target node,"
7658
                                     " aborting failover" % dev.iv_name)
7659
    else:
7660
      self.feedback_fn("* not checking disk consistency as instance is not"
7661
                       " running")
7662

    
7663
    self.feedback_fn("* shutting down instance on source node")
7664
    logging.info("Shutting down instance %s on node %s",
7665
                 instance.name, source_node)
7666

    
7667
    result = self.rpc.call_instance_shutdown(source_node, instance,
7668
                                             self.shutdown_timeout)
7669
    msg = result.fail_msg
7670
    if msg:
7671
      if self.ignore_consistency or primary_node.offline:
7672
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7673
                           " proceeding anyway; please make sure node"
7674
                           " %s is down; error details: %s",
7675
                           instance.name, source_node, source_node, msg)
7676
      else:
7677
        raise errors.OpExecError("Could not shutdown instance %s on"
7678
                                 " node %s: %s" %
7679
                                 (instance.name, source_node, msg))
7680

    
7681
    self.feedback_fn("* deactivating the instance's disks on source node")
7682
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7683
      raise errors.OpExecError("Can't shut down the instance's disks")
7684

    
7685
    instance.primary_node = target_node
7686
    # distribute new instance config to the other nodes
7687
    self.cfg.Update(instance, self.feedback_fn)
7688

    
7689
    # Only start the instance if it's marked as up
7690
    if instance.admin_up:
7691
      self.feedback_fn("* activating the instance's disks on target node %s" %
7692
                       target_node)
7693
      logging.info("Starting instance %s on node %s",
7694
                   instance.name, target_node)
7695

    
7696
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7697
                                           ignore_secondaries=True)
7698
      if not disks_ok:
7699
        _ShutdownInstanceDisks(self.lu, instance)
7700
        raise errors.OpExecError("Can't activate the instance's disks")
7701

    
7702
      self.feedback_fn("* starting the instance on the target node %s" %
7703
                       target_node)
7704
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7705
                                            False)
7706
      msg = result.fail_msg
7707
      if msg:
7708
        _ShutdownInstanceDisks(self.lu, instance)
7709
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7710
                                 (instance.name, target_node, msg))
7711

    
7712
  def Exec(self, feedback_fn):
7713
    """Perform the migration.
7714

7715
    """
7716
    self.feedback_fn = feedback_fn
7717
    self.source_node = self.instance.primary_node
7718

    
7719
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7720
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7721
      self.target_node = self.instance.secondary_nodes[0]
7722
      # Otherwise self.target_node has been populated either
7723
      # directly, or through an iallocator.
7724

    
7725
    self.all_nodes = [self.source_node, self.target_node]
7726
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7727
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7728

    
7729
    if self.failover:
7730
      feedback_fn("Failover instance %s" % self.instance.name)
7731
      self._ExecFailover()
7732
    else:
7733
      feedback_fn("Migrating instance %s" % self.instance.name)
7734

    
7735
      if self.cleanup:
7736
        return self._ExecCleanup()
7737
      else:
7738
        return self._ExecMigration()
7739

    
7740

    
7741
def _CreateBlockDev(lu, node, instance, device, force_create,
7742
                    info, force_open):
7743
  """Create a tree of block devices on a given node.
7744

7745
  If this device type has to be created on secondaries, create it and
7746
  all its children.
7747

7748
  If not, just recurse to children keeping the same 'force' value.
7749

7750
  @param lu: the lu on whose behalf we execute
7751
  @param node: the node on which to create the device
7752
  @type instance: L{objects.Instance}
7753
  @param instance: the instance which owns the device
7754
  @type device: L{objects.Disk}
7755
  @param device: the device to create
7756
  @type force_create: boolean
7757
  @param force_create: whether to force creation of this device; this
7758
      will be changed to True whenever we find a device which has
7759
      a CreateOnSecondary() method that returns True
7760
  @param info: the extra 'metadata' we should attach to the device
7761
      (this will be represented as a LVM tag)
7762
  @type force_open: boolean
7763
  @param force_open: this parameter will be passed to the
7764
      L{backend.BlockdevCreate} function where it specifies
7765
      whether we run on primary or not, and it affects both
7766
      the child assembly and the device's own Open() execution
7767

7768
  """
7769
  if device.CreateOnSecondary():
7770
    force_create = True
7771

    
7772
  if device.children:
7773
    for child in device.children:
7774
      _CreateBlockDev(lu, node, instance, child, force_create,
7775
                      info, force_open)
7776

    
7777
  if not force_create:
7778
    return
7779

    
7780
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7781

    
7782

    
7783
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7784
  """Create a single block device on a given node.
7785

7786
  This will not recurse over children of the device, so they must be
7787
  created in advance.
7788

7789
  @param lu: the lu on whose behalf we execute
7790
  @param node: the node on which to create the device
7791
  @type instance: L{objects.Instance}
7792
  @param instance: the instance which owns the device
7793
  @type device: L{objects.Disk}
7794
  @param device: the device to create
7795
  @param info: the extra 'metadata' we should attach to the device
7796
      (this will be represented as a LVM tag)
7797
  @type force_open: boolean
7798
  @param force_open: this parameter will be passed to the
7799
      L{backend.BlockdevCreate} function where it specifies
7800
      whether we run on primary or not, and it affects both
7801
      the child assembly and the device's own Open() execution
7802

7803
  """
7804
  lu.cfg.SetDiskID(device, node)
7805
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7806
                                       instance.name, force_open, info)
7807
  result.Raise("Can't create block device %s on"
7808
               " node %s for instance %s" % (device, node, instance.name))
7809
  if device.physical_id is None:
7810
    device.physical_id = result.payload
7811

    
7812

    
7813
def _GenerateUniqueNames(lu, exts):
7814
  """Generate a suitable LV name.
7815

7816
  This will generate a logical volume name for the given instance.
7817

7818
  """
7819
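  # One fresh unique ID is generated per requested extension, so
  # exts=[".disk0", ".disk1"] yields two names of the form "<id-a>.disk0" and
  # "<id-b>.disk1", with a different ID for each (illustrative).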
  results = []
7820
  for val in exts:
7821
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7822
    results.append("%s%s" % (new_id, val))
7823
  return results
7824

    
7825

    
7826
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7827
                         iv_name, p_minor, s_minor):
7828
  """Generate a drbd8 device complete with its children.
7829

7830
  """
7831
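  # The returned object is a two-level tree: an LD_DRBD8 device whose
  # logical_id carries (primary, secondary, port, p_minor, s_minor, secret),
  # backed by two LD_LV children -- names[0]/vgnames[0] for the data volume of
  # the requested size and names[1]/vgnames[1] for the DRBD_META_SIZE metadata
  # volume.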
  assert len(vgnames) == len(names) == 2
7832
  port = lu.cfg.AllocatePort()
7833
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7834
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7835
                          logical_id=(vgnames[0], names[0]))
7836
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
7837
                          logical_id=(vgnames[1], names[1]))
7838
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7839
                          logical_id=(primary, secondary, port,
7840
                                      p_minor, s_minor,
7841
                                      shared_secret),
7842
                          children=[dev_data, dev_meta],
7843
                          iv_name=iv_name)
7844
  return drbd_dev
7845

    
7846

    
7847
def _GenerateDiskTemplate(lu, template_name,
7848
                          instance_name, primary_node,
7849
                          secondary_nodes, disk_info,
7850
                          file_storage_dir, file_driver,
7851
                          base_index, feedback_fn):
7852
  """Generate the entire disk layout for a given template type.
7853

7854
  """
7855
  #TODO: compute space requirements
7856

    
7857
  vgname = lu.cfg.GetVGName()
7858
  disk_count = len(disk_info)
7859
  disks = []
7860
  if template_name == constants.DT_DISKLESS:
7861
    pass
7862
  elif template_name == constants.DT_PLAIN:
7863
    if len(secondary_nodes) != 0:
7864
      raise errors.ProgrammerError("Wrong template configuration")
7865

    
7866
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7867
                                      for i in range(disk_count)])
7868
    for idx, disk in enumerate(disk_info):
7869
      disk_index = idx + base_index
7870
      vg = disk.get(constants.IDISK_VG, vgname)
7871
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7872
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7873
                              size=disk[constants.IDISK_SIZE],
7874
                              logical_id=(vg, names[idx]),
7875
                              iv_name="disk/%d" % disk_index,
7876
                              mode=disk[constants.IDISK_MODE])
7877
      disks.append(disk_dev)
7878
  elif template_name == constants.DT_DRBD8:
7879
    if len(secondary_nodes) != 1:
7880
      raise errors.ProgrammerError("Wrong template configuration")
7881
    remote_node = secondary_nodes[0]
7882
    minors = lu.cfg.AllocateDRBDMinor(
7883
      [primary_node, remote_node] * len(disk_info), instance_name)
7884

    
7885
    names = []
7886
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7887
                                               for i in range(disk_count)]):
7888
      names.append(lv_prefix + "_data")
7889
      names.append(lv_prefix + "_meta")
7890
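    # Each disk consumes two consecutive entries from both lists: names[2i]
    # and names[2i + 1] are its data and meta LVs, while minors[2i] and
    # minors[2i + 1] are its DRBD minors on the primary and secondary node.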
    for idx, disk in enumerate(disk_info):
7891
      disk_index = idx + base_index
7892
      data_vg = disk.get(constants.IDISK_VG, vgname)
7893
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7894
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7895
                                      disk[constants.IDISK_SIZE],
7896
                                      [data_vg, meta_vg],
7897
                                      names[idx * 2:idx * 2 + 2],
7898
                                      "disk/%d" % disk_index,
7899
                                      minors[idx * 2], minors[idx * 2 + 1])
7900
      disk_dev.mode = disk[constants.IDISK_MODE]
7901
      disks.append(disk_dev)
7902
  elif template_name == constants.DT_FILE:
7903
    if len(secondary_nodes) != 0:
7904
      raise errors.ProgrammerError("Wrong template configuration")
7905

    
7906
    opcodes.RequireFileStorage()
7907

    
7908
    for idx, disk in enumerate(disk_info):
7909
      disk_index = idx + base_index
7910
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7911
                              size=disk[constants.IDISK_SIZE],
7912
                              iv_name="disk/%d" % disk_index,
7913
                              logical_id=(file_driver,
7914
                                          "%s/disk%d" % (file_storage_dir,
7915
                                                         disk_index)),
7916
                              mode=disk[constants.IDISK_MODE])
7917
      disks.append(disk_dev)
7918
  elif template_name == constants.DT_SHARED_FILE:
7919
    if len(secondary_nodes) != 0:
7920
      raise errors.ProgrammerError("Wrong template configuration")
7921

    
7922
    opcodes.RequireSharedFileStorage()
7923

    
7924
    for idx, disk in enumerate(disk_info):
7925
      disk_index = idx + base_index
7926
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7927
                              size=disk[constants.IDISK_SIZE],
7928
                              iv_name="disk/%d" % disk_index,
7929
                              logical_id=(file_driver,
7930
                                          "%s/disk%d" % (file_storage_dir,
7931
                                                         disk_index)),
7932
                              mode=disk[constants.IDISK_MODE])
7933
      disks.append(disk_dev)
7934
  elif template_name == constants.DT_BLOCK:
7935
    if len(secondary_nodes) != 0:
7936
      raise errors.ProgrammerError("Wrong template configuration")
7937

    
7938
    for idx, disk in enumerate(disk_info):
7939
      disk_index = idx + base_index
7940
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7941
                              size=disk[constants.IDISK_SIZE],
7942
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7943
                                          disk[constants.IDISK_ADOPT]),
7944
                              iv_name="disk/%d" % disk_index,
7945
                              mode=disk[constants.IDISK_MODE])
7946
      disks.append(disk_dev)
7947

    
7948
  else:
7949
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7950
  return disks
7951

    
7952

    
7953
def _GetInstanceInfoText(instance):
7954
  """Compute that text that should be added to the disk's metadata.
7955

7956
  """
7957
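  # e.g. an instance named "instance1.example.com" (illustrative) results in
  # the tag text "originstname+instance1.example.com"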
  return "originstname+%s" % instance.name
7958

    
7959

    
7960
def _CalcEta(time_taken, written, total_size):
7961
  """Calculates the ETA based on size written and total size.
7962

7963
  @param time_taken: The time taken so far, in seconds
7964
  @param written: amount written so far
7965
  @param total_size: The total size of data to be written
7966
  @return: The remaining time in seconds
7967

7968
  """
7969
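  # Worked example (illustrative): with written=256, total_size=1024 and
  # time_taken=30.0 seconds, avg_time is 30.0 / 256 and the returned ETA is
  # (1024 - 256) * (30.0 / 256) = 90.0 seconds.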
  avg_time = time_taken / float(written)
7970
  return (total_size - written) * avg_time
7971

    
7972

    
7973
def _WipeDisks(lu, instance):
7974
  """Wipes instance disks.
7975

7976
  @type lu: L{LogicalUnit}
7977
  @param lu: the logical unit on whose behalf we execute
7978
  @type instance: L{objects.Instance}
7979
  @param instance: the instance whose disks we should wipe
7980
  @return: the success of the wipe
7981

7982
  """
7983
  node = instance.primary_node
7984

    
7985
  for device in instance.disks:
7986
    lu.cfg.SetDiskID(device, node)
7987

    
7988
  logging.info("Pause sync of instance %s disks", instance.name)
7989
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7990

    
7991
  for idx, success in enumerate(result.payload):
7992
    if not success:
7993
      logging.warn("pause-sync of instance %s for disk %d failed",
7994
                   instance.name, idx)
7995

    
7996
  try:
7997
    for idx, device in enumerate(instance.disks):
7998
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7999
      # MAX_WIPE_CHUNK at max
8000
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8001
                            constants.MIN_WIPE_CHUNK_PERCENT)
8002
      # we _must_ make this an int, otherwise rounding errors will
8003
      # occur
8004
      wipe_chunk_size = int(wipe_chunk_size)
8005
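      # Illustrative example (the actual bounds are defined in constants.py):
      # if MIN_WIPE_CHUNK_PERCENT were 10 and MAX_WIPE_CHUNK 1024 MiB, a
      # 20 GiB (20480 MiB) disk would be wiped in chunks of
      # min(1024, 20480 / 100.0 * 10) = 1024 MiB.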

    
8006
      lu.LogInfo("* Wiping disk %d", idx)
8007
      logging.info("Wiping disk %d for instance %s, node %s using"
8008
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8009

    
8010
      offset = 0
8011
      size = device.size
8012
      last_output = 0
8013
      start_time = time.time()
8014

    
8015
      while offset < size:
8016
        wipe_size = min(wipe_chunk_size, size - offset)
8017
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8018
                      idx, offset, wipe_size)
8019
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8020
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8021
                     (idx, offset, wipe_size))
8022
        now = time.time()
8023
        offset += wipe_size
8024
        if now - last_output >= 60:
8025
          eta = _CalcEta(now - start_time, offset, size)
8026
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8027
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8028
          last_output = now
8029
  finally:
8030
    logging.info("Resume sync of instance %s disks", instance.name)
8031

    
8032
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8033

    
8034
    for idx, success in enumerate(result.payload):
8035
      if not success:
8036
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8037
                      " look at the status and troubleshoot the issue", idx)
8038
        logging.warn("resume-sync of instance %s for disk %d failed",
8039
                     instance.name, idx)
8040

    
8041

    
8042
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8043
  """Create all disks for an instance.
8044

8045
  This abstracts away some work from AddInstance.
8046

8047
  @type lu: L{LogicalUnit}
8048
  @param lu: the logical unit on whose behalf we execute
8049
  @type instance: L{objects.Instance}
8050
  @param instance: the instance whose disks we should create
8051
  @type to_skip: list
8052
  @param to_skip: list of indices to skip
8053
  @type target_node: string
8054
  @param target_node: if passed, overrides the target node for creation
8055
  @raise errors.OpExecError: if creating any of the block devices fails
8057

8058
  """
8059
  info = _GetInstanceInfoText(instance)
8060
  if target_node is None:
8061
    pnode = instance.primary_node
8062
    all_nodes = instance.all_nodes
8063
  else:
8064
    pnode = target_node
8065
    all_nodes = [pnode]
8066

    
8067
  if instance.disk_template in constants.DTS_FILEBASED:
8068
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8069
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8070

    
8071
    result.Raise("Failed to create directory '%s' on"
8072
                 " node %s" % (file_storage_dir, pnode))
8073

    
8074
  # Note: this needs to be kept in sync with adding of disks in
8075
  # LUInstanceSetParams
8076
  for idx, device in enumerate(instance.disks):
8077
    if to_skip and idx in to_skip:
8078
      continue
8079
    logging.info("Creating volume %s for instance %s",
8080
                 device.iv_name, instance.name)
8081
    #HARDCODE
8082
    for node in all_nodes:
8083
      f_create = node == pnode
8084
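      # Only the primary node (or the explicit target_node override) gets
      # force_create/force_open; on the other nodes _CreateBlockDev only
      # creates devices whose CreateOnSecondary() says so.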
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8085

    
8086

    
8087
def _RemoveDisks(lu, instance, target_node=None):
8088
  """Remove all disks for an instance.
8089

8090
  This abstracts away some work from `AddInstance()` and
8091
  `RemoveInstance()`. Note that in case some of the devices couldn't
8092
  be removed, the removal will continue with the other ones (compare
8093
  with `_CreateDisks()`).
8094

8095
  @type lu: L{LogicalUnit}
8096
  @param lu: the logical unit on whose behalf we execute
8097
  @type instance: L{objects.Instance}
8098
  @param instance: the instance whose disks we should remove
8099
  @type target_node: string
8100
  @param target_node: used to override the node on which to remove the disks
8101
  @rtype: boolean
8102
  @return: the success of the removal
8103

8104
  """
8105
  logging.info("Removing block devices for instance %s", instance.name)
8106

    
8107
  all_result = True
8108
  for device in instance.disks:
8109
    if target_node:
8110
      edata = [(target_node, device)]
8111
    else:
8112
      edata = device.ComputeNodeTree(instance.primary_node)
8113
    for node, disk in edata:
8114
      lu.cfg.SetDiskID(disk, node)
8115
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8116
      if msg:
8117
        lu.LogWarning("Could not remove block device %s on node %s,"
8118
                      " continuing anyway: %s", device.iv_name, node, msg)
8119
        all_result = False
8120

    
8121
  if instance.disk_template == constants.DT_FILE:
8122
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8123
    if target_node:
8124
      tgt = target_node
8125
    else:
8126
      tgt = instance.primary_node
8127
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8128
    if result.fail_msg:
8129
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8130
                    file_storage_dir, instance.primary_node, result.fail_msg)
8131
      all_result = False
8132

    
8133
  return all_result
8134

    
8135

    
8136
def _ComputeDiskSizePerVG(disk_template, disks):
8137
  """Compute disk size requirements in the volume group
8138

8139
  """
8140
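  # Illustrative example: for constants.DT_DRBD8 and two disks of 1024 and
  # 2048 MiB in volume group "xenvg", the result is
  # {"xenvg": 1024 + 2048 + 2 * DRBD_META_SIZE}.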
  def _compute(disks, payload):
8141
    """Universal algorithm.
8142

8143
    """
8144
    vgs = {}
8145
    for disk in disks:
8146
      vg = disk[constants.IDISK_VG]
8147
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload
8148

    
8149
    return vgs
8150

    
8151
  # Required free disk space as a function of disk and swap space
8152
  req_size_dict = {
8153
    constants.DT_DISKLESS: {},
8154
    constants.DT_PLAIN: _compute(disks, 0),
8155
    # 128 MB are added for drbd metadata for each disk
8156
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8157
    constants.DT_FILE: {},
8158
    constants.DT_SHARED_FILE: {},
8159
  }
8160

    
8161
  if disk_template not in req_size_dict:
8162
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8163
                                 " is unknown" % disk_template)
8164

    
8165
  return req_size_dict[disk_template]
8166

    
8167

    
8168
def _ComputeDiskSize(disk_template, disks):
8169
  """Compute disk size requirements in the volume group
8170

8171
  """
8172
  # Required free disk space as a function of disk and swap space
8173
  req_size_dict = {
8174
    constants.DT_DISKLESS: None,
8175
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8176
    # 128 MB are added for drbd metadata for each disk
8177
    constants.DT_DRBD8:
8178
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8179
    constants.DT_FILE: None,
8180
    constants.DT_SHARED_FILE: 0,
8181
    constants.DT_BLOCK: 0,
8182
  }
8183

    
8184
  if disk_template not in req_size_dict:
8185
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8186
                                 " is unknown" % disk_template)
8187

    
8188
  return req_size_dict[disk_template]
8189

    
8190

    
8191
def _FilterVmNodes(lu, nodenames):
8192
  """Filters out non-vm_capable nodes from a list.
8193

8194
  @type lu: L{LogicalUnit}
8195
  @param lu: the logical unit for which we check
8196
  @type nodenames: list
8197
  @param nodenames: the list of nodes on which we should check
8198
  @rtype: list
8199
  @return: the list of vm-capable nodes
8200

8201
  """
8202
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8203
  return [name for name in nodenames if name not in non_vm_nodes]
8204

    
8205

    
8206
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8207
  """Hypervisor parameter validation.
8208

8209
  This function abstract the hypervisor parameter validation to be
8210
  used in both instance create and instance modify.
8211

8212
  @type lu: L{LogicalUnit}
8213
  @param lu: the logical unit for which we check
8214
  @type nodenames: list
8215
  @param nodenames: the list of nodes on which we should check
8216
  @type hvname: string
8217
  @param hvname: the name of the hypervisor we should use
8218
  @type hvparams: dict
8219
  @param hvparams: the parameters which we need to check
8220
  @raise errors.OpPrereqError: if the parameters are not valid
8221

8222
  """
8223
  nodenames = _FilterVmNodes(lu, nodenames)
8224
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8225
                                                  hvname,
8226
                                                  hvparams)
8227
  for node in nodenames:
8228
    info = hvinfo[node]
8229
    if info.offline:
8230
      continue
8231
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8232

    
8233

    
8234
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8235
  """OS parameters validation.
8236

8237
  @type lu: L{LogicalUnit}
8238
  @param lu: the logical unit for which we check
8239
  @type required: boolean
8240
  @param required: whether the validation should fail if the OS is not
8241
      found
8242
  @type nodenames: list
8243
  @param nodenames: the list of nodes on which we should check
8244
  @type osname: string
8245
  @param osname: the name of the OS we should use
8246
  @type osparams: dict
8247
  @param osparams: the parameters which we need to check
8248
  @raise errors.OpPrereqError: if the parameters are not valid
8249

8250
  """
8251
  nodenames = _FilterVmNodes(lu, nodenames)
8252
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8253
                                   [constants.OS_VALIDATE_PARAMETERS],
8254
                                   osparams)
8255
  for node, nres in result.items():
8256
    # we don't check for offline cases since this should be run only
8257
    # against the master node and/or an instance's nodes
8258
    nres.Raise("OS Parameters validation failed on node %s" % node)
8259
    if not nres.payload:
8260
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8261
                 osname, node)
8262

    
8263

    
8264
class LUInstanceCreate(LogicalUnit):
8265
  """Create an instance.
8266

8267
  """
8268
  HPATH = "instance-add"
8269
  HTYPE = constants.HTYPE_INSTANCE
8270
  REQ_BGL = False
8271

    
8272
  def CheckArguments(self):
8273
    """Check arguments.
8274

8275
    """
8276
    # do not require name_check to ease forward/backward compatibility
8277
    # for tools
8278
    if self.op.no_install and self.op.start:
8279
      self.LogInfo("No-installation mode selected, disabling startup")
8280
      self.op.start = False
8281
    # validate/normalize the instance name
8282
    self.op.instance_name = \
8283
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8284

    
8285
    if self.op.ip_check and not self.op.name_check:
8286
      # TODO: make the ip check more flexible and not depend on the name check
8287
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8288
                                 " check", errors.ECODE_INVAL)
8289

    
8290
    # check nics' parameter names
8291
    for nic in self.op.nics:
8292
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8293

    
8294
    # check disks. parameter names and consistent adopt/no-adopt strategy
8295
    has_adopt = has_no_adopt = False
8296
    for disk in self.op.disks:
8297
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8298
      if constants.IDISK_ADOPT in disk:
8299
        has_adopt = True
8300
      else:
8301
        has_no_adopt = True
8302
    if has_adopt and has_no_adopt:
8303
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8304
                                 errors.ECODE_INVAL)
8305
    if has_adopt:
8306
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8307
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8308
                                   " '%s' disk template" %
8309
                                   self.op.disk_template,
8310
                                   errors.ECODE_INVAL)
8311
      if self.op.iallocator is not None:
8312
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8313
                                   " iallocator script", errors.ECODE_INVAL)
8314
      if self.op.mode == constants.INSTANCE_IMPORT:
8315
        raise errors.OpPrereqError("Disk adoption not allowed for"
8316
                                   " instance import", errors.ECODE_INVAL)
8317
    else:
8318
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8319
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8320
                                   " but no 'adopt' parameter given" %
8321
                                   self.op.disk_template,
8322
                                   errors.ECODE_INVAL)
8323

    
8324
    self.adopt_disks = has_adopt
8325

    
8326
    # instance name verification
8327
    if self.op.name_check:
8328
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8329
      self.op.instance_name = self.hostname1.name
8330
      # used in CheckPrereq for ip ping check
8331
      self.check_ip = self.hostname1.ip
8332
    else:
8333
      self.check_ip = None
8334

    
8335
    # file storage checks
8336
    if (self.op.file_driver and
8337
        not self.op.file_driver in constants.FILE_DRIVER):
8338
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8339
                                 self.op.file_driver, errors.ECODE_INVAL)
8340

    
8341
    if self.op.disk_template == constants.DT_FILE:
8342
      opcodes.RequireFileStorage()
8343
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8344
      opcodes.RequireSharedFileStorage()
8345

    
8346
    ### Node/iallocator related checks
8347
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8348

    
8349
    if self.op.pnode is not None:
8350
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8351
        if self.op.snode is None:
8352
          raise errors.OpPrereqError("The networked disk templates need"
8353
                                     " a mirror node", errors.ECODE_INVAL)
8354
      elif self.op.snode:
8355
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8356
                        " template")
8357
        self.op.snode = None
8358

    
8359
    self._cds = _GetClusterDomainSecret()
8360

    
8361
    if self.op.mode == constants.INSTANCE_IMPORT:
8362
      # On import force_variant must be True, because if we forced it at
8363
      # initial install, our only chance when importing it back is that it
8364
      # works again!
8365
      self.op.force_variant = True
8366

    
8367
      if self.op.no_install:
8368
        self.LogInfo("No-installation mode has no effect during import")
8369

    
8370
    elif self.op.mode == constants.INSTANCE_CREATE:
8371
      if self.op.os_type is None:
8372
        raise errors.OpPrereqError("No guest OS specified",
8373
                                   errors.ECODE_INVAL)
8374
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8375
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8376
                                   " installation" % self.op.os_type,
8377
                                   errors.ECODE_STATE)
8378
      if self.op.disk_template is None:
8379
        raise errors.OpPrereqError("No disk template specified",
8380
                                   errors.ECODE_INVAL)
8381

    
8382
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8383
      # Check handshake to ensure both clusters have the same domain secret
8384
      src_handshake = self.op.source_handshake
8385
      if not src_handshake:
8386
        raise errors.OpPrereqError("Missing source handshake",
8387
                                   errors.ECODE_INVAL)
8388

    
8389
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8390
                                                           src_handshake)
8391
      if errmsg:
8392
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8393
                                   errors.ECODE_INVAL)
8394

    
8395
      # Load and check source CA
8396
      self.source_x509_ca_pem = self.op.source_x509_ca
8397
      if not self.source_x509_ca_pem:
8398
        raise errors.OpPrereqError("Missing source X509 CA",
8399
                                   errors.ECODE_INVAL)
8400

    
8401
      try:
8402
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8403
                                                    self._cds)
8404
      except OpenSSL.crypto.Error, err:
8405
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8406
                                   (err, ), errors.ECODE_INVAL)
8407

    
8408
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8409
      if errcode is not None:
8410
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8411
                                   errors.ECODE_INVAL)
8412

    
8413
      self.source_x509_ca = cert
8414

    
8415
      src_instance_name = self.op.source_instance_name
8416
      if not src_instance_name:
8417
        raise errors.OpPrereqError("Missing source instance name",
8418
                                   errors.ECODE_INVAL)
8419

    
8420
      self.source_instance_name = \
8421
          netutils.GetHostname(name=src_instance_name).name
8422

    
8423
    else:
8424
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8425
                                 self.op.mode, errors.ECODE_INVAL)
8426

    
8427
  def ExpandNames(self):
8428
    """ExpandNames for CreateInstance.
8429

8430
    Figure out the right locks for instance creation.
8431

8432
    """
8433
    self.needed_locks = {}
8434

    
8435
    instance_name = self.op.instance_name
8436
    # this is just a preventive check, but someone might still add this
8437
    # instance in the meantime, and creation will fail at lock-add time
8438
    if instance_name in self.cfg.GetInstanceList():
8439
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8440
                                 instance_name, errors.ECODE_EXISTS)
8441

    
8442
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8443

    
8444
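    # when an iallocator picks the nodes we do not know them yet, so all
    # nodes have to be locked; otherwise only the explicitly named primary
    # (and optional secondary) node is locked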
    if self.op.iallocator:
8445
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8446
    else:
8447
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8448
      nodelist = [self.op.pnode]
8449
      if self.op.snode is not None:
8450
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8451
        nodelist.append(self.op.snode)
8452
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8453

    
8454
    # in case of import lock the source node too
8455
    if self.op.mode == constants.INSTANCE_IMPORT:
8456
      src_node = self.op.src_node
8457
      src_path = self.op.src_path
8458

    
8459
      if src_path is None:
8460
        self.op.src_path = src_path = self.op.instance_name
8461

    
8462
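      # without an explicit source node we will have to search all nodes
      # for the export, hence the cluster-wide node lock below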
      if src_node is None:
8463
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8464
        self.op.src_node = None
8465
        if os.path.isabs(src_path):
8466
          raise errors.OpPrereqError("Importing an instance from a path"
8467
                                     " requires a source node option",
8468
                                     errors.ECODE_INVAL)
8469
      else:
8470
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8471
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8472
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8473
        if not os.path.isabs(src_path):
8474
          self.op.src_path = src_path = \
8475
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8476

    
8477
  def _RunAllocator(self):
8478
    """Run the allocator based on input opcode.
8479

8480
    """
8481
    nics = [n.ToDict() for n in self.nics]
8482
    ial = IAllocator(self.cfg, self.rpc,
8483
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8484
                     name=self.op.instance_name,
8485
                     disk_template=self.op.disk_template,
8486
                     tags=self.op.tags,
8487
                     os=self.op.os_type,
8488
                     vcpus=self.be_full[constants.BE_VCPUS],
8489
                     memory=self.be_full[constants.BE_MEMORY],
8490
                     disks=self.disks,
8491
                     nics=nics,
8492
                     hypervisor=self.op.hypervisor,
8493
                     )
8494

    
8495
    ial.Run(self.op.iallocator)
8496

    
8497
    if not ial.success:
8498
      raise errors.OpPrereqError("Can't compute nodes using"
8499
                                 " iallocator '%s': %s" %
8500
                                 (self.op.iallocator, ial.info),
8501
                                 errors.ECODE_NORES)
8502
    if len(ial.result) != ial.required_nodes:
8503
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8504
                                 " of nodes (%s), required %s" %
8505
                                 (self.op.iallocator, len(ial.result),
8506
                                  ial.required_nodes), errors.ECODE_FAULT)
8507
    self.op.pnode = ial.result[0]
8508
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8509
                 self.op.instance_name, self.op.iallocator,
8510
                 utils.CommaJoin(ial.result))
8511
    if ial.required_nodes == 2:
8512
      self.op.snode = ial.result[1]
8513

    
8514
  def BuildHooksEnv(self):
8515
    """Build hooks env.
8516

8517
    This runs on master, primary and secondary nodes of the instance.
8518

8519
    """
8520
    env = {
8521
      "ADD_MODE": self.op.mode,
8522
      }
8523
    if self.op.mode == constants.INSTANCE_IMPORT:
8524
      env["SRC_NODE"] = self.op.src_node
8525
      env["SRC_PATH"] = self.op.src_path
8526
      env["SRC_IMAGES"] = self.src_images
8527

    
8528
    env.update(_BuildInstanceHookEnv(
8529
      name=self.op.instance_name,
8530
      primary_node=self.op.pnode,
8531
      secondary_nodes=self.secondaries,
8532
      status=self.op.start,
8533
      os_type=self.op.os_type,
8534
      memory=self.be_full[constants.BE_MEMORY],
8535
      vcpus=self.be_full[constants.BE_VCPUS],
8536
      nics=_NICListToTuple(self, self.nics),
8537
      disk_template=self.op.disk_template,
8538
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8539
             for d in self.disks],
8540
      bep=self.be_full,
8541
      hvp=self.hv_full,
8542
      hypervisor_name=self.op.hypervisor,
8543
      tags=self.op.tags,
8544
    ))
8545

    
8546
    return env
8547

    
8548
  def BuildHooksNodes(self):
8549
    """Build hooks nodes.
8550

8551
    """
8552
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8553
    return nl, nl
8554

    
8555
  def _ReadExportInfo(self):
8556
    """Reads the export information from disk.
8557

8558
    It will override the opcode source node and path with the actual
8559
    information, if these two were not specified before.
8560

8561
    @return: the export information
8562

8563
    """
8564
    assert self.op.mode == constants.INSTANCE_IMPORT
8565

    
8566
    src_node = self.op.src_node
8567
    src_path = self.op.src_path
8568

    
8569
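    # no source node was given: scan the export lists of all locked nodes
    # and use the first node that holds an export under the relative path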
    if src_node is None:
8570
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8571
      exp_list = self.rpc.call_export_list(locked_nodes)
8572
      found = False
8573
      for node in exp_list:
8574
        if exp_list[node].fail_msg:
8575
          continue
8576
        if src_path in exp_list[node].payload:
8577
          found = True
8578
          self.op.src_node = src_node = node
8579
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8580
                                                       src_path)
8581
          break
8582
      if not found:
8583
        raise errors.OpPrereqError("No export found for relative path %s" %
8584
                                    src_path, errors.ECODE_INVAL)
8585

    
8586
    _CheckNodeOnline(self, src_node)
8587
    result = self.rpc.call_export_info(src_node, src_path)
8588
    result.Raise("No export or invalid export found in dir %s" % src_path)
8589

    
8590
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8591
    if not export_info.has_section(constants.INISECT_EXP):
8592
      raise errors.ProgrammerError("Corrupted export config",
8593
                                   errors.ECODE_ENVIRON)
8594

    
8595
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8596
    if (int(ei_version) != constants.EXPORT_VERSION):
8597
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8598
                                 (ei_version, constants.EXPORT_VERSION),
8599
                                 errors.ECODE_ENVIRON)
8600
    return export_info
8601

    
8602
  def _ReadExportParams(self, einfo):
8603
    """Use export parameters as defaults.
8604

8605
    If the opcode doesn't specify (i.e. override) some instance
8606
    parameters, try to use them from the export information, if
8607
    it declares them.
8608

8609
    """
8610
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8611

    
8612
    if self.op.disk_template is None:
8613
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8614
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8615
                                          "disk_template")
8616
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8617
          raise errors.OpPrereqError("Disk template specified in configuration"
8618
                                     " file is not one of the allowed values:"
8619
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8620
      else:
8621
        raise errors.OpPrereqError("No disk template specified and the export"
8622
                                   " is missing the disk_template information",
8623
                                   errors.ECODE_INVAL)
8624

    
8625
    if not self.op.disks:
8626
      disks = []
8627
      # TODO: import the disk iv_name too
8628
      for idx in range(constants.MAX_DISKS):
8629
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8630
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8631
          disks.append({constants.IDISK_SIZE: disk_sz})
8632
      self.op.disks = disks
8633
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8634
        raise errors.OpPrereqError("No disk info specified and the export"
8635
                                   " is missing the disk information",
8636
                                   errors.ECODE_INVAL)
8637

    
8638
    if not self.op.nics:
8639
      nics = []
8640
      for idx in range(constants.MAX_NICS):
8641
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8642
          ndict = {}
8643
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8644
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8645
            ndict[name] = v
8646
          nics.append(ndict)
8647
        else:
8648
          break
8649
      self.op.nics = nics
8650

    
8651
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8652
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8653

    
8654
    if (self.op.hypervisor is None and
8655
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8656
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8657

    
8658
    if einfo.has_section(constants.INISECT_HYP):
8659
      # use the export parameters but do not override the ones
8660
      # specified by the user
8661
      for name, value in einfo.items(constants.INISECT_HYP):
8662
        if name not in self.op.hvparams:
8663
          self.op.hvparams[name] = value
8664

    
8665
    if einfo.has_section(constants.INISECT_BEP):
8666
      # use the parameters, without overriding
8667
      for name, value in einfo.items(constants.INISECT_BEP):
8668
        if name not in self.op.beparams:
8669
          self.op.beparams[name] = value
8670
    else:
8671
      # try to read the parameters old style, from the main section
8672
      for name in constants.BES_PARAMETERS:
8673
        if (name not in self.op.beparams and
8674
            einfo.has_option(constants.INISECT_INS, name)):
8675
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8676

    
8677
    if einfo.has_section(constants.INISECT_OSP):
8678
      # use the parameters, without overriding
8679
      for name, value in einfo.items(constants.INISECT_OSP):
8680
        if name not in self.op.osparams:
8681
          self.op.osparams[name] = value
8682

    
8683
  def _RevertToDefaults(self, cluster):
8684
    """Revert the instance parameters to the default values.
8685

8686
    """
8687
    # hvparams
8688
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8689
    for name in self.op.hvparams.keys():
8690
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8691
        del self.op.hvparams[name]
8692
    # beparams
8693
    be_defs = cluster.SimpleFillBE({})
8694
    for name in self.op.beparams.keys():
8695
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8696
        del self.op.beparams[name]
8697
    # nic params
8698
    nic_defs = cluster.SimpleFillNIC({})
8699
    for nic in self.op.nics:
8700
      for name in constants.NICS_PARAMETERS:
8701
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8702
          del nic[name]
8703
    # osparams
8704
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8705
    for name in self.op.osparams.keys():
8706
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8707
        del self.op.osparams[name]
8708

    
8709
  def _CalculateFileStorageDir(self):
8710
    """Calculate final instance file storage dir.
8711

8712
    """
8713
    # file storage dir calculation/check
8714
    self.instance_file_storage_dir = None
8715
    if self.op.disk_template in constants.DTS_FILEBASED:
8716
      # build the full file storage dir path
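      # the final path is <cluster (shared) file storage dir>[/<requested
      # subdirectory>]/<instance name>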
8717
      joinargs = []
8718

    
8719
      if self.op.disk_template == constants.DT_SHARED_FILE:
8720
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8721
      else:
8722
        get_fsd_fn = self.cfg.GetFileStorageDir
8723

    
8724
      cfg_storagedir = get_fsd_fn()
8725
      if not cfg_storagedir:
8726
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8727
      joinargs.append(cfg_storagedir)
8728

    
8729
      if self.op.file_storage_dir is not None:
8730
        joinargs.append(self.op.file_storage_dir)
8731

    
8732
      joinargs.append(self.op.instance_name)
8733

    
8734
      # pylint: disable=W0142
8735
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8736

    
8737
  def CheckPrereq(self):
8738
    """Check prerequisites.
8739

8740
    """
8741
    self._CalculateFileStorageDir()
8742

    
8743
    if self.op.mode == constants.INSTANCE_IMPORT:
8744
      export_info = self._ReadExportInfo()
8745
      self._ReadExportParams(export_info)
8746

    
8747
    if (not self.cfg.GetVGName() and
8748
        self.op.disk_template not in constants.DTS_NOT_LVM):
8749
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8750
                                 " instances", errors.ECODE_STATE)
8751

    
8752
    if (self.op.hypervisor is None or
8753
        self.op.hypervisor == constants.VALUE_AUTO):
8754
      self.op.hypervisor = self.cfg.GetHypervisorType()
8755

    
8756
    cluster = self.cfg.GetClusterInfo()
8757
    enabled_hvs = cluster.enabled_hypervisors
8758
    if self.op.hypervisor not in enabled_hvs:
8759
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8760
                                 " cluster (%s)" % (self.op.hypervisor,
8761
                                  ",".join(enabled_hvs)),
8762
                                 errors.ECODE_STATE)
8763

    
8764
    # Check tag validity
8765
    for tag in self.op.tags:
8766
      objects.TaggableObject.ValidateTag(tag)
8767

    
8768
    # check hypervisor parameter syntax (locally)
8769
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8770
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8771
                                      self.op.hvparams)
8772
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8773
    hv_type.CheckParameterSyntax(filled_hvp)
8774
    self.hv_full = filled_hvp
8775
    # check that we don't specify global parameters on an instance
8776
    _CheckGlobalHvParams(self.op.hvparams)
8777

    
8778
    # fill and remember the beparams dict
8779
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8780
    for param, value in self.op.beparams.iteritems():
8781
      if value == constants.VALUE_AUTO:
8782
        self.op.beparams[param] = default_beparams[param]
8783
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8784
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8785

    
8786
    # build os parameters
8787
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8788

    
8789
    # now that hvp/bep are in final format, let's reset to defaults,
8790
    # if told to do so
8791
    if self.op.identify_defaults:
8792
      self._RevertToDefaults(cluster)
8793

    
8794
    # NIC buildup
8795
    self.nics = []
8796
    for idx, nic in enumerate(self.op.nics):
8797
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8798
      nic_mode = nic_mode_req
8799
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8800
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8801

    
8802
      # in routed mode, for the first nic, the default ip is 'auto'
8803
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8804
        default_ip_mode = constants.VALUE_AUTO
8805
      else:
8806
        default_ip_mode = constants.VALUE_NONE
8807

    
8808
      # ip validity checks
8809
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8810
      if ip is None or ip.lower() == constants.VALUE_NONE:
8811
        nic_ip = None
8812
      elif ip.lower() == constants.VALUE_AUTO:
8813
        if not self.op.name_check:
8814
          raise errors.OpPrereqError("IP address set to auto but name checks"
8815
                                     " have been skipped",
8816
                                     errors.ECODE_INVAL)
8817
        nic_ip = self.hostname1.ip
8818
      else:
8819
        if not netutils.IPAddress.IsValid(ip):
8820
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8821
                                     errors.ECODE_INVAL)
8822
        nic_ip = ip
8823

    
8824
      # TODO: check the ip address for uniqueness
8825
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8826
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8827
                                   errors.ECODE_INVAL)
8828

    
8829
      # MAC address verification
8830
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8831
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8832
        mac = utils.NormalizeAndValidateMac(mac)
8833

    
8834
        try:
8835
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8836
        except errors.ReservationError:
8837
          raise errors.OpPrereqError("MAC address %s already in use"
8838
                                     " in cluster" % mac,
8839
                                     errors.ECODE_NOTUNIQUE)
8840

    
8841
      #  Build nic parameters
8842
      link = nic.get(constants.INIC_LINK, None)
8843
      if link == constants.VALUE_AUTO:
8844
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8845
      nicparams = {}
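      # nicparams stores only what was requested in the opcode (with 'auto'
      # resolved); parameters left unset keep following the cluster defaults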
8846
      if nic_mode_req:
8847
        nicparams[constants.NIC_MODE] = nic_mode
8848
      if link:
8849
        nicparams[constants.NIC_LINK] = link
8850

    
8851
      check_params = cluster.SimpleFillNIC(nicparams)
8852
      objects.NIC.CheckParameterSyntax(check_params)
8853
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8854

    
8855
    # disk checks/pre-build
8856
    default_vg = self.cfg.GetVGName()
8857
    self.disks = []
8858
    for disk in self.op.disks:
8859
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8860
      if mode not in constants.DISK_ACCESS_SET:
8861
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8862
                                   mode, errors.ECODE_INVAL)
8863
      size = disk.get(constants.IDISK_SIZE, None)
8864
      if size is None:
8865
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8866
      try:
8867
        size = int(size)
8868
      except (TypeError, ValueError):
8869
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8870
                                   errors.ECODE_INVAL)
8871

    
8872
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8873
      new_disk = {
8874
        constants.IDISK_SIZE: size,
8875
        constants.IDISK_MODE: mode,
8876
        constants.IDISK_VG: data_vg,
8877
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8878
        }
8879
      if constants.IDISK_ADOPT in disk:
8880
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8881
      self.disks.append(new_disk)
8882

    
8883
    if self.op.mode == constants.INSTANCE_IMPORT:
8884
      disk_images = []
8885
      for idx in range(len(self.disks)):
8886
        option = "disk%d_dump" % idx
8887
        if export_info.has_option(constants.INISECT_INS, option):
8888
          # FIXME: are the old os-es, disk sizes, etc. useful?
8889
          export_name = export_info.get(constants.INISECT_INS, option)
8890
          image = utils.PathJoin(self.op.src_path, export_name)
8891
          disk_images.append(image)
8892
        else:
8893
          disk_images.append(False)
8894

    
8895
      self.src_images = disk_images
8896

    
8897
      old_name = export_info.get(constants.INISECT_INS, "name")
8898
      if self.op.instance_name == old_name:
8899
        for idx, nic in enumerate(self.nics):
8900
          if nic.mac == constants.VALUE_AUTO:
8901
            nic_mac_ini = "nic%d_mac" % idx
8902
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8903

    
8904
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8905

    
8906
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8907
    if self.op.ip_check:
8908
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8909
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8910
                                   (self.check_ip, self.op.instance_name),
8911
                                   errors.ECODE_NOTUNIQUE)
8912

    
8913
    #### mac address generation
8914
    # By generating the mac address here, both the allocator and the hooks get
8915
    # the real final mac address rather than the 'auto' or 'generate' value.
8916
    # There is a race condition between the generation and the instance object
8917
    # creation, which means that we know the mac is valid now, but we're not
8918
    # sure it will be when we actually add the instance. If things go bad
8919
    # adding the instance will abort because of a duplicate mac, and the
8920
    # creation job will fail.
8921
    for nic in self.nics:
8922
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8923
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8924

    
8925
    #### allocator run
8926

    
8927
    if self.op.iallocator is not None:
8928
      self._RunAllocator()
8929

    
8930
    #### node related checks
8931

    
8932
    # check primary node
8933
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8934
    assert self.pnode is not None, \
8935
      "Cannot retrieve locked node %s" % self.op.pnode
8936
    if pnode.offline:
8937
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8938
                                 pnode.name, errors.ECODE_STATE)
8939
    if pnode.drained:
8940
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8941
                                 pnode.name, errors.ECODE_STATE)
8942
    if not pnode.vm_capable:
8943
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8944
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8945

    
8946
    self.secondaries = []
8947

    
8948
    # mirror node verification
8949
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8950
      if self.op.snode == pnode.name:
8951
        raise errors.OpPrereqError("The secondary node cannot be the"
8952
                                   " primary node", errors.ECODE_INVAL)
8953
      _CheckNodeOnline(self, self.op.snode)
8954
      _CheckNodeNotDrained(self, self.op.snode)
8955
      _CheckNodeVmCapable(self, self.op.snode)
8956
      self.secondaries.append(self.op.snode)
8957

    
8958
    nodenames = [pnode.name] + self.secondaries
8959

    
8960
    if not self.adopt_disks:
8961
      # Check lv size requirements, if not adopting
8962
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8963
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8964

    
8965
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8966
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8967
                                disk[constants.IDISK_ADOPT])
8968
                     for disk in self.disks])
8969
      if len(all_lvs) != len(self.disks):
8970
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8971
                                   errors.ECODE_INVAL)
8972
      for lv_name in all_lvs:
8973
        try:
8974
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8975
          # to ReserveLV use the same syntax
8976
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8977
        except errors.ReservationError:
8978
          raise errors.OpPrereqError("LV named %s used by another instance" %
8979
                                     lv_name, errors.ECODE_NOTUNIQUE)
8980

    
8981
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8982
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8983

    
8984
      node_lvs = self.rpc.call_lv_list([pnode.name],
8985
                                       vg_names.payload.keys())[pnode.name]
8986
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8987
      node_lvs = node_lvs.payload
8988

    
8989
      delta = all_lvs.difference(node_lvs.keys())
8990
      if delta:
8991
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8992
                                   utils.CommaJoin(delta),
8993
                                   errors.ECODE_INVAL)
8994
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8995
      if online_lvs:
8996
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8997
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8998
                                   errors.ECODE_STATE)
8999
      # update the size of disk based on what is found
9000
      for dsk in self.disks:
9001
        dsk[constants.IDISK_SIZE] = \
9002
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9003
                                        dsk[constants.IDISK_ADOPT])][0]))
9004

    
9005
    elif self.op.disk_template == constants.DT_BLOCK:
9006
      # Normalize and de-duplicate device paths
9007
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9008
                       for disk in self.disks])
9009
      if len(all_disks) != len(self.disks):
9010
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9011
                                   errors.ECODE_INVAL)
9012
      baddisks = [d for d in all_disks
9013
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9014
      if baddisks:
9015
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9016
                                   " cannot be adopted" %
9017
                                   (", ".join(baddisks),
9018
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9019
                                   errors.ECODE_INVAL)
9020

    
9021
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9022
                                            list(all_disks))[pnode.name]
9023
      node_disks.Raise("Cannot get block device information from node %s" %
9024
                       pnode.name)
9025
      node_disks = node_disks.payload
9026
      delta = all_disks.difference(node_disks.keys())
9027
      if delta:
9028
        raise errors.OpPrereqError("Missing block device(s): %s" %
9029
                                   utils.CommaJoin(delta),
9030
                                   errors.ECODE_INVAL)
9031
      for dsk in self.disks:
9032
        dsk[constants.IDISK_SIZE] = \
9033
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9034

    
9035
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9036

    
9037
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9038
    # check OS parameters (remotely)
9039
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9040

    
9041
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9042

    
9043
    # memory check on primary node
9044
    if self.op.start:
9045
      _CheckNodeFreeMemory(self, self.pnode.name,
9046
                           "creating instance %s" % self.op.instance_name,
9047
                           self.be_full[constants.BE_MEMORY],
9048
                           self.op.hypervisor)
9049

    
9050
    self.dry_run_result = list(nodenames)
9051

    
9052
  def Exec(self, feedback_fn):
9053
    """Create and add the instance to the cluster.
9054

9055
    """
9056
    instance = self.op.instance_name
9057
    pnode_name = self.pnode.name
9058

    
9059
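    # hypervisors in HTS_REQ_PORT need a cluster-wide unique network port
    # (used e.g. for the graphical console), allocated from the cluster pool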
    ht_kind = self.op.hypervisor
9060
    if ht_kind in constants.HTS_REQ_PORT:
9061
      network_port = self.cfg.AllocatePort()
9062
    else:
9063
      network_port = None
9064

    
9065
    disks = _GenerateDiskTemplate(self,
9066
                                  self.op.disk_template,
9067
                                  instance, pnode_name,
9068
                                  self.secondaries,
9069
                                  self.disks,
9070
                                  self.instance_file_storage_dir,
9071
                                  self.op.file_driver,
9072
                                  0,
9073
                                  feedback_fn)
9074

    
9075
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9076
                            primary_node=pnode_name,
9077
                            nics=self.nics, disks=disks,
9078
                            disk_template=self.op.disk_template,
9079
                            admin_up=False,
9080
                            network_port=network_port,
9081
                            beparams=self.op.beparams,
9082
                            hvparams=self.op.hvparams,
9083
                            hypervisor=self.op.hypervisor,
9084
                            osparams=self.op.osparams,
9085
                            )
9086

    
9087
    if self.op.tags:
9088
      for tag in self.op.tags:
9089
        iobj.AddTag(tag)
9090

    
9091
    if self.adopt_disks:
9092
      if self.op.disk_template == constants.DT_PLAIN:
9093
        # rename LVs to the newly-generated names; we need to construct
9094
        # 'fake' LV disks with the old data, plus the new unique_id
9095
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9096
        rename_to = []
9097
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9098
          rename_to.append(t_dsk.logical_id)
9099
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9100
          self.cfg.SetDiskID(t_dsk, pnode_name)
9101
        result = self.rpc.call_blockdev_rename(pnode_name,
9102
                                               zip(tmp_disks, rename_to))
9103
        result.Raise("Failed to rename adoped LVs")
9104
    else:
9105
      feedback_fn("* creating instance disks...")
9106
      try:
9107
        _CreateDisks(self, iobj)
9108
      except errors.OpExecError:
9109
        self.LogWarning("Device creation failed, reverting...")
9110
        try:
9111
          _RemoveDisks(self, iobj)
9112
        finally:
9113
          self.cfg.ReleaseDRBDMinors(instance)
9114
          raise
9115

    
9116
    feedback_fn("adding instance %s to cluster config" % instance)
9117

    
9118
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9119

    
9120
    # Declare that we don't want to remove the instance lock anymore, as we've
9121
    # added the instance to the config
9122
    del self.remove_locks[locking.LEVEL_INSTANCE]
9123

    
9124
    if self.op.mode == constants.INSTANCE_IMPORT:
9125
      # Release unused nodes
9126
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9127
    else:
9128
      # Release all nodes
9129
      _ReleaseLocks(self, locking.LEVEL_NODE)
9130

    
9131
    disk_abort = False
9132
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9133
      feedback_fn("* wiping instance disks...")
9134
      try:
9135
        _WipeDisks(self, iobj)
9136
      except errors.OpExecError, err:
9137
        logging.exception("Wiping disks failed")
9138
        self.LogWarning("Wiping instance disks failed (%s)", err)
9139
        disk_abort = True
9140

    
9141
    if disk_abort:
9142
      # Something is already wrong with the disks, don't do anything else
9143
      pass
9144
    elif self.op.wait_for_sync:
9145
      disk_abort = not _WaitForSync(self, iobj)
9146
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9147
      # make sure the disks are not degraded (still sync-ing is ok)
9148
      feedback_fn("* checking mirrors status")
9149
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9150
    else:
9151
      disk_abort = False
9152

    
9153
    if disk_abort:
9154
      _RemoveDisks(self, iobj)
9155
      self.cfg.RemoveInstance(iobj.name)
9156
      # Make sure the instance lock gets removed
9157
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9158
      raise errors.OpExecError("There are some degraded disks for"
9159
                               " this instance")
9160

    
9161
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9162
      if self.op.mode == constants.INSTANCE_CREATE:
9163
        if not self.op.no_install:
9164
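          # if we did not wait for the initial mirror sync, pause it while
          # the OS is installed so that the installation is not slowed down
          # by the ongoing resync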
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9165
                        not self.op.wait_for_sync)
9166
          if pause_sync:
9167
            feedback_fn("* pausing disk sync to install instance OS")
9168
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9169
                                                              iobj.disks, True)
9170
            for idx, success in enumerate(result.payload):
9171
              if not success:
9172
                logging.warn("pause-sync of instance %s for disk %d failed",
9173
                             instance, idx)
9174

    
9175
          feedback_fn("* running the instance OS create scripts...")
9176
          # FIXME: pass debug option from opcode to backend
9177
          os_add_result = \
9178
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
9179
                                          self.op.debug_level)
9180
          if pause_sync:
9181
            feedback_fn("* resuming disk sync")
9182
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9183
                                                              iobj.disks, False)
9184
            for idx, success in enumerate(result.payload):
9185
              if not success:
9186
                logging.warn("resume-sync of instance %s for disk %d failed",
9187
                             instance, idx)
9188

    
9189
          os_add_result.Raise("Could not add os for instance %s"
9190
                              " on node %s" % (instance, pnode_name))
9191

    
9192
      elif self.op.mode == constants.INSTANCE_IMPORT:
9193
        feedback_fn("* running the instance OS import scripts...")
9194

    
9195
        transfers = []
9196

    
9197
        for idx, image in enumerate(self.src_images):
9198
          if not image:
9199
            continue
9200

    
9201
          # FIXME: pass debug option from opcode to backend
9202
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9203
                                             constants.IEIO_FILE, (image, ),
9204
                                             constants.IEIO_SCRIPT,
9205
                                             (iobj.disks[idx], idx),
9206
                                             None)
9207
          transfers.append(dt)
9208

    
9209
        import_result = \
9210
          masterd.instance.TransferInstanceData(self, feedback_fn,
9211
                                                self.op.src_node, pnode_name,
9212
                                                self.pnode.secondary_ip,
9213
                                                iobj, transfers)
9214
        if not compat.all(import_result):
9215
          self.LogWarning("Some disks for instance %s on node %s were not"
9216
                          " imported successfully" % (instance, pnode_name))
9217

    
9218
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9219
        feedback_fn("* preparing remote import...")
9220
        # The source cluster will stop the instance before attempting to make a
9221
        # connection. In some cases stopping an instance can take a long time,
9222
        # hence the shutdown timeout is added to the connection timeout.
9223
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9224
                           self.op.source_shutdown_timeout)
9225
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9226

    
9227
        assert iobj.primary_node == self.pnode.name
9228
        disk_results = \
9229
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9230
                                        self.source_x509_ca,
9231
                                        self._cds, timeouts)
9232
        if not compat.all(disk_results):
9233
          # TODO: Should the instance still be started, even if some disks
9234
          # failed to import (valid for local imports, too)?
9235
          self.LogWarning("Some disks for instance %s on node %s were not"
9236
                          " imported successfully" % (instance, pnode_name))
9237

    
9238
        # Run rename script on newly imported instance
9239
        assert iobj.name == instance
9240
        feedback_fn("Running rename script for %s" % instance)
9241
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9242
                                                   self.source_instance_name,
9243
                                                   self.op.debug_level)
9244
        if result.fail_msg:
9245
          self.LogWarning("Failed to run rename script for %s on node"
9246
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9247

    
9248
      else:
9249
        # also checked in the prereq part
9250
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9251
                                     % self.op.mode)
9252

    
9253
    if self.op.start:
9254
      iobj.admin_up = True
9255
      self.cfg.Update(iobj, feedback_fn)
9256
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9257
      feedback_fn("* starting instance...")
9258
      result = self.rpc.call_instance_start(pnode_name, iobj,
9259
                                            None, None, False)
9260
      result.Raise("Could not start instance")
9261

    
9262
    return list(iobj.all_nodes)
9263

    
9264

    
9265
class LUInstanceConsole(NoHooksLU):
9266
  """Connect to an instance's console.
9267

9268
  This is somewhat special in that it returns the command line that
9269
  you need to run on the master node in order to connect to the
9270
  console.
9271

9272
  """
9273
  REQ_BGL = False
9274

    
9275
  def ExpandNames(self):
9276
    self._ExpandAndLockInstance()
9277

    
9278
  def CheckPrereq(self):
9279
    """Check prerequisites.
9280

9281
    This checks that the instance is in the cluster.
9282

9283
    """
9284
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9285
    assert self.instance is not None, \
9286
      "Cannot retrieve locked instance %s" % self.op.instance_name
9287
    _CheckNodeOnline(self, self.instance.primary_node)
9288

    
9289
  def Exec(self, feedback_fn):
9290
    """Connect to the console of an instance
9291

9292
    """
9293
    instance = self.instance
9294
    node = instance.primary_node
9295

    
9296
    node_insts = self.rpc.call_instance_list([node],
9297
                                             [instance.hypervisor])[node]
9298
    node_insts.Raise("Can't get node information from %s" % node)
9299

    
9300
    if instance.name not in node_insts.payload:
9301
      if instance.admin_up:
9302
        state = constants.INSTST_ERRORDOWN
9303
      else:
9304
        state = constants.INSTST_ADMINDOWN
9305
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9306
                               (instance.name, state))
9307

    
9308
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9309

    
9310
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9311

    
9312

    
9313
def _GetInstanceConsole(cluster, instance):
9314
  """Returns console information for an instance.
9315

9316
  @type cluster: L{objects.Cluster}
9317
  @type instance: L{objects.Instance}
9318
  @rtype: dict
9319

9320
  """
9321
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9322
  # beparams and hvparams are passed separately, to avoid editing the
9323
  # instance and then saving the defaults in the instance itself.
9324
  hvparams = cluster.FillHV(instance)
9325
  beparams = cluster.FillBE(instance)
9326
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9327

    
9328
  assert console.instance == instance.name
9329
  assert console.Validate()
9330

    
9331
  return console.ToDict()
9332

    
9333

    
9334
class LUInstanceReplaceDisks(LogicalUnit):
9335
  """Replace the disks of an instance.
9336

9337
  """
9338
  HPATH = "mirrors-replace"
9339
  HTYPE = constants.HTYPE_INSTANCE
9340
  REQ_BGL = False
9341

    
9342
  def CheckArguments(self):
9343
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9344
                                  self.op.iallocator)
9345

    
9346
  def ExpandNames(self):
9347
    self._ExpandAndLockInstance()
9348

    
9349
    assert locking.LEVEL_NODE not in self.needed_locks
9350
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9351

    
9352
    assert self.op.iallocator is None or self.op.remote_node is None, \
9353
      "Conflicting options"
9354

    
9355
    if self.op.remote_node is not None:
9356
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9357

    
9358
      # Warning: do not remove the locking of the new secondary here
9359
      # unless DRBD8.AddChildren is changed to work in parallel;
9360
      # currently it doesn't since parallel invocations of
9361
      # FindUnusedMinor will conflict
9362
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9363
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9364
    else:
9365
      self.needed_locks[locking.LEVEL_NODE] = []
9366
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9367

    
9368
      if self.op.iallocator is not None:
9369
        # iallocator will select a new node in the same group
9370
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9371

    
9372
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9373
                                   self.op.iallocator, self.op.remote_node,
9374
                                   self.op.disks, False, self.op.early_release)
9375

    
9376
    self.tasklets = [self.replacer]
9377

    
9378
  def DeclareLocks(self, level):
9379
    if level == locking.LEVEL_NODEGROUP:
9380
      assert self.op.remote_node is None
9381
      assert self.op.iallocator is not None
9382
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9383

    
9384
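      # the instance's node groups are only locked shared: their node
      # membership just has to stay stable while the new secondary is chosen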
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9385
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9386
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9387

    
9388
    elif level == locking.LEVEL_NODE:
9389
      if self.op.iallocator is not None:
9390
        assert self.op.remote_node is None
9391
        assert not self.needed_locks[locking.LEVEL_NODE]
9392

    
9393
        # Lock member nodes of all locked groups
9394
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9395
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9396
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9397
      else:
9398
        self._LockInstancesNodes()
9399

    
9400
  def BuildHooksEnv(self):
9401
    """Build hooks env.
9402

9403
    This runs on the master, the primary and all the secondaries.
9404

9405
    """
9406
    instance = self.replacer.instance
9407
    env = {
9408
      "MODE": self.op.mode,
9409
      "NEW_SECONDARY": self.op.remote_node,
9410
      "OLD_SECONDARY": instance.secondary_nodes[0],
9411
      }
9412
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9413
    return env
9414

    
9415
  def BuildHooksNodes(self):
9416
    """Build hooks nodes.
9417

9418
    """
9419
    instance = self.replacer.instance
9420
    nl = [
9421
      self.cfg.GetMasterNode(),
9422
      instance.primary_node,
9423
      ]
9424
    if self.op.remote_node is not None:
9425
      nl.append(self.op.remote_node)
9426
    return nl, nl
9427

    
9428
  def CheckPrereq(self):
9429
    """Check prerequisites.
9430

9431
    """
9432
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9433
            self.op.iallocator is None)
9434

    
9435
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9436
    if owned_groups:
9437
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9438

    
9439
    return LogicalUnit.CheckPrereq(self)
9440

    
9441

    
9442
class TLReplaceDisks(Tasklet):
9443
  """Replaces disks for an instance.
9444

9445
  Note: Locking is not within the scope of this class.
9446

9447
  """
9448
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9449
               disks, delay_iallocator, early_release):
9450
    """Initializes this class.
9451

9452
    """
9453
    Tasklet.__init__(self, lu)
9454

    
9455
    # Parameters
9456
    self.instance_name = instance_name
9457
    self.mode = mode
9458
    self.iallocator_name = iallocator_name
9459
    self.remote_node = remote_node
9460
    self.disks = disks
9461
    self.delay_iallocator = delay_iallocator
9462
    self.early_release = early_release
9463

    
9464
    # Runtime data
9465
    self.instance = None
9466
    self.new_node = None
9467
    self.target_node = None
9468
    self.other_node = None
9469
    self.remote_node_info = None
9470
    self.node_secondary_ip = None
9471

    
9472
  @staticmethod
9473
  def CheckArguments(mode, remote_node, iallocator):
9474
    """Helper function for users of this class.
9475

9476
    """
9477
    # check for valid parameter combination
9478
    if mode == constants.REPLACE_DISK_CHG:
9479
      if remote_node is None and iallocator is None:
9480
        raise errors.OpPrereqError("When changing the secondary either an"
9481
                                   " iallocator script must be used or the"
9482
                                   " new node given", errors.ECODE_INVAL)
9483

    
9484
      if remote_node is not None and iallocator is not None:
9485
        raise errors.OpPrereqError("Give either the iallocator or the new"
9486
                                   " secondary, not both", errors.ECODE_INVAL)
9487

    
9488
    elif remote_node is not None or iallocator is not None:
9489
      # Not replacing the secondary
9490
      raise errors.OpPrereqError("The iallocator and new node options can"
9491
                                 " only be used when changing the"
9492
                                 " secondary node", errors.ECODE_INVAL)
9493

    
9494
  @staticmethod
9495
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9496
    """Compute a new secondary node using an IAllocator.
9497

9498
    """
9499
    ial = IAllocator(lu.cfg, lu.rpc,
9500
                     mode=constants.IALLOCATOR_MODE_RELOC,
9501
                     name=instance_name,
9502
                     relocate_from=list(relocate_from))
9503

    
9504
    ial.Run(iallocator_name)
9505

    
9506
    if not ial.success:
9507
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9508
                                 " %s" % (iallocator_name, ial.info),
9509
                                 errors.ECODE_NORES)
9510

    
9511
    if len(ial.result) != ial.required_nodes:
9512
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9513
                                 " of nodes (%s), required %s" %
9514
                                 (iallocator_name,
9515
                                  len(ial.result), ial.required_nodes),
9516
                                 errors.ECODE_FAULT)
9517

    
9518
    remote_node_name = ial.result[0]
9519

    
9520
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9521
               instance_name, remote_node_name)
9522

    
9523
    return remote_node_name
9524

    
9525
  def _FindFaultyDisks(self, node_name):
9526
    """Wrapper for L{_FindFaultyInstanceDisks}.
9527

9528
    """
9529
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9530
                                    node_name, True)
9531

    
9532
  def _CheckDisksActivated(self, instance):
9533
    """Checks if the instance disks are activated.
9534

9535
    @param instance: The instance to check disks
9536
    @return: True if they are activated, False otherwise
9537

9538
    """
9539
    nodes = instance.all_nodes
9540

    
9541
    for idx, dev in enumerate(instance.disks):
9542
      for node in nodes:
9543
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9544
        self.cfg.SetDiskID(dev, node)
9545

    
9546
        result = self.rpc.call_blockdev_find(node, dev)
9547

    
9548
        if result.offline:
9549
          continue
9550
        elif result.fail_msg or not result.payload:
9551
          return False
9552

    
9553
    return True
9554

    
9555
  def CheckPrereq(self):
9556
    """Check prerequisites.
9557

9558
    This checks that the instance is in the cluster.
9559

9560
    """
9561
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9562
    assert instance is not None, \
9563
      "Cannot retrieve locked instance %s" % self.instance_name
9564

    
9565
    if instance.disk_template != constants.DT_DRBD8:
9566
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9567
                                 " instances", errors.ECODE_INVAL)
9568

    
9569
    if len(instance.secondary_nodes) != 1:
9570
      raise errors.OpPrereqError("The instance has a strange layout,"
9571
                                 " expected one secondary but found %d" %
9572
                                 len(instance.secondary_nodes),
9573
                                 errors.ECODE_FAULT)
9574

    
9575
    if not self.delay_iallocator:
9576
      self._CheckPrereq2()
9577

    
9578
  def _CheckPrereq2(self):
9579
    """Check prerequisites, second part.
9580

9581
    This function should always be part of CheckPrereq. It was separated and is
9582
    now called from Exec because during node evacuation iallocator was only
9583
    called with an unmodified cluster model, not taking planned changes into
9584
    account.
9585

9586
    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file-based disks, once that
      # feature is supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
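    # (The final boolean argument of call_blockdev_grow selects dry-run mode:
    # True here, False in the second pass below that performs the resize.)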
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
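    # self.op.disks is a list of (disk_op, disk_dict) pairs where disk_op is
    # constants.DDM_ADD, constants.DDM_REMOVE or an existing disk index; as
    # an illustration only, an add entry could look like
    # (constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
    #                      constants.IDISK_MODE: constants.DISK_RDWR}).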
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
10864

    
10865
  def ExpandNames(self):
10866
    self._ExpandAndLockInstance()
10867
    self.needed_locks[locking.LEVEL_NODE] = []
10868
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10869

    
10870
  def DeclareLocks(self, level):
10871
    if level == locking.LEVEL_NODE:
10872
      self._LockInstancesNodes()
10873
      if self.op.disk_template and self.op.remote_node:
10874
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10875
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10876

    
10877
  def BuildHooksEnv(self):
10878
    """Build hooks env.
10879

10880
    This runs on the master, primary and secondaries.
10881

10882
    """
10883
    args = dict()
10884
    if constants.BE_MEMORY in self.be_new:
10885
      args["memory"] = self.be_new[constants.BE_MEMORY]
10886
    if constants.BE_VCPUS in self.be_new:
10887
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10888
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10889
    # information at all.
10890
    if self.op.nics:
10891
      args["nics"] = []
10892
      nic_override = dict(self.op.nics)
10893
      for idx, nic in enumerate(self.instance.nics):
10894
        if idx in nic_override:
10895
          this_nic_override = nic_override[idx]
10896
        else:
10897
          this_nic_override = {}
10898
        if constants.INIC_IP in this_nic_override:
10899
          ip = this_nic_override[constants.INIC_IP]
10900
        else:
10901
          ip = nic.ip
10902
        if constants.INIC_MAC in this_nic_override:
10903
          mac = this_nic_override[constants.INIC_MAC]
10904
        else:
10905
          mac = nic.mac
10906
        if idx in self.nic_pnew:
10907
          nicparams = self.nic_pnew[idx]
10908
        else:
10909
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10910
        mode = nicparams[constants.NIC_MODE]
10911
        link = nicparams[constants.NIC_LINK]
10912
        args["nics"].append((ip, mac, mode, link))
10913
      if constants.DDM_ADD in nic_override:
10914
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10915
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10916
        nicparams = self.nic_pnew[constants.DDM_ADD]
10917
        mode = nicparams[constants.NIC_MODE]
10918
        link = nicparams[constants.NIC_LINK]
10919
        args["nics"].append((ip, mac, mode, link))
10920
      elif constants.DDM_REMOVE in nic_override:
10921
        del args["nics"][-1]
10922

    
10923
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10924
    if self.op.disk_template:
10925
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10926

    
10927
    return env
10928

    
10929
  def BuildHooksNodes(self):
10930
    """Build hooks nodes.
10931

10932
    """
10933
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10934
    return (nl, nl)
10935

    
10936
  def CheckPrereq(self):
10937
    """Check prerequisites.
10938

10939
    This checks the new parameters against the instance and cluster state.
10940

10941
    """
10942
    # checking the new params on the primary/secondary nodes
10943

    
10944
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10945
    cluster = self.cluster = self.cfg.GetClusterInfo()
10946
    assert self.instance is not None, \
10947
      "Cannot retrieve locked instance %s" % self.op.instance_name
10948
    pnode = instance.primary_node
10949
    nodelist = list(instance.all_nodes)
10950

    
10951
    # OS change
10952
    if self.op.os_name and not self.op.force:
10953
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10954
                      self.op.force_variant)
10955
      instance_os = self.op.os_name
10956
    else:
10957
      instance_os = instance.os
10958

    
10959
    if self.op.disk_template:
10960
      if instance.disk_template == self.op.disk_template:
10961
        raise errors.OpPrereqError("Instance already has disk template %s" %
10962
                                   instance.disk_template, errors.ECODE_INVAL)
10963

    
10964
      if (instance.disk_template,
10965
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10966
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10967
                                   " %s to %s" % (instance.disk_template,
10968
                                                  self.op.disk_template),
10969
                                   errors.ECODE_INVAL)
10970
      _CheckInstanceDown(self, instance, "cannot change disk template")
10971
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10972
        if self.op.remote_node == pnode:
10973
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10974
                                     " as the primary node of the instance" %
10975
                                     self.op.remote_node, errors.ECODE_STATE)
10976
        _CheckNodeOnline(self, self.op.remote_node)
10977
        _CheckNodeNotDrained(self, self.op.remote_node)
10978
        # FIXME: here we assume that the old instance type is DT_PLAIN
10979
        assert instance.disk_template == constants.DT_PLAIN
10980
        disks = [{constants.IDISK_SIZE: d.size,
10981
                  constants.IDISK_VG: d.logical_id[0]}
10982
                 for d in instance.disks]
10983
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10984
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10985

    
10986
    # hvparams processing
10987
    if self.op.hvparams:
10988
      hv_type = instance.hypervisor
10989
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10990
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10991
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10992

    
10993
      # local check
10994
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10995
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10996
      self.hv_proposed = self.hv_new = hv_new # the new actual values
10997
      self.hv_inst = i_hvdict # the new dict (without defaults)
10998
    else:
10999
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11000
                                              instance.hvparams)
11001
      self.hv_new = self.hv_inst = {}
11002

    
11003
    # beparams processing
11004
    if self.op.beparams:
11005
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11006
                                   use_none=True)
11007
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11008
      be_new = cluster.SimpleFillBE(i_bedict)
11009
      self.be_proposed = self.be_new = be_new # the new actual values
11010
      self.be_inst = i_bedict # the new dict (without defaults)
11011
    else:
11012
      self.be_new = self.be_inst = {}
11013
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11014
    be_old = cluster.FillBE(instance)
11015

    
11016
    # CPU param validation -- checking every time a parameter is
11017
    # changed to cover all cases where either CPU mask or vcpus have
11018
    # changed
11019
    if (constants.BE_VCPUS in self.be_proposed and
11020
        constants.HV_CPU_MASK in self.hv_proposed):
11021
      cpu_list = \
11022
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11023
      # Verify mask is consistent with number of vCPUs. Can skip this
11024
      # test if only 1 entry in the CPU mask, which means same mask
11025
      # is applied to all vCPUs.
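      # For example, with BE_VCPUS set to 3, a mask that parses into three
      # entries (one per vCPU) passes this check, a two-entry mask is
      # rejected, and a single-entry mask always passes because it applies
      # to every vCPU.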
11026
      if (len(cpu_list) > 1 and
11027
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11028
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11029
                                   " CPU mask [%s]" %
11030
                                   (self.be_proposed[constants.BE_VCPUS],
11031
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11032
                                   errors.ECODE_INVAL)
11033

    
11034
      # Only perform this test if a new CPU mask is given
11035
      if constants.HV_CPU_MASK in self.hv_new:
11036
        # Calculate the largest CPU number requested
11037
        max_requested_cpu = max(map(max, cpu_list))
11038
        # Check that all of the instance's nodes have enough physical CPUs to
11039
        # satisfy the requested CPU mask
11040
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11041
                                max_requested_cpu + 1, instance.hypervisor)
11042

    
11043
    # osparams processing
11044
    if self.op.osparams:
11045
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11046
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11047
      self.os_inst = i_osdict # the new dict (without defaults)
11048
    else:
11049
      self.os_inst = {}
11050

    
11051
    self.warn = []
11052

    
11053
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11054
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11055
      mem_check_list = [pnode]
11056
      if be_new[constants.BE_AUTO_BALANCE]:
11057
        # either we changed auto_balance to yes or it was from before
11058
        mem_check_list.extend(instance.secondary_nodes)
11059
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11060
                                                  instance.hypervisor)
11061
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11062
                                         instance.hypervisor)
11063
      pninfo = nodeinfo[pnode]
11064
      msg = pninfo.fail_msg
11065
      if msg:
11066
        # Assume the primary node is unreachable and go ahead
11067
        self.warn.append("Can't get info from primary node %s: %s" %
11068
                         (pnode, msg))
11069
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11070
        self.warn.append("Node data from primary node %s doesn't contain"
11071
                         " free memory information" % pnode)
11072
      elif instance_info.fail_msg:
11073
        self.warn.append("Can't get instance runtime information: %s" %
11074
                        instance_info.fail_msg)
11075
      else:
11076
        if instance_info.payload:
11077
          current_mem = int(instance_info.payload["memory"])
11078
        else:
11079
          # Assume instance not running
11080
          # (there is a slight race condition here, but it's not very probable,
11081
          # and we have no other way to check)
11082
          current_mem = 0
11083
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11084
                    pninfo.payload["memory_free"])
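        # Worked example: raising the instance to 4096 MB while it currently
        # uses 1024 MB and the primary node reports 2048 MB free gives
        # miss_mem = 4096 - 1024 - 2048 = 1024 > 0, so the change is refused.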
11085
        if miss_mem > 0:
11086
          raise errors.OpPrereqError("This change will prevent the instance"
11087
                                     " from starting, due to %d MB of memory"
11088
                                     " missing on its primary node" % miss_mem,
11089
                                     errors.ECODE_NORES)
11090

    
11091
      if be_new[constants.BE_AUTO_BALANCE]:
11092
        for node, nres in nodeinfo.items():
11093
          if node not in instance.secondary_nodes:
11094
            continue
11095
          nres.Raise("Can't get info from secondary node %s" % node,
11096
                     prereq=True, ecode=errors.ECODE_STATE)
11097
          if not isinstance(nres.payload.get("memory_free", None), int):
11098
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11099
                                       " memory information" % node,
11100
                                       errors.ECODE_STATE)
11101
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11102
            raise errors.OpPrereqError("This change will prevent the instance"
11103
                                       " from failover to its secondary node"
11104
                                       " %s, due to not enough memory" % node,
11105
                                       errors.ECODE_STATE)
11106

    
11107
    # NIC processing
11108
    self.nic_pnew = {}
11109
    self.nic_pinst = {}
11110
    for nic_op, nic_dict in self.op.nics:
11111
      if nic_op == constants.DDM_REMOVE:
11112
        if not instance.nics:
11113
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11114
                                     errors.ECODE_INVAL)
11115
        continue
11116
      if nic_op != constants.DDM_ADD:
11117
        # an existing nic
11118
        if not instance.nics:
11119
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11120
                                     " no NICs" % nic_op,
11121
                                     errors.ECODE_INVAL)
11122
        if nic_op < 0 or nic_op >= len(instance.nics):
11123
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11124
                                     " are 0 to %d" %
11125
                                     (nic_op, len(instance.nics) - 1),
11126
                                     errors.ECODE_INVAL)
11127
        old_nic_params = instance.nics[nic_op].nicparams
11128
        old_nic_ip = instance.nics[nic_op].ip
11129
      else:
11130
        old_nic_params = {}
11131
        old_nic_ip = None
11132

    
11133
      update_params_dict = dict([(key, nic_dict[key])
11134
                                 for key in constants.NICS_PARAMETERS
11135
                                 if key in nic_dict])
11136

    
11137
      if "bridge" in nic_dict:
11138
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11139

    
11140
      new_nic_params = _GetUpdatedParams(old_nic_params,
11141
                                         update_params_dict)
11142
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11143
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11144
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11145
      self.nic_pinst[nic_op] = new_nic_params
11146
      self.nic_pnew[nic_op] = new_filled_nic_params
11147
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11148

    
11149
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11150
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11151
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11152
        if msg:
11153
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11154
          if self.op.force:
11155
            self.warn.append(msg)
11156
          else:
11157
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11158
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11159
        if constants.INIC_IP in nic_dict:
11160
          nic_ip = nic_dict[constants.INIC_IP]
11161
        else:
11162
          nic_ip = old_nic_ip
11163
        if nic_ip is None:
11164
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11165
                                     " on a routed nic", errors.ECODE_INVAL)
11166
      if constants.INIC_MAC in nic_dict:
11167
        nic_mac = nic_dict[constants.INIC_MAC]
11168
        if nic_mac is None:
11169
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11170
                                     errors.ECODE_INVAL)
11171
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11172
          # otherwise generate the mac
11173
          nic_dict[constants.INIC_MAC] = \
11174
            self.cfg.GenerateMAC(self.proc.GetECId())
11175
        else:
11176
          # or validate/reserve the current one
11177
          try:
11178
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11179
          except errors.ReservationError:
11180
            raise errors.OpPrereqError("MAC address %s already in use"
11181
                                       " in cluster" % nic_mac,
11182
                                       errors.ECODE_NOTUNIQUE)
11183

    
11184
    # DISK processing
11185
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11186
      raise errors.OpPrereqError("Disk operations not supported for"
11187
                                 " diskless instances",
11188
                                 errors.ECODE_INVAL)
11189
    for disk_op, _ in self.op.disks:
11190
      if disk_op == constants.DDM_REMOVE:
11191
        if len(instance.disks) == 1:
11192
          raise errors.OpPrereqError("Cannot remove the last disk of"
11193
                                     " an instance", errors.ECODE_INVAL)
11194
        _CheckInstanceDown(self, instance, "cannot remove disks")
11195

    
11196
      if (disk_op == constants.DDM_ADD and
11197
          len(instance.disks) >= constants.MAX_DISKS):
11198
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11199
                                   " add more" % constants.MAX_DISKS,
11200
                                   errors.ECODE_STATE)
11201
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11202
        # an existing disk
11203
        if disk_op < 0 or disk_op >= len(instance.disks):
11204
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11205
                                     " are 0 to %d" %
11206
                                     (disk_op, len(instance.disks) - 1),
11207
                                     errors.ECODE_INVAL)
11208

    
11209
    return
11210

    
11211
  def _ConvertPlainToDrbd(self, feedback_fn):
11212
    """Converts an instance from plain to drbd.
11213

11214
    """
11215
    feedback_fn("Converting template to drbd")
11216
    instance = self.instance
11217
    pnode = instance.primary_node
11218
    snode = self.op.remote_node
11219

    
11220
    # create a fake disk info for _GenerateDiskTemplate
11221
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11222
                  constants.IDISK_VG: d.logical_id[0]}
11223
                 for d in instance.disks]
11224
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11225
                                      instance.name, pnode, [snode],
11226
                                      disk_info, None, None, 0, feedback_fn)
11227
    info = _GetInstanceInfoText(instance)
11228
    feedback_fn("Creating additional volumes...")
11229
    # first, create the missing data and meta devices
11230
    for disk in new_disks:
11231
      # unfortunately this is... not too nice
11232
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11233
                            info, True)
11234
      for child in disk.children:
11235
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11236
    # at this stage, all new LVs have been created, we can rename the
11237
    # old ones
11238
    feedback_fn("Renaming original volumes...")
11239
    rename_list = [(o, n.children[0].logical_id)
11240
                   for (o, n) in zip(instance.disks, new_disks)]
11241
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11242
    result.Raise("Failed to rename original LVs")
11243

    
11244
    feedback_fn("Initializing DRBD devices...")
11245
    # all child devices are in place, we can now create the DRBD devices
11246
    for disk in new_disks:
11247
      for node in [pnode, snode]:
11248
        f_create = node == pnode
11249
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11250

    
11251
    # at this point, the instance has been modified
11252
    instance.disk_template = constants.DT_DRBD8
11253
    instance.disks = new_disks
11254
    self.cfg.Update(instance, feedback_fn)
11255

    
11256
    # disks are created, waiting for sync
11257
    disk_abort = not _WaitForSync(self, instance,
11258
                                  oneshot=not self.op.wait_for_sync)
11259
    if disk_abort:
11260
      raise errors.OpExecError("There are some degraded disks for"
11261
                               " this instance, please cleanup manually")
11262

    
11263
  def _ConvertDrbdToPlain(self, feedback_fn):
11264
    """Converts an instance from drbd to plain.
11265

11266
    """
11267
    instance = self.instance
11268
    assert len(instance.secondary_nodes) == 1
11269
    pnode = instance.primary_node
11270
    snode = instance.secondary_nodes[0]
11271
    feedback_fn("Converting template to plain")
11272

    
11273
    old_disks = instance.disks
11274
    new_disks = [d.children[0] for d in old_disks]
11275

    
11276
    # copy over size and mode
11277
    for parent, child in zip(old_disks, new_disks):
11278
      child.size = parent.size
11279
      child.mode = parent.mode
11280

    
11281
    # update instance structure
11282
    instance.disks = new_disks
11283
    instance.disk_template = constants.DT_PLAIN
11284
    self.cfg.Update(instance, feedback_fn)
11285

    
11286
    feedback_fn("Removing volumes on the secondary node...")
11287
    for disk in old_disks:
11288
      self.cfg.SetDiskID(disk, snode)
11289
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11290
      if msg:
11291
        self.LogWarning("Could not remove block device %s on node %s,"
11292
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11293

    
11294
    feedback_fn("Removing unneeded volumes on the primary node...")
11295
    for idx, disk in enumerate(old_disks):
11296
      meta = disk.children[1]
11297
      self.cfg.SetDiskID(meta, pnode)
11298
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11299
      if msg:
11300
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11301
                        " continuing anyway: %s", idx, pnode, msg)
11302

    
11303
  def Exec(self, feedback_fn):
11304
    """Modifies an instance.
11305

11306
    All parameters take effect only at the next restart of the instance.
11307

11308
    """
11309
    # Process here the warnings from CheckPrereq, as we don't have a
11310
    # feedback_fn there.
11311
    for warn in self.warn:
11312
      feedback_fn("WARNING: %s" % warn)
11313

    
11314
    result = []
11315
    instance = self.instance
11316
    # disk changes
11317
    for disk_op, disk_dict in self.op.disks:
11318
      if disk_op == constants.DDM_REMOVE:
11319
        # remove the last disk
11320
        device = instance.disks.pop()
11321
        device_idx = len(instance.disks)
11322
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11323
          self.cfg.SetDiskID(disk, node)
11324
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11325
          if msg:
11326
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11327
                            " continuing anyway", device_idx, node, msg)
11328
        result.append(("disk/%d" % device_idx, "remove"))
11329
      elif disk_op == constants.DDM_ADD:
11330
        # add a new disk
11331
        if instance.disk_template in (constants.DT_FILE,
11332
                                      constants.DT_SHARED_FILE):
11333
          file_driver, file_path = instance.disks[0].logical_id
11334
          file_path = os.path.dirname(file_path)
11335
        else:
11336
          file_driver = file_path = None
11337
        disk_idx_base = len(instance.disks)
11338
        new_disk = _GenerateDiskTemplate(self,
11339
                                         instance.disk_template,
11340
                                         instance.name, instance.primary_node,
11341
                                         instance.secondary_nodes,
11342
                                         [disk_dict],
11343
                                         file_path,
11344
                                         file_driver,
11345
                                         disk_idx_base, feedback_fn)[0]
11346
        instance.disks.append(new_disk)
11347
        info = _GetInstanceInfoText(instance)
11348

    
11349
        logging.info("Creating volume %s for instance %s",
11350
                     new_disk.iv_name, instance.name)
11351
        # Note: this needs to be kept in sync with _CreateDisks
11352
        #HARDCODE
11353
        for node in instance.all_nodes:
11354
          f_create = node == instance.primary_node
11355
          try:
11356
            _CreateBlockDev(self, node, instance, new_disk,
11357
                            f_create, info, f_create)
11358
          except errors.OpExecError, err:
11359
            self.LogWarning("Failed to create volume %s (%s) on"
11360
                            " node %s: %s",
11361
                            new_disk.iv_name, new_disk, node, err)
11362
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11363
                       (new_disk.size, new_disk.mode)))
11364
      else:
11365
        # change a given disk
11366
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11367
        result.append(("disk.mode/%d" % disk_op,
11368
                       disk_dict[constants.IDISK_MODE]))
11369

    
11370
    if self.op.disk_template:
11371
      r_shut = _ShutdownInstanceDisks(self, instance)
11372
      if not r_shut:
11373
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11374
                                 " proceed with disk template conversion")
11375
      mode = (instance.disk_template, self.op.disk_template)
11376
      try:
11377
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11378
      except:
11379
        self.cfg.ReleaseDRBDMinors(instance.name)
11380
        raise
11381
      result.append(("disk_template", self.op.disk_template))
11382

    
11383
    # NIC changes
11384
    for nic_op, nic_dict in self.op.nics:
11385
      if nic_op == constants.DDM_REMOVE:
11386
        # remove the last nic
11387
        del instance.nics[-1]
11388
        result.append(("nic.%d" % len(instance.nics), "remove"))
11389
      elif nic_op == constants.DDM_ADD:
11390
        # mac and bridge should be set by now
11391
        mac = nic_dict[constants.INIC_MAC]
11392
        ip = nic_dict.get(constants.INIC_IP, None)
11393
        nicparams = self.nic_pinst[constants.DDM_ADD]
11394
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11395
        instance.nics.append(new_nic)
11396
        result.append(("nic.%d" % (len(instance.nics) - 1),
11397
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11398
                       (new_nic.mac, new_nic.ip,
11399
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11400
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11401
                       )))
11402
      else:
11403
        for key in (constants.INIC_MAC, constants.INIC_IP):
11404
          if key in nic_dict:
11405
            setattr(instance.nics[nic_op], key, nic_dict[key])
11406
        if nic_op in self.nic_pinst:
11407
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11408
        for key, val in nic_dict.iteritems():
11409
          result.append(("nic.%s/%d" % (key, nic_op), val))
11410

    
11411
    # hvparams changes
11412
    if self.op.hvparams:
11413
      instance.hvparams = self.hv_inst
11414
      for key, val in self.op.hvparams.iteritems():
11415
        result.append(("hv/%s" % key, val))
11416

    
11417
    # beparams changes
11418
    if self.op.beparams:
11419
      instance.beparams = self.be_inst
11420
      for key, val in self.op.beparams.iteritems():
11421
        result.append(("be/%s" % key, val))
11422

    
11423
    # OS change
11424
    if self.op.os_name:
11425
      instance.os = self.op.os_name
11426

    
11427
    # osparams changes
11428
    if self.op.osparams:
11429
      instance.osparams = self.os_inst
11430
      for key, val in self.op.osparams.iteritems():
11431
        result.append(("os/%s" % key, val))
11432

    
11433
    self.cfg.Update(instance, feedback_fn)
11434

    
11435
    return result
11436

    
11437
  _DISK_CONVERSIONS = {
11438
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11439
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11440
    }
11441

    
11442

    
11443
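# The _DISK_CONVERSIONS table above is a plain dispatch table keyed by
# (old_template, new_template) pairs.  The sketch below shows the same
# lookup-and-dispatch pattern in isolation; it is illustrative only, with
# made-up names and string literals standing in for the constants.DT_*
# values, and is not part of the Ganeti API.
def _ExampleConvertDispatch(old_template, new_template):
  """Illustrative only: dispatch a conversion handler by template pair."""
  handlers = {
    ("plain", "drbd8"): lambda: "convert plain -> drbd8",
    ("drbd8", "plain"): lambda: "convert drbd8 -> plain",
    }
  try:
    handler = handlers[(old_template, new_template)]
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" %
                     (old_template, new_template))
  return handler()

# For instance, _ExampleConvertDispatch("plain", "drbd8") returns
# "convert plain -> drbd8", while an unknown pair raises ValueError, much
# like CheckPrereq above rejects pairs missing from _DISK_CONVERSIONS.

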
class LUInstanceChangeGroup(LogicalUnit):
11444
  HPATH = "instance-change-group"
11445
  HTYPE = constants.HTYPE_INSTANCE
11446
  REQ_BGL = False
11447

    
11448
  def ExpandNames(self):
11449
    self.share_locks = _ShareAll()
11450
    self.needed_locks = {
11451
      locking.LEVEL_NODEGROUP: [],
11452
      locking.LEVEL_NODE: [],
11453
      }
11454

    
11455
    self._ExpandAndLockInstance()
11456

    
11457
    if self.op.target_groups:
11458
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11459
                                  self.op.target_groups)
11460
    else:
11461
      self.req_target_uuids = None
11462

    
11463
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11464

    
11465
  def DeclareLocks(self, level):
11466
    if level == locking.LEVEL_NODEGROUP:
11467
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11468

    
11469
      if self.req_target_uuids:
11470
        lock_groups = set(self.req_target_uuids)
11471

    
11472
        # Lock all groups used by instance optimistically; this requires going
11473
        # via the node before it's locked, requiring verification later on
11474
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11475
        lock_groups.update(instance_groups)
11476
      else:
11477
        # No target groups, need to lock all of them
11478
        lock_groups = locking.ALL_SET
11479

    
11480
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11481

    
11482
    elif level == locking.LEVEL_NODE:
11483
      if self.req_target_uuids:
11484
        # Lock all nodes used by instances
11485
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11486
        self._LockInstancesNodes()
11487

    
11488
        # Lock all nodes in all potential target groups
11489
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11490
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11491
        member_nodes = [node_name
11492
                        for group in lock_groups
11493
                        for node_name in self.cfg.GetNodeGroup(group).members]
11494
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11495
      else:
11496
        # Lock all nodes as all groups are potential targets
11497
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11498

    
11499
  def CheckPrereq(self):
11500
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11501
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11502
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11503

    
11504
    assert (self.req_target_uuids is None or
11505
            owned_groups.issuperset(self.req_target_uuids))
11506
    assert owned_instances == set([self.op.instance_name])
11507

    
11508
    # Get instance information
11509
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11510

    
11511
    # Check if node groups for locked instance are still correct
11512
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11513
      ("Instance %s's nodes changed while we kept the lock" %
11514
       self.op.instance_name)
11515

    
11516
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11517
                                           owned_groups)
11518

    
11519
    if self.req_target_uuids:
11520
      # User requested specific target groups
11521
      self.target_uuids = self.req_target_uuids
11522
    else:
11523
      # All groups except those used by the instance are potential targets
11524
      self.target_uuids = owned_groups - inst_groups
11525

    
11526
    conflicting_groups = self.target_uuids & inst_groups
11527
    if conflicting_groups:
11528
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11529
                                 " used by the instance '%s'" %
11530
                                 (utils.CommaJoin(conflicting_groups),
11531
                                  self.op.instance_name),
11532
                                 errors.ECODE_INVAL)
11533

    
11534
    if not self.target_uuids:
11535
      raise errors.OpPrereqError("There are no possible target groups",
11536
                                 errors.ECODE_INVAL)
11537

    
11538
  def BuildHooksEnv(self):
11539
    """Build hooks env.
11540

11541
    """
11542
    assert self.target_uuids
11543

    
11544
    env = {
11545
      "TARGET_GROUPS": " ".join(self.target_uuids),
11546
      }
11547

    
11548
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11549

    
11550
    return env
11551

    
11552
  def BuildHooksNodes(self):
11553
    """Build hooks nodes.
11554

11555
    """
11556
    mn = self.cfg.GetMasterNode()
11557
    return ([mn], [mn])
11558

    
11559
  def Exec(self, feedback_fn):
11560
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11561

    
11562
    assert instances == [self.op.instance_name], "Instance not locked"
11563

    
11564
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11565
                     instances=instances, target_groups=list(self.target_uuids))
11566

    
11567
    ial.Run(self.op.iallocator)
11568

    
11569
    if not ial.success:
11570
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11571
                                 " instance '%s' using iallocator '%s': %s" %
11572
                                 (self.op.instance_name, self.op.iallocator,
11573
                                  ial.info),
11574
                                 errors.ECODE_NORES)
11575

    
11576
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11577

    
11578
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11579
                 " instance '%s'", len(jobs), self.op.instance_name)
11580

    
11581
    return ResultWithJobs(jobs)
11582

    
11583

    
11584
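# LUInstanceChangeGroup.CheckPrereq above selects the target groups with
# plain set arithmetic: the requested groups (or, failing that, every owned
# group minus the groups the instance already uses), rejecting any overlap.
# The following self-contained sketch mirrors that selection; illustrative
# only, with bare strings standing in for node group UUIDs.
def _ExampleSelectTargetGroups(requested, owned, used_by_instance):
  """Illustrative only: mimic the target group filtering shown above."""
  used = set(used_by_instance)
  if requested:
    targets = set(requested)
  else:
    targets = set(owned) - used
  conflicting = targets & used
  if conflicting:
    raise ValueError("Can't use group(s) %s as targets, already in use" %
                     ", ".join(sorted(conflicting)))
  if not targets:
    raise ValueError("There are no possible target groups")
  return targets

# E.g. _ExampleSelectTargetGroups([], ["g1", "g2", "g3"], ["g1"]) returns
# set(["g2", "g3"]), while explicitly requesting "g1" raises ValueError.

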
class LUBackupQuery(NoHooksLU):
11585
  """Query the exports list
11586

11587
  """
11588
  REQ_BGL = False
11589

    
11590
  def ExpandNames(self):
11591
    self.needed_locks = {}
11592
    self.share_locks[locking.LEVEL_NODE] = 1
11593
    if not self.op.nodes:
11594
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11595
    else:
11596
      self.needed_locks[locking.LEVEL_NODE] = \
11597
        _GetWantedNodes(self, self.op.nodes)
11598

    
11599
  def Exec(self, feedback_fn):
11600
    """Compute the list of all the exported system images.
11601

11602
    @rtype: dict
11603
    @return: a dictionary with the structure node->(export-list)
11604
        where export-list is a list of the instances exported on
11605
        that node.
11606

11607
    """
11608
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11609
    rpcresult = self.rpc.call_export_list(self.nodes)
11610
    result = {}
11611
    for node in rpcresult:
11612
      if rpcresult[node].fail_msg:
11613
        result[node] = False
11614
      else:
11615
        result[node] = rpcresult[node].payload
11616

    
11617
    return result
11618

    
11619

    
11620
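# LUBackupQuery.Exec above folds the per-node RPC replies into one mapping,
# storing False for nodes whose call failed and the payload (the export
# list) otherwise.  A minimal, self-contained sketch of that aggregation;
# illustrative only, with each reply modelled as a (fail_msg, payload)
# pair rather than a real RPC result object.
def _ExampleCollectExportLists(replies):
  """Illustrative only: map node -> export list, or False on failure."""
  result = {}
  for node, (fail_msg, payload) in replies.items():
    if fail_msg:
      result[node] = False
    else:
      result[node] = payload
  return result

# E.g. {"node1": (None, ["inst1"]), "node2": ("timeout", None)} becomes
# {"node1": ["inst1"], "node2": False}.

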
class LUBackupPrepare(NoHooksLU):
11621
  """Prepares an instance for an export and returns useful information.
11622

11623
  """
11624
  REQ_BGL = False
11625

    
11626
  def ExpandNames(self):
11627
    self._ExpandAndLockInstance()
11628

    
11629
  def CheckPrereq(self):
11630
    """Check prerequisites.
11631

11632
    """
11633
    instance_name = self.op.instance_name
11634

    
11635
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11636
    assert self.instance is not None, \
11637
          "Cannot retrieve locked instance %s" % self.op.instance_name
11638
    _CheckNodeOnline(self, self.instance.primary_node)
11639

    
11640
    self._cds = _GetClusterDomainSecret()
11641

    
11642
  def Exec(self, feedback_fn):
11643
    """Prepares an instance for an export.
11644

11645
    """
11646
    instance = self.instance
11647

    
11648
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11649
      salt = utils.GenerateSecret(8)
11650

    
11651
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11652
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11653
                                              constants.RIE_CERT_VALIDITY)
11654
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11655

    
11656
      (name, cert_pem) = result.payload
11657

    
11658
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11659
                                             cert_pem)
11660

    
11661
      return {
11662
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11663
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11664
                          salt),
11665
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11666
        }
11667

    
11668
    return None
11669

    
11670

    
11671
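# LUBackupPrepare above returns the X509 key name together with a salted
# HMAC over it, computed from the cluster domain secret; LUBackupExport
# verifies that triple before trusting the key name.  The sketch below
# shows the general sign-then-verify pattern using the standard
# hmac/hashlib modules; it is illustrative only and makes no claim about
# how utils.Sha1Hmac/utils.VerifySha1Hmac combine the salt internally.
def _ExampleSignKeyName(secret, name, salt):
  """Illustrative only: salted SHA1 HMAC over a key name."""
  import hmac
  import hashlib
  return hmac.new(secret, "%s/%s" % (salt, name), hashlib.sha1).hexdigest()


def _ExampleVerifyKeyName(secret, name, digest, salt):
  """Illustrative only: constant-time comparison is omitted for brevity."""
  return _ExampleSignKeyName(secret, name, salt) == digest

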
class LUBackupExport(LogicalUnit):
11672
  """Export an instance to an image in the cluster.
11673

11674
  """
11675
  HPATH = "instance-export"
11676
  HTYPE = constants.HTYPE_INSTANCE
11677
  REQ_BGL = False
11678

    
11679
  def CheckArguments(self):
11680
    """Check the arguments.
11681

11682
    """
11683
    self.x509_key_name = self.op.x509_key_name
11684
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11685

    
11686
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11687
      if not self.x509_key_name:
11688
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11689
                                   errors.ECODE_INVAL)
11690

    
11691
      if not self.dest_x509_ca_pem:
11692
        raise errors.OpPrereqError("Missing destination X509 CA",
11693
                                   errors.ECODE_INVAL)
11694

    
11695
  def ExpandNames(self):
11696
    self._ExpandAndLockInstance()
11697

    
11698
    # Lock all nodes for local exports
11699
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11700
      # FIXME: lock only instance primary and destination node
11701
      #
11702
      # Sad but true, for now we have to lock all nodes, as we don't know where
11703
      # the previous export might be, and in this LU we search for it and
11704
      # remove it from its current node. In the future we could fix this by:
11705
      #  - making a tasklet to search (share-lock all), then create the
11706
      #    new one, then one to remove, after
11707
      #  - removing the removal operation altogether
11708
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11709

    
11710
  def DeclareLocks(self, level):
11711
    """Last minute lock declaration."""
11712
    # All nodes are locked anyway, so nothing to do here.
11713

    
11714
  def BuildHooksEnv(self):
11715
    """Build hooks env.
11716

11717
    This will run on the master, primary node and target node.
11718

11719
    """
11720
    env = {
11721
      "EXPORT_MODE": self.op.mode,
11722
      "EXPORT_NODE": self.op.target_node,
11723
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11724
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11725
      # TODO: Generic function for boolean env variables
11726
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11727
      }
11728

    
11729
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11730

    
11731
    return env
11732

    
11733
  def BuildHooksNodes(self):
11734
    """Build hooks nodes.
11735

11736
    """
11737
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11738

    
11739
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11740
      nl.append(self.op.target_node)
11741

    
11742
    return (nl, nl)
11743

    
11744
  def CheckPrereq(self):
11745
    """Check prerequisites.
11746

11747
    This checks that the instance and node names are valid.
11748

11749
    """
11750
    instance_name = self.op.instance_name
11751

    
11752
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11753
    assert self.instance is not None, \
11754
          "Cannot retrieve locked instance %s" % self.op.instance_name
11755
    _CheckNodeOnline(self, self.instance.primary_node)
11756

    
11757
    if (self.op.remove_instance and self.instance.admin_up and
11758
        not self.op.shutdown):
11759
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
11760
                                 " down first")
11761

    
11762
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11763
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11764
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11765
      assert self.dst_node is not None
11766

    
11767
      _CheckNodeOnline(self, self.dst_node.name)
11768
      _CheckNodeNotDrained(self, self.dst_node.name)
11769

    
11770
      self._cds = None
11771
      self.dest_disk_info = None
11772
      self.dest_x509_ca = None
11773

    
11774
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11775
      self.dst_node = None
11776

    
11777
      if len(self.op.target_node) != len(self.instance.disks):
11778
        raise errors.OpPrereqError(("Received destination information for %s"
11779
                                    " disks, but instance %s has %s disks") %
11780
                                   (len(self.op.target_node), instance_name,
11781
                                    len(self.instance.disks)),
11782
                                   errors.ECODE_INVAL)
11783

    
11784
      cds = _GetClusterDomainSecret()
11785

    
11786
      # Check X509 key name
11787
      try:
11788
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11789
      except (TypeError, ValueError), err:
11790
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11791

    
11792
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11793
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11794
                                   errors.ECODE_INVAL)
11795

    
11796
      # Load and verify CA
11797
      try:
11798
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11799
      except OpenSSL.crypto.Error, err:
11800
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11801
                                   (err, ), errors.ECODE_INVAL)
11802

    
11803
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11804
      if errcode is not None:
11805
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11806
                                   (msg, ), errors.ECODE_INVAL)
11807

    
11808
      self.dest_x509_ca = cert
11809

    
11810
      # Verify target information
11811
      disk_info = []
11812
      for idx, disk_data in enumerate(self.op.target_node):
11813
        try:
11814
          (host, port, magic) = \
11815
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11816
        except errors.GenericError, err:
11817
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11818
                                     (idx, err), errors.ECODE_INVAL)
11819

    
11820
        disk_info.append((host, port, magic))
11821

    
11822
      assert len(disk_info) == len(self.op.target_node)
11823
      self.dest_disk_info = disk_info
11824

    
11825
    else:
11826
      raise errors.ProgrammerError("Unhandled export mode %r" %
11827
                                   self.op.mode)
11828

    
11829
    # instance disk type verification
11830
    # TODO: Implement export support for file-based disks
11831
    for disk in self.instance.disks:
11832
      if disk.dev_type == constants.LD_FILE:
11833
        raise errors.OpPrereqError("Export not supported for instances with"
11834
                                   " file-based disks", errors.ECODE_INVAL)
11835

    
11836
  def _CleanupExports(self, feedback_fn):
11837
    """Removes exports of current instance from all other nodes.
11838

11839
    If an instance in a cluster with nodes A..D was exported to node C, its
11840
    exports will be removed from the nodes A, B and D.
11841

11842
    """
11843
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11844

    
11845
    nodelist = self.cfg.GetNodeList()
11846
    nodelist.remove(self.dst_node.name)
11847

    
11848
    # on one-node clusters nodelist will be empty after the removal
11849
    # if we proceed, the backup would be removed because OpBackupQuery
11850
    # substitutes an empty list with the full cluster node list.
11851
    iname = self.instance.name
11852
    if nodelist:
11853
      feedback_fn("Removing old exports for instance %s" % iname)
11854
      exportlist = self.rpc.call_export_list(nodelist)
11855
      for node in exportlist:
11856
        if exportlist[node].fail_msg:
11857
          continue
11858
        if iname in exportlist[node].payload:
11859
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11860
          if msg:
11861
            self.LogWarning("Could not remove older export for instance %s"
11862
                            " on node %s: %s", iname, node, msg)
11863

    
11864
  def Exec(self, feedback_fn):
11865
    """Export an instance to an image in the cluster.
11866

11867
    """
11868
    assert self.op.mode in constants.EXPORT_MODES
11869

    
11870
    instance = self.instance
11871
    src_node = instance.primary_node
11872

    
11873
    if self.op.shutdown:
11874
      # shutdown the instance, but not the disks
11875
      feedback_fn("Shutting down instance %s" % instance.name)
11876
      result = self.rpc.call_instance_shutdown(src_node, instance,
11877
                                               self.op.shutdown_timeout)
11878
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11879
      result.Raise("Could not shutdown instance %s on"
11880
                   " node %s" % (instance.name, src_node))
11881

    
11882
    # set the disks ID correctly since call_instance_start needs the
11883
    # correct drbd minor to create the symlinks
11884
    for disk in instance.disks:
11885
      self.cfg.SetDiskID(disk, src_node)
11886

    
11887
    activate_disks = (not instance.admin_up)
11888

    
11889
    if activate_disks:
11890
      # Activate the instance disks if we're exporting a stopped instance
11891
      feedback_fn("Activating disks for %s" % instance.name)
11892
      _StartInstanceDisks(self, instance, None)
11893

    
11894
    try:
11895
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11896
                                                     instance)
11897

    
11898
      helper.CreateSnapshots()
11899
      try:
11900
        if (self.op.shutdown and instance.admin_up and
11901
            not self.op.remove_instance):
11902
          assert not activate_disks
11903
          feedback_fn("Starting instance %s" % instance.name)
11904
          result = self.rpc.call_instance_start(src_node, instance,
11905
                                                None, None, False)
11906
          msg = result.fail_msg
11907
          if msg:
11908
            feedback_fn("Failed to start instance: %s" % msg)
11909
            _ShutdownInstanceDisks(self, instance)
11910
            raise errors.OpExecError("Could not start instance: %s" % msg)
11911

    
11912
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11913
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11914
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11915
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11916
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11917

    
11918
          (key_name, _, _) = self.x509_key_name
11919

    
11920
          dest_ca_pem = \
11921
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11922
                                            self.dest_x509_ca)
11923

    
11924
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11925
                                                     key_name, dest_ca_pem,
11926
                                                     timeouts)
11927
      finally:
11928
        helper.Cleanup()
11929

    
11930
      # Check for backwards compatibility
11931
      assert len(dresults) == len(instance.disks)
11932
      assert compat.all(isinstance(i, bool) for i in dresults), \
11933
             "Not all results are boolean: %r" % dresults
11934

    
11935
    finally:
11936
      if activate_disks:
11937
        feedback_fn("Deactivating disks for %s" % instance.name)
11938
        _ShutdownInstanceDisks(self, instance)
11939

    
11940
    if not (compat.all(dresults) and fin_resu):
11941
      failures = []
11942
      if not fin_resu:
11943
        failures.append("export finalization")
11944
      if not compat.all(dresults):
11945
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11946
                               if not dsk)
11947
        failures.append("disk export: disk(s) %s" % fdsk)
11948

    
11949
      raise errors.OpExecError("Export failed, errors in %s" %
11950
                               utils.CommaJoin(failures))
11951

    
11952
    # At this point, the export was successful, we can cleanup/finish
11953

    
11954
    # Remove instance if requested
11955
    if self.op.remove_instance:
11956
      feedback_fn("Removing instance %s" % instance.name)
11957
      _RemoveInstance(self, feedback_fn, instance,
11958
                      self.op.ignore_remove_failures)
11959

    
11960
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11961
      self._CleanupExports(feedback_fn)
11962

    
11963
    return fin_resu, dresults
11964

    
11965

    
11966
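# The tail of LUBackupExport.Exec above condenses the per-disk booleans in
# dresults and the finalization flag into a single error message.  A
# minimal, self-contained sketch of that summary step; illustrative only,
# returning a string where the real code raises OpExecError.
def _ExampleSummarizeExport(fin_resu, dresults):
  """Illustrative only: None on success, otherwise a failure summary."""
  if fin_resu and all(dresults):
    return None
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  bad_disks = [str(idx) for (idx, ok) in enumerate(dresults) if not ok]
  if bad_disks:
    failures.append("disk export: disk(s) %s" % ", ".join(bad_disks))
  return "Export failed, errors in %s" % ", ".join(failures)

# E.g. _ExampleSummarizeExport(True, [True, False]) returns
# "Export failed, errors in disk export: disk(s) 1".

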
class LUBackupRemove(NoHooksLU):
11967
  """Remove exports related to the named instance.
11968

11969
  """
11970
  REQ_BGL = False
11971

    
11972
  def ExpandNames(self):
11973
    self.needed_locks = {}
11974
    # We need all nodes to be locked in order for RemoveExport to work, but we
11975
    # don't need to lock the instance itself, as nothing will happen to it (and
11976
    # we can also remove exports for a removed instance)
11977
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11978

    
11979
  def Exec(self, feedback_fn):
11980
    """Remove any export.
11981

11982
    """
11983
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11984
    # If the instance was not found we'll try with the name that was passed in.
11985
    # This will only work if it was an FQDN, though.
11986
    fqdn_warn = False
11987
    if not instance_name:
11988
      fqdn_warn = True
11989
      instance_name = self.op.instance_name
11990

    
11991
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11992
    exportlist = self.rpc.call_export_list(locked_nodes)
11993
    found = False
11994
    for node in exportlist:
11995
      msg = exportlist[node].fail_msg
11996
      if msg:
11997
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11998
        continue
11999
      if instance_name in exportlist[node].payload:
12000
        found = True
12001
        result = self.rpc.call_export_remove(node, instance_name)
12002
        msg = result.fail_msg
12003
        if msg:
12004
          logging.error("Could not remove export for instance %s"
12005
                        " on node %s: %s", instance_name, node, msg)
12006

    
12007
    if fqdn_warn and not found:
12008
      feedback_fn("Export not found. If trying to remove an export belonging"
12009
                  " to a deleted instance please use its Fully Qualified"
12010
                  " Domain Name.")
12011

    
12012

    
12013
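# LUBackupRemove.Exec above falls back to the name exactly as given when
# the instance is no longer in the configuration, because the export of an
# already-removed instance can only be matched by its full name.  A tiny,
# self-contained sketch of that lookup-with-fallback; illustrative only,
# with expand_fn standing in for the configuration lookup.
def _ExampleResolveExportName(expand_fn, name):
  """Illustrative only: return (name to search for, fqdn_warn flag)."""
  expanded = expand_fn(name)
  if expanded:
    return (expanded, False)
  # Unknown instance: search for the raw name and warn later if no export
  # is found, since only an FQDN can still match at this point.
  return (name, True)

# E.g. _ExampleResolveExportName({"inst1": "inst1.example.com"}.get, "gone")
# returns ("gone", True).

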
class LUGroupAdd(LogicalUnit):
12014
  """Logical unit for creating node groups.
12015

12016
  """
12017
  HPATH = "group-add"
12018
  HTYPE = constants.HTYPE_GROUP
12019
  REQ_BGL = False
12020

    
12021
  def ExpandNames(self):
12022
    # We need the new group's UUID here so that we can create and acquire the
12023
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12024
    # that it should not check whether the UUID exists in the configuration.
12025
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12026
    self.needed_locks = {}
12027
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12028

    
12029
  def CheckPrereq(self):
12030
    """Check prerequisites.
12031

12032
    This checks that the given group name is not an existing node group
12033
    already.
12034

12035
    """
12036
    try:
12037
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12038
    except errors.OpPrereqError:
12039
      pass
12040
    else:
12041
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12042
                                 " node group (UUID: %s)" %
12043
                                 (self.op.group_name, existing_uuid),
12044
                                 errors.ECODE_EXISTS)
12045

    
12046
    if self.op.ndparams:
12047
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12048

    
12049
  def BuildHooksEnv(self):
12050
    """Build hooks env.
12051

12052
    """
12053
    return {
12054
      "GROUP_NAME": self.op.group_name,
12055
      }
12056

    
12057
  def BuildHooksNodes(self):
12058
    """Build hooks nodes.
12059

12060
    """
12061
    mn = self.cfg.GetMasterNode()
12062
    return ([mn], [mn])
12063

    
12064
  def Exec(self, feedback_fn):
12065
    """Add the node group to the cluster.
12066

12067
    """
12068
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12069
                                  uuid=self.group_uuid,
12070
                                  alloc_policy=self.op.alloc_policy,
12071
                                  ndparams=self.op.ndparams)
12072

    
12073
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12074
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12075

    
12076

    
12077
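# LUGroupAdd.CheckPrereq above relies on try/except/else so that a failed
# lookup is the good case: only when LookupNodeGroup succeeds does the else
# branch raise "already exists".  A minimal, self-contained sketch of that
# inverted check; illustrative only, with a plain dict and KeyError
# standing in for the configuration and OpPrereqError.
def _ExampleEnsureGroupNameIsFree(existing_groups, name):
  """Illustrative only: raise if the given group name is already taken."""
  try:
    uuid = existing_groups[name]
  except KeyError:
    pass
  else:
    raise ValueError("Group name '%s' already exists (UUID: %s)" %
                     (name, uuid))

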
class LUGroupAssignNodes(NoHooksLU):
12078
  """Logical unit for assigning nodes to groups.
12079

12080
  """
12081
  REQ_BGL = False
12082

    
12083
  def ExpandNames(self):
12084
    # These raise errors.OpPrereqError on their own:
12085
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12086
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12087

    
12088
    # We want to lock all the affected nodes and groups. We have readily
12089
    # available the list of nodes, and the *destination* group. To gather the
12090
    # list of "source" groups, we need to fetch node information later on.
12091
    self.needed_locks = {
12092
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12093
      locking.LEVEL_NODE: self.op.nodes,
12094
      }
12095

    
12096
  def DeclareLocks(self, level):
12097
    if level == locking.LEVEL_NODEGROUP:
12098
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12099

    
12100
      # Try to get all affected nodes' groups without having the group or node
12101
      # lock yet. Needs verification later in the code flow.
12102
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12103

    
12104
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12105

    
12106
  def CheckPrereq(self):
12107
    """Check prerequisites.
12108

12109
    """
12110
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12111
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12112
            frozenset(self.op.nodes))
12113

    
12114
    expected_locks = (set([self.group_uuid]) |
12115
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12116
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12117
    if actual_locks != expected_locks:
12118
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12119
                               " current groups are '%s', used to be '%s'" %
12120
                               (utils.CommaJoin(expected_locks),
12121
                                utils.CommaJoin(actual_locks)))
12122

    
12123
    self.node_data = self.cfg.GetAllNodesInfo()
12124
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12125
    instance_data = self.cfg.GetAllInstancesInfo()
12126

    
12127
    if self.group is None:
12128
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12129
                               (self.op.group_name, self.group_uuid))
12130

    
12131
    (new_splits, previous_splits) = \
12132
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12133
                                             for node in self.op.nodes],
12134
                                            self.node_data, instance_data)
12135

    
12136
    if new_splits:
12137
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12138

    
12139
      if not self.op.force:
12140
        raise errors.OpExecError("The following instances get split by this"
12141
                                 " change and --force was not given: %s" %
12142
                                 fmt_new_splits)
12143
      else:
12144
        self.LogWarning("This operation will split the following instances: %s",
12145
                        fmt_new_splits)
12146

    
12147
        if previous_splits:
12148
          self.LogWarning("In addition, these already-split instances continue"
12149
                          " to be split across groups: %s",
12150
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12151

    
12152
  def Exec(self, feedback_fn):
12153
    """Assign nodes to a new group.
12154

12155
    """
12156
    for node in self.op.nodes:
12157
      self.node_data[node].group = self.group_uuid
12158

    
12159
    # FIXME: Depends on side-effects of modifying the result of
12160
    # C{cfg.GetAllNodesInfo}
12161

    
12162
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12163

    
12164
  @staticmethod
12165
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12166
    """Check for split instances after a node assignment.
12167

12168
    This method considers a series of node assignments as an atomic operation,
12169
    and returns information about split instances after applying the set of
12170
    changes.
12171

12172
    In particular, it returns information about newly split instances, and
12173
    instances that were already split, and remain so after the change.
12174

12175
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12176
    considered.
12177

12178
    @type changes: list of (node_name, new_group_uuid) pairs.
12179
    @param changes: list of node assignments to consider.
12180
    @param node_data: a dict with data for all nodes
12181
    @param instance_data: a dict with all instances to consider
12182
    @rtype: a two-tuple
12183
    @return: a list of instances that were previously okay and become split as a
12184
      consequence of this change, and a list of instances that were previously
12185
      split and remain split after this change.
12186

12187
    """
12188
    changed_nodes = dict((node, group) for node, group in changes
12189
                         if node_data[node].group != group)
12190

    
12191
    all_split_instances = set()
12192
    previously_split_instances = set()
12193

    
12194
    def InstanceNodes(instance):
12195
      return [instance.primary_node] + list(instance.secondary_nodes)
12196

    
12197
    for inst in instance_data.values():
12198
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12199
        continue
12200

    
12201
      instance_nodes = InstanceNodes(inst)
12202

    
12203
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12204
        previously_split_instances.add(inst.name)
12205

    
12206
      if len(set(changed_nodes.get(node, node_data[node].group)
12207
                 for node in instance_nodes)) > 1:
12208
        all_split_instances.add(inst.name)
12209

    
12210
    return (list(all_split_instances - previously_split_instances),
12211
            list(previously_split_instances & all_split_instances))
12212

    
12213

    
12214
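# Illustrative sketch, not part of the original module: a standalone version
# of the split check performed above by
# LUGroupAssignNodes.CheckAssignmentForSplitInstances, using plain dicts
# instead of configuration objects. All names below are hypothetical.
def _SketchCheckSplitInstances(changes, node_to_group, instance_to_nodes):
  """Return (newly_split, still_split) lists of instance names.

  @param changes: dict of node name -> new group
  @param node_to_group: dict of node name -> current group
  @param instance_to_nodes: dict of instance name -> list of its nodes

  """
  new_map = dict(node_to_group)
  new_map.update(changes)
  newly_split = []
  still_split = []
  for (inst, nodes) in instance_to_nodes.items():
    was_split = len(set(node_to_group[node] for node in nodes)) > 1
    is_split = len(set(new_map[node] for node in nodes)) > 1
    if is_split and not was_split:
      newly_split.append(inst)
    elif is_split and was_split:
      still_split.append(inst)
  return (newly_split, still_split)
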
class _GroupQuery(_QueryBase):
12215
  FIELDS = query.GROUP_FIELDS
12216

    
12217
  def ExpandNames(self, lu):
12218
    lu.needed_locks = {}
12219

    
12220
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12221
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12222

    
12223
    if not self.names:
12224
      self.wanted = [name_to_uuid[name]
12225
                     for name in utils.NiceSort(name_to_uuid.keys())]
12226
    else:
12227
      # Accept names to be either names or UUIDs.
12228
      missing = []
12229
      self.wanted = []
12230
      all_uuid = frozenset(self._all_groups.keys())
12231

    
12232
      for name in self.names:
12233
        if name in all_uuid:
12234
          self.wanted.append(name)
12235
        elif name in name_to_uuid:
12236
          self.wanted.append(name_to_uuid[name])
12237
        else:
12238
          missing.append(name)
12239

    
12240
      if missing:
12241
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12242
                                   utils.CommaJoin(missing),
12243
                                   errors.ECODE_NOENT)
12244

    
12245
  def DeclareLocks(self, lu, level):
12246
    pass
12247

    
12248
  def _GetQueryData(self, lu):
12249
    """Computes the list of node groups and their attributes.
12250

12251
    """
12252
    do_nodes = query.GQ_NODE in self.requested_data
12253
    do_instances = query.GQ_INST in self.requested_data
12254

    
12255
    group_to_nodes = None
12256
    group_to_instances = None
12257

    
12258
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12259
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12260
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12261
    # instance->node. Hence, we will need to process nodes even if we only need
12262
    # instance information.
12263
    if do_nodes or do_instances:
12264
      all_nodes = lu.cfg.GetAllNodesInfo()
12265
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12266
      node_to_group = {}
12267

    
12268
      for node in all_nodes.values():
12269
        if node.group in group_to_nodes:
12270
          group_to_nodes[node.group].append(node.name)
12271
          node_to_group[node.name] = node.group
12272

    
12273
      if do_instances:
12274
        all_instances = lu.cfg.GetAllInstancesInfo()
12275
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12276

    
12277
        for instance in all_instances.values():
12278
          node = instance.primary_node
12279
          if node in node_to_group:
12280
            group_to_instances[node_to_group[node]].append(instance.name)
12281

    
12282
        if not do_nodes:
12283
          # Do not pass on node information if it was not requested.
12284
          group_to_nodes = None
12285

    
12286
    return query.GroupQueryData([self._all_groups[uuid]
12287
                                 for uuid in self.wanted],
12288
                                group_to_nodes, group_to_instances)
12289

    
12290

    
12291
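# Illustrative sketch, not part of the original module: the comment in
# _GroupQuery._GetQueryData above notes that instances can only be mapped to
# groups via their primary node. The hypothetical helper below shows that
# two-step mapping with plain dicts.
def _SketchGroupMaps(node_groups, instance_pnodes, wanted_groups):
  """Build group->nodes and group->instances maps.

  @param node_groups: dict of node name -> group UUID
  @param instance_pnodes: dict of instance name -> primary node name
  @param wanted_groups: iterable of group UUIDs to report on

  """
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  node_to_group = {}
  for (node, group) in node_groups.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
      node_to_group[node] = group
  # Instances carry no group attribute of their own, hence the detour through
  # the primary node computed above.
  group_to_instances = dict((uuid, []) for uuid in wanted_groups)
  for (inst, pnode) in instance_pnodes.items():
    if pnode in node_to_group:
      group_to_instances[node_to_group[pnode]].append(inst)
  return (group_to_nodes, group_to_instances)
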
class LUGroupQuery(NoHooksLU):
12292
  """Logical unit for querying node groups.
12293

12294
  """
12295
  REQ_BGL = False
12296

    
12297
  def CheckArguments(self):
12298
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12299
                          self.op.output_fields, False)
12300

    
12301
  def ExpandNames(self):
12302
    self.gq.ExpandNames(self)
12303

    
12304
  def DeclareLocks(self, level):
12305
    self.gq.DeclareLocks(self, level)
12306

    
12307
  def Exec(self, feedback_fn):
12308
    return self.gq.OldStyleQuery(self)
12309

    
12310

    
12311
class LUGroupSetParams(LogicalUnit):
12312
  """Modifies the parameters of a node group.
12313

12314
  """
12315
  HPATH = "group-modify"
12316
  HTYPE = constants.HTYPE_GROUP
12317
  REQ_BGL = False
12318

    
12319
  def CheckArguments(self):
12320
    all_changes = [
12321
      self.op.ndparams,
12322
      self.op.alloc_policy,
12323
      ]
12324

    
12325
    if all_changes.count(None) == len(all_changes):
12326
      raise errors.OpPrereqError("Please pass at least one modification",
12327
                                 errors.ECODE_INVAL)
12328

    
12329
  def ExpandNames(self):
12330
    # This raises errors.OpPrereqError on its own:
12331
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12332

    
12333
    self.needed_locks = {
12334
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12335
      }
12336

    
12337
  def CheckPrereq(self):
12338
    """Check prerequisites.
12339

12340
    """
12341
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12342

    
12343
    if self.group is None:
12344
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12345
                               (self.op.group_name, self.group_uuid))
12346

    
12347
    if self.op.ndparams:
12348
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12349
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12350
      self.new_ndparams = new_ndparams
12351

    
12352
  def BuildHooksEnv(self):
12353
    """Build hooks env.
12354

12355
    """
12356
    return {
12357
      "GROUP_NAME": self.op.group_name,
12358
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12359
      }
12360

    
12361
  def BuildHooksNodes(self):
12362
    """Build hooks nodes.
12363

12364
    """
12365
    mn = self.cfg.GetMasterNode()
12366
    return ([mn], [mn])
12367

    
12368
  def Exec(self, feedback_fn):
12369
    """Modifies the node group.
12370

12371
    """
12372
    result = []
12373

    
12374
    if self.op.ndparams:
12375
      self.group.ndparams = self.new_ndparams
12376
      result.append(("ndparams", str(self.group.ndparams)))
12377

    
12378
    if self.op.alloc_policy:
12379
      self.group.alloc_policy = self.op.alloc_policy
12380

    
12381
    self.cfg.Update(self.group, feedback_fn)
12382
    return result
12383

    
12384

    
12385
class LUGroupRemove(LogicalUnit):
12386
  HPATH = "group-remove"
12387
  HTYPE = constants.HTYPE_GROUP
12388
  REQ_BGL = False
12389

    
12390
  def ExpandNames(self):
12391
    # This raises errors.OpPrereqError on its own:
12392
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12393
    self.needed_locks = {
12394
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12395
      }
12396

    
12397
  def CheckPrereq(self):
12398
    """Check prerequisites.
12399

12400
    This checks that the given group name exists as a node group, that it is
12401
    empty (i.e., contains no nodes), and that it is not the last group of the
12402
    cluster.
12403

12404
    """
12405
    # Verify that the group is empty.
12406
    group_nodes = [node.name
12407
                   for node in self.cfg.GetAllNodesInfo().values()
12408
                   if node.group == self.group_uuid]
12409

    
12410
    if group_nodes:
12411
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12412
                                 " nodes: %s" %
12413
                                 (self.op.group_name,
12414
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12415
                                 errors.ECODE_STATE)
12416

    
12417
    # Verify the cluster would not be left group-less.
12418
    if len(self.cfg.GetNodeGroupList()) == 1:
12419
      raise errors.OpPrereqError("Group '%s' is the only group,"
12420
                                 " cannot be removed" %
12421
                                 self.op.group_name,
12422
                                 errors.ECODE_STATE)
12423

    
12424
  def BuildHooksEnv(self):
12425
    """Build hooks env.
12426

12427
    """
12428
    return {
12429
      "GROUP_NAME": self.op.group_name,
12430
      }
12431

    
12432
  def BuildHooksNodes(self):
12433
    """Build hooks nodes.
12434

12435
    """
12436
    mn = self.cfg.GetMasterNode()
12437
    return ([mn], [mn])
12438

    
12439
  def Exec(self, feedback_fn):
12440
    """Remove the node group.
12441

12442
    """
12443
    try:
12444
      self.cfg.RemoveNodeGroup(self.group_uuid)
12445
    except errors.ConfigurationError:
12446
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12447
                               (self.op.group_name, self.group_uuid))
12448

    
12449
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12450

    
12451

    
12452
class LUGroupRename(LogicalUnit):
12453
  HPATH = "group-rename"
12454
  HTYPE = constants.HTYPE_GROUP
12455
  REQ_BGL = False
12456

    
12457
  def ExpandNames(self):
12458
    # This raises errors.OpPrereqError on its own:
12459
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12460

    
12461
    self.needed_locks = {
12462
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12463
      }
12464

    
12465
  def CheckPrereq(self):
12466
    """Check prerequisites.
12467

12468
    Ensures requested new name is not yet used.
12469

12470
    """
12471
    try:
12472
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12473
    except errors.OpPrereqError:
12474
      pass
12475
    else:
12476
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12477
                                 " node group (UUID: %s)" %
12478
                                 (self.op.new_name, new_name_uuid),
12479
                                 errors.ECODE_EXISTS)
12480

    
12481
  def BuildHooksEnv(self):
12482
    """Build hooks env.
12483

12484
    """
12485
    return {
12486
      "OLD_NAME": self.op.group_name,
12487
      "NEW_NAME": self.op.new_name,
12488
      }
12489

    
12490
  def BuildHooksNodes(self):
12491
    """Build hooks nodes.
12492

12493
    """
12494
    mn = self.cfg.GetMasterNode()
12495

    
12496
    all_nodes = self.cfg.GetAllNodesInfo()
12497
    all_nodes.pop(mn, None)
12498

    
12499
    run_nodes = [mn]
12500
    run_nodes.extend(node.name for node in all_nodes.values()
12501
                     if node.group == self.group_uuid)
12502

    
12503
    return (run_nodes, run_nodes)
12504

    
12505
  def Exec(self, feedback_fn):
12506
    """Rename the node group.
12507

12508
    """
12509
    group = self.cfg.GetNodeGroup(self.group_uuid)
12510

    
12511
    if group is None:
12512
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12513
                               (self.op.group_name, self.group_uuid))
12514

    
12515
    group.name = self.op.new_name
12516
    self.cfg.Update(group, feedback_fn)
12517

    
12518
    return self.op.new_name
12519

    
12520

    
12521
class LUGroupEvacuate(LogicalUnit):
12522
  HPATH = "group-evacuate"
12523
  HTYPE = constants.HTYPE_GROUP
12524
  REQ_BGL = False
12525

    
12526
  def ExpandNames(self):
12527
    # This raises errors.OpPrereqError on its own:
12528
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12529

    
12530
    if self.op.target_groups:
12531
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12532
                                  self.op.target_groups)
12533
    else:
12534
      self.req_target_uuids = []
12535

    
12536
    if self.group_uuid in self.req_target_uuids:
12537
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12538
                                 " as a target group (targets are %s)" %
12539
                                 (self.group_uuid,
12540
                                  utils.CommaJoin(self.req_target_uuids)),
12541
                                 errors.ECODE_INVAL)
12542

    
12543
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12544

    
12545
    self.share_locks = _ShareAll()
12546
    self.needed_locks = {
12547
      locking.LEVEL_INSTANCE: [],
12548
      locking.LEVEL_NODEGROUP: [],
12549
      locking.LEVEL_NODE: [],
12550
      }
12551

    
12552
  def DeclareLocks(self, level):
12553
    if level == locking.LEVEL_INSTANCE:
12554
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12555

    
12556
      # Lock instances optimistically, needs verification once node and group
12557
      # locks have been acquired
12558
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12559
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12560

    
12561
    elif level == locking.LEVEL_NODEGROUP:
12562
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12563

    
12564
      if self.req_target_uuids:
12565
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12566

    
12567
        # Lock all groups used by instances optimistically; this requires going
12568
        # via the node before it's locked, requiring verification later on
12569
        lock_groups.update(group_uuid
12570
                           for instance_name in
12571
                             self.owned_locks(locking.LEVEL_INSTANCE)
12572
                           for group_uuid in
12573
                             self.cfg.GetInstanceNodeGroups(instance_name))
12574
      else:
12575
        # No target groups, need to lock all of them
12576
        lock_groups = locking.ALL_SET
12577

    
12578
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12579

    
12580
    elif level == locking.LEVEL_NODE:
12581
      # This will only lock the nodes in the group to be evacuated which
12582
      # contain actual instances
12583
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12584
      self._LockInstancesNodes()
12585

    
12586
      # Lock all nodes in group to be evacuated and target groups
12587
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12588
      assert self.group_uuid in owned_groups
12589
      member_nodes = [node_name
12590
                      for group in owned_groups
12591
                      for node_name in self.cfg.GetNodeGroup(group).members]
12592
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12593

    
12594
  def CheckPrereq(self):
12595
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12596
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12597
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12598

    
12599
    assert owned_groups.issuperset(self.req_target_uuids)
12600
    assert self.group_uuid in owned_groups
12601

    
12602
    # Check if locked instances are still correct
12603
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12604

    
12605
    # Get instance information
12606
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12607

    
12608
    # Check if node groups for locked instances are still correct
12609
    for instance_name in owned_instances:
12610
      inst = self.instances[instance_name]
12611
      assert owned_nodes.issuperset(inst.all_nodes), \
12612
        "Instance %s's nodes changed while we kept the lock" % instance_name
12613

    
12614
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12615
                                             owned_groups)
12616

    
12617
      assert self.group_uuid in inst_groups, \
12618
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12619

    
12620
    if self.req_target_uuids:
12621
      # User requested specific target groups
12622
      self.target_uuids = self.req_target_uuids
12623
    else:
12624
      # All groups except the one to be evacuated are potential targets
12625
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12626
                           if group_uuid != self.group_uuid]
12627

    
12628
      if not self.target_uuids:
12629
        raise errors.OpPrereqError("There are no possible target groups",
12630
                                   errors.ECODE_INVAL)
12631

    
12632
  def BuildHooksEnv(self):
12633
    """Build hooks env.
12634

12635
    """
12636
    return {
12637
      "GROUP_NAME": self.op.group_name,
12638
      "TARGET_GROUPS": " ".join(self.target_uuids),
12639
      }
12640

    
12641
  def BuildHooksNodes(self):
12642
    """Build hooks nodes.
12643

12644
    """
12645
    mn = self.cfg.GetMasterNode()
12646

    
12647
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12648

    
12649
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12650

    
12651
    return (run_nodes, run_nodes)
12652

    
12653
  def Exec(self, feedback_fn):
12654
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12655

    
12656
    assert self.group_uuid not in self.target_uuids
12657

    
12658
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12659
                     instances=instances, target_groups=self.target_uuids)
12660

    
12661
    ial.Run(self.op.iallocator)
12662

    
12663
    if not ial.success:
12664
      raise errors.OpPrereqError("Can't compute group evacuation using"
12665
                                 " iallocator '%s': %s" %
12666
                                 (self.op.iallocator, ial.info),
12667
                                 errors.ECODE_NORES)
12668

    
12669
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12670

    
12671
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12672
                 len(jobs), self.op.group_name)
12673

    
12674
    return ResultWithJobs(jobs)
12675

    
12676

    
12677
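# Illustrative sketch, not part of the original module: the group LUs above
# (LUGroupAssignNodes, LUGroupEvacuate) acquire locks optimistically and then
# re-check in CheckPrereq that the configuration did not change in between.
# A minimal standalone form of that re-check, using a plain RuntimeError so
# the sketch stays self-contained; all names are hypothetical.
def _SketchVerifyOptimisticLocks(owned, currently_needed):
  """Fail if the set of required locks changed after they were computed.

  @param owned: iterable of lock names acquired earlier
  @param currently_needed: iterable of lock names the current config requires

  """
  owned = frozenset(owned)
  currently_needed = frozenset(currently_needed)
  if owned != currently_needed:
    raise RuntimeError("Configuration changed since locks were computed:"
                       " owned %s, now needed %s" %
                       (sorted(owned), sorted(currently_needed)))
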
class TagsLU(NoHooksLU): # pylint: disable=W0223
12678
  """Generic tags LU.
12679

12680
  This is an abstract class which is the parent of all the other tags LUs.
12681

12682
  """
12683
  def ExpandNames(self):
12684
    self.group_uuid = None
12685
    self.needed_locks = {}
12686
    if self.op.kind == constants.TAG_NODE:
12687
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12688
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12689
    elif self.op.kind == constants.TAG_INSTANCE:
12690
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12691
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12692
    elif self.op.kind == constants.TAG_NODEGROUP:
12693
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12694

    
12695
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12696
    # not possible to acquire the BGL based on opcode parameters)
12697

    
12698
  def CheckPrereq(self):
12699
    """Check prerequisites.
12700

12701
    """
12702
    if self.op.kind == constants.TAG_CLUSTER:
12703
      self.target = self.cfg.GetClusterInfo()
12704
    elif self.op.kind == constants.TAG_NODE:
12705
      self.target = self.cfg.GetNodeInfo(self.op.name)
12706
    elif self.op.kind == constants.TAG_INSTANCE:
12707
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12708
    elif self.op.kind == constants.TAG_NODEGROUP:
12709
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12710
    else:
12711
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12712
                                 str(self.op.kind), errors.ECODE_INVAL)
12713

    
12714

    
12715
class LUTagsGet(TagsLU):
12716
  """Returns the tags of a given object.
12717

12718
  """
12719
  REQ_BGL = False
12720

    
12721
  def ExpandNames(self):
12722
    TagsLU.ExpandNames(self)
12723

    
12724
    # Share locks as this is only a read operation
12725
    self.share_locks = _ShareAll()
12726

    
12727
  def Exec(self, feedback_fn):
12728
    """Returns the tag list.
12729

12730
    """
12731
    return list(self.target.GetTags())
12732

    
12733

    
12734
class LUTagsSearch(NoHooksLU):
12735
  """Searches the tags for a given pattern.
12736

12737
  """
12738
  REQ_BGL = False
12739

    
12740
  def ExpandNames(self):
12741
    self.needed_locks = {}
12742

    
12743
  def CheckPrereq(self):
12744
    """Check prerequisites.
12745

12746
    This checks the pattern passed for validity by compiling it.
12747

12748
    """
12749
    try:
12750
      self.re = re.compile(self.op.pattern)
12751
    except re.error, err:
12752
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12753
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12754

    
12755
  def Exec(self, feedback_fn):
12756
    """Returns the tag list.
12757

12758
    """
12759
    cfg = self.cfg
12760
    tgts = [("/cluster", cfg.GetClusterInfo())]
12761
    ilist = cfg.GetAllInstancesInfo().values()
12762
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12763
    nlist = cfg.GetAllNodesInfo().values()
12764
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12765
    tgts.extend(("/nodegroup/%s" % n.name, n)
12766
                for n in cfg.GetAllNodeGroupsInfo().values())
12767
    results = []
12768
    for path, target in tgts:
12769
      for tag in target.GetTags():
12770
        if self.re.search(tag):
12771
          results.append((path, tag))
12772
    return results
12773

    
12774

    
12775
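# Illustrative sketch, not part of the original module: the loop used by
# LUTagsSearch.Exec above, reduced to plain data. It relies only on the
# module-level "import re"; the helper name and parameters are hypothetical.
def _SketchSearchTags(pattern, tagged_objects):
  """Return (path, tag) pairs whose tag matches the given pattern.

  @param pattern: regular expression string (invalid patterns raise re.error)
  @param tagged_objects: list of (path, iterable-of-tags) pairs

  """
  regex = re.compile(pattern)
  results = []
  for (path, tags) in tagged_objects:
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results
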
class LUTagsSet(TagsLU):
12776
  """Sets a tag on a given object.
12777

12778
  """
12779
  REQ_BGL = False
12780

    
12781
  def CheckPrereq(self):
12782
    """Check prerequisites.
12783

12784
    This checks the type and length of the tag name and value.
12785

12786
    """
12787
    TagsLU.CheckPrereq(self)
12788
    for tag in self.op.tags:
12789
      objects.TaggableObject.ValidateTag(tag)
12790

    
12791
  def Exec(self, feedback_fn):
12792
    """Sets the tag.
12793

12794
    """
12795
    try:
12796
      for tag in self.op.tags:
12797
        self.target.AddTag(tag)
12798
    except errors.TagError, err:
12799
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12800
    self.cfg.Update(self.target, feedback_fn)
12801

    
12802

    
12803
class LUTagsDel(TagsLU):
12804
  """Delete a list of tags from a given object.
12805

12806
  """
12807
  REQ_BGL = False
12808

    
12809
  def CheckPrereq(self):
12810
    """Check prerequisites.
12811

12812
    This checks that we have the given tag.
12813

12814
    """
12815
    TagsLU.CheckPrereq(self)
12816
    for tag in self.op.tags:
12817
      objects.TaggableObject.ValidateTag(tag)
12818
    del_tags = frozenset(self.op.tags)
12819
    cur_tags = self.target.GetTags()
12820

    
12821
    diff_tags = del_tags - cur_tags
12822
    if diff_tags:
12823
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12824
      raise errors.OpPrereqError("Tag(s) %s not found" %
12825
                                 (utils.CommaJoin(diff_names), ),
12826
                                 errors.ECODE_NOENT)
12827

    
12828
  def Exec(self, feedback_fn):
12829
    """Remove the tag from the object.
12830

12831
    """
12832
    for tag in self.op.tags:
12833
      self.target.RemoveTag(tag)
12834
    self.cfg.Update(self.target, feedback_fn)
12835

    
12836

    
12837
class LUTestDelay(NoHooksLU):
12838
  """Sleep for a specified amount of time.
12839

12840
  This LU sleeps on the master and/or nodes for a specified amount of
12841
  time.
12842

12843
  """
12844
  REQ_BGL = False
12845

    
12846
  def ExpandNames(self):
12847
    """Expand names and set required locks.
12848

12849
    This expands the node list, if any.
12850

12851
    """
12852
    self.needed_locks = {}
12853
    if self.op.on_nodes:
12854
      # _GetWantedNodes can be used here, but is not always appropriate to use
12855
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12856
      # more information.
12857
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12858
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12859

    
12860
  def _TestDelay(self):
12861
    """Do the actual sleep.
12862

12863
    """
12864
    if self.op.on_master:
12865
      if not utils.TestDelay(self.op.duration):
12866
        raise errors.OpExecError("Error during master delay test")
12867
    if self.op.on_nodes:
12868
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12869
      for node, node_result in result.items():
12870
        node_result.Raise("Failure during rpc call to node %s" % node)
12871

    
12872
  def Exec(self, feedback_fn):
12873
    """Execute the test delay opcode, with the wanted repetitions.
12874

12875
    """
12876
    if self.op.repeat == 0:
12877
      self._TestDelay()
12878
    else:
12879
      top_value = self.op.repeat - 1
12880
      for i in range(self.op.repeat):
12881
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12882
        self._TestDelay()
12883

    
12884

    
12885
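# Illustrative sketch, not part of the original module: LUTestDelay.Exec above
# treats a repeat count of zero as "run once", not "run zero times". The
# hypothetical helper below captures just that convention.
def _SketchRepeat(action, repeat):
  """Run action once if repeat is zero, otherwise exactly repeat times."""
  if repeat == 0:
    action()
  else:
    for _ in range(repeat):
      action()
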
class LUTestJqueue(NoHooksLU):
12886
  """Utility LU to test some aspects of the job queue.
12887

12888
  """
12889
  REQ_BGL = False
12890

    
12891
  # Must be lower than default timeout for WaitForJobChange to see whether it
12892
  # notices changed jobs
12893
  _CLIENT_CONNECT_TIMEOUT = 20.0
12894
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12895

    
12896
  @classmethod
12897
  def _NotifyUsingSocket(cls, cb, errcls):
12898
    """Opens a Unix socket and waits for another program to connect.
12899

12900
    @type cb: callable
12901
    @param cb: Callback to send socket name to client
12902
    @type errcls: class
12903
    @param errcls: Exception class to use for errors
12904

12905
    """
12906
    # Using a temporary directory as there's no easy way to create temporary
12907
    # sockets without writing a custom loop around tempfile.mktemp and
12908
    # socket.bind
12909
    tmpdir = tempfile.mkdtemp()
12910
    try:
12911
      tmpsock = utils.PathJoin(tmpdir, "sock")
12912

    
12913
      logging.debug("Creating temporary socket at %s", tmpsock)
12914
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12915
      try:
12916
        sock.bind(tmpsock)
12917
        sock.listen(1)
12918

    
12919
        # Send details to client
12920
        cb(tmpsock)
12921

    
12922
        # Wait for client to connect before continuing
12923
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12924
        try:
12925
          (conn, _) = sock.accept()
12926
        except socket.error, err:
12927
          raise errcls("Client didn't connect in time (%s)" % err)
12928
      finally:
12929
        sock.close()
12930
    finally:
12931
      # Remove as soon as client is connected
12932
      shutil.rmtree(tmpdir)
12933

    
12934
    # Wait for client to close
12935
    try:
12936
      try:
12937
        # pylint: disable=E1101
12938
        # Instance of '_socketobject' has no ... member
12939
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12940
        conn.recv(1)
12941
      except socket.error, err:
12942
        raise errcls("Client failed to confirm notification (%s)" % err)
12943
    finally:
12944
      conn.close()
12945

    
12946
  def _SendNotification(self, test, arg, sockname):
12947
    """Sends a notification to the client.
12948

12949
    @type test: string
12950
    @param test: Test name
12951
    @param arg: Test argument (depends on test)
12952
    @type sockname: string
12953
    @param sockname: Socket path
12954

12955
    """
12956
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12957

    
12958
  def _Notify(self, prereq, test, arg):
12959
    """Notifies the client of a test.
12960

12961
    @type prereq: bool
12962
    @param prereq: Whether this is a prereq-phase test
12963
    @type test: string
12964
    @param test: Test name
12965
    @param arg: Test argument (depends on test)
12966

12967
    """
12968
    if prereq:
12969
      errcls = errors.OpPrereqError
12970
    else:
12971
      errcls = errors.OpExecError
12972

    
12973
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12974
                                                  test, arg),
12975
                                   errcls)
12976

    
12977
  def CheckArguments(self):
12978
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12979
    self.expandnames_calls = 0
12980

    
12981
  def ExpandNames(self):
12982
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12983
    if checkargs_calls < 1:
12984
      raise errors.ProgrammerError("CheckArguments was not called")
12985

    
12986
    self.expandnames_calls += 1
12987

    
12988
    if self.op.notify_waitlock:
12989
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12990

    
12991
    self.LogInfo("Expanding names")
12992

    
12993
    # Get lock on master node (just to get a lock, not for a particular reason)
12994
    self.needed_locks = {
12995
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12996
      }
12997

    
12998
  def Exec(self, feedback_fn):
12999
    if self.expandnames_calls < 1:
13000
      raise errors.ProgrammerError("ExpandNames was not called")
13001

    
13002
    if self.op.notify_exec:
13003
      self._Notify(False, constants.JQT_EXEC, None)
13004

    
13005
    self.LogInfo("Executing")
13006

    
13007
    if self.op.log_messages:
13008
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13009
      for idx, msg in enumerate(self.op.log_messages):
13010
        self.LogInfo("Sending log message %s", idx + 1)
13011
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13012
        # Report how many test messages have been sent
13013
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13014

    
13015
    if self.op.fail:
13016
      raise errors.OpExecError("Opcode failure was requested")
13017

    
13018
    return True
13019

    
13020

    
13021
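# Illustrative sketch, not part of the original module: a trimmed-down version
# of the handshake in LUTestJqueue._NotifyUsingSocket above. It reuses the
# module-level imports of tempfile, socket, shutil and os; the function name,
# its parameters and the simplified error handling are all hypothetical.
def _SketchNotifyOverUnixSocket(notify_cb, connect_timeout=20.0):
  """Bind a temporary Unix socket, publish its path, wait for one client."""
  tmpdir = tempfile.mkdtemp()
  try:
    path = os.path.join(tmpdir, "sock")
    server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      server.bind(path)
      server.listen(1)
      notify_cb(path)                     # tell the peer where to connect
      server.settimeout(connect_timeout)  # don't wait for the peer forever
      (conn, _) = server.accept()
      conn.close()
    finally:
      server.close()
  finally:
    shutil.rmtree(tmpdir)                 # the socket file goes away with it
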
class IAllocator(object):
13022
  """IAllocator framework.
13023

13024
  An IAllocator instance has four sets of attributes:
13025
    - cfg that is needed to query the cluster
13026
    - input data (all members of the _KEYS class attribute are required)
13027
    - four buffer attributes (in_data, in_text, out_data, out_text) that hold
13028
      the input to the external script in both data-structure and text form,
13029
      and its output, again in both forms
13030
    - the result variables from the script (success, info, nodes) for
13031
      easy usage
13032

13033
  """
13034
  # pylint: disable=R0902
13035
  # lots of instance attributes
13036

    
13037
  def __init__(self, cfg, rpc, mode, **kwargs):
13038
    self.cfg = cfg
13039
    self.rpc = rpc
13040
    # init buffer variables
13041
    self.in_text = self.out_text = self.in_data = self.out_data = None
13042
    # init all input fields so that pylint is happy
13043
    self.mode = mode
13044
    self.memory = self.disks = self.disk_template = None
13045
    self.os = self.tags = self.nics = self.vcpus = None
13046
    self.hypervisor = None
13047
    self.relocate_from = None
13048
    self.name = None
13049
    self.instances = None
13050
    self.evac_mode = None
13051
    self.target_groups = []
13052
    # computed fields
13053
    self.required_nodes = None
13054
    # init result fields
13055
    self.success = self.info = self.result = None
13056

    
13057
    try:
13058
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13059
    except KeyError:
13060
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13061
                                   " IAllocator" % self.mode)
13062

    
13063
    keyset = [n for (n, _) in keydata]
13064

    
13065
    for key in kwargs:
13066
      if key not in keyset:
13067
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13068
                                     " IAllocator" % key)
13069
      setattr(self, key, kwargs[key])
13070

    
13071
    for key in keyset:
13072
      if key not in kwargs:
13073
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13074
                                     " IAllocator" % key)
13075
    self._BuildInputData(compat.partial(fn, self), keydata)
13076

    
13077
  def _ComputeClusterData(self):
13078
    """Compute the generic allocator input data.
13079

13080
    This is the data that is independent of the actual operation.
13081

13082
    """
13083
    cfg = self.cfg
13084
    cluster_info = cfg.GetClusterInfo()
13085
    # cluster data
13086
    data = {
13087
      "version": constants.IALLOCATOR_VERSION,
13088
      "cluster_name": cfg.GetClusterName(),
13089
      "cluster_tags": list(cluster_info.GetTags()),
13090
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13091
      # we don't have job IDs
13092
      }
13093
    ninfo = cfg.GetAllNodesInfo()
13094
    iinfo = cfg.GetAllInstancesInfo().values()
13095
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13096

    
13097
    # node data
13098
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13099

    
13100
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13101
      hypervisor_name = self.hypervisor
13102
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13103
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13104
    else:
13105
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13106

    
13107
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13108
                                        hypervisor_name)
13109
    node_iinfo = \
13110
      self.rpc.call_all_instances_info(node_list,
13111
                                       cluster_info.enabled_hypervisors)
13112

    
13113
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13114

    
13115
    config_ndata = self._ComputeBasicNodeData(ninfo)
13116
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13117
                                                 i_list, config_ndata)
13118
    assert len(data["nodes"]) == len(ninfo), \
13119
        "Incomplete node data computed"
13120

    
13121
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13122

    
13123
    self.in_data = data
13124

    
13125
  @staticmethod
13126
  def _ComputeNodeGroupData(cfg):
13127
    """Compute node groups data.
13128

13129
    """
13130
    ng = dict((guuid, {
13131
      "name": gdata.name,
13132
      "alloc_policy": gdata.alloc_policy,
13133
      })
13134
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13135

    
13136
    return ng
13137

    
13138
  @staticmethod
13139
  def _ComputeBasicNodeData(node_cfg):
13140
    """Compute global node data.
13141

13142
    @rtype: dict
13143
    @return: a dict of name: (node dict, node config)
13144

13145
    """
13146
    # fill in static (config-based) values
13147
    node_results = dict((ninfo.name, {
13148
      "tags": list(ninfo.GetTags()),
13149
      "primary_ip": ninfo.primary_ip,
13150
      "secondary_ip": ninfo.secondary_ip,
13151
      "offline": ninfo.offline,
13152
      "drained": ninfo.drained,
13153
      "master_candidate": ninfo.master_candidate,
13154
      "group": ninfo.group,
13155
      "master_capable": ninfo.master_capable,
13156
      "vm_capable": ninfo.vm_capable,
13157
      })
13158
      for ninfo in node_cfg.values())
13159

    
13160
    return node_results
13161

    
13162
  @staticmethod
13163
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13164
                              node_results):
13165
    """Compute global node data.
13166

13167
    @param node_results: the basic node structures as filled from the config
13168

13169
    """
13170
    # make a copy of the current dict
13171
    node_results = dict(node_results)
13172
    for nname, nresult in node_data.items():
13173
      assert nname in node_results, "Missing basic data for node %s" % nname
13174
      ninfo = node_cfg[nname]
13175

    
13176
      if not (ninfo.offline or ninfo.drained):
13177
        nresult.Raise("Can't get data for node %s" % nname)
13178
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13179
                                nname)
13180
        remote_info = nresult.payload
13181

    
13182
        for attr in ["memory_total", "memory_free", "memory_dom0",
13183
                     "vg_size", "vg_free", "cpu_total"]:
13184
          if attr not in remote_info:
13185
            raise errors.OpExecError("Node '%s' didn't return attribute"
13186
                                     " '%s'" % (nname, attr))
13187
          if not isinstance(remote_info[attr], int):
13188
            raise errors.OpExecError("Node '%s' returned invalid value"
13189
                                     " for '%s': %s" %
13190
                                     (nname, attr, remote_info[attr]))
13191
        # compute memory used by primary instances
13192
        i_p_mem = i_p_up_mem = 0
13193
        for iinfo, beinfo in i_list:
13194
          if iinfo.primary_node == nname:
13195
            i_p_mem += beinfo[constants.BE_MEMORY]
13196
            if iinfo.name not in node_iinfo[nname].payload:
13197
              i_used_mem = 0
13198
            else:
13199
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13200
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13201
            remote_info["memory_free"] -= max(0, i_mem_diff)
13202

    
13203
            if iinfo.admin_up:
13204
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13205

    
13206
        # compute memory used by instances
13207
        pnr_dyn = {
13208
          "total_memory": remote_info["memory_total"],
13209
          "reserved_memory": remote_info["memory_dom0"],
13210
          "free_memory": remote_info["memory_free"],
13211
          "total_disk": remote_info["vg_size"],
13212
          "free_disk": remote_info["vg_free"],
13213
          "total_cpus": remote_info["cpu_total"],
13214
          "i_pri_memory": i_p_mem,
13215
          "i_pri_up_memory": i_p_up_mem,
13216
          }
13217
        pnr_dyn.update(node_results[nname])
13218
        node_results[nname] = pnr_dyn
13219

    
13220
    return node_results
13221

    
13222
  @staticmethod
13223
  def _ComputeInstanceData(cluster_info, i_list):
13224
    """Compute global instance data.
13225

13226
    """
13227
    instance_data = {}
13228
    for iinfo, beinfo in i_list:
13229
      nic_data = []
13230
      for nic in iinfo.nics:
13231
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13232
        nic_dict = {
13233
          "mac": nic.mac,
13234
          "ip": nic.ip,
13235
          "mode": filled_params[constants.NIC_MODE],
13236
          "link": filled_params[constants.NIC_LINK],
13237
          }
13238
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13239
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13240
        nic_data.append(nic_dict)
13241
      pir = {
13242
        "tags": list(iinfo.GetTags()),
13243
        "admin_up": iinfo.admin_up,
13244
        "vcpus": beinfo[constants.BE_VCPUS],
13245
        "memory": beinfo[constants.BE_MEMORY],
13246
        "os": iinfo.os,
13247
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13248
        "nics": nic_data,
13249
        "disks": [{constants.IDISK_SIZE: dsk.size,
13250
                   constants.IDISK_MODE: dsk.mode}
13251
                  for dsk in iinfo.disks],
13252
        "disk_template": iinfo.disk_template,
13253
        "hypervisor": iinfo.hypervisor,
13254
        }
13255
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13256
                                                 pir["disks"])
13257
      instance_data[iinfo.name] = pir
13258

    
13259
    return instance_data
13260

    
13261
  def _AddNewInstance(self):
13262
    """Add new instance data to allocator structure.
13263

13264
    This, in combination with _ComputeClusterData, will create the
13265
    correct structure needed as input for the allocator.
13266

13267
    The checks for the completeness of the opcode must have already been
13268
    done.
13269

13270
    """
13271
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13272

    
13273
    if self.disk_template in constants.DTS_INT_MIRROR:
13274
      self.required_nodes = 2
13275
    else:
13276
      self.required_nodes = 1
13277

    
13278
    request = {
13279
      "name": self.name,
13280
      "disk_template": self.disk_template,
13281
      "tags": self.tags,
13282
      "os": self.os,
13283
      "vcpus": self.vcpus,
13284
      "memory": self.memory,
13285
      "disks": self.disks,
13286
      "disk_space_total": disk_space,
13287
      "nics": self.nics,
13288
      "required_nodes": self.required_nodes,
13289
      "hypervisor": self.hypervisor,
13290
      }
13291

    
13292
    return request
13293

    
13294
  def _AddRelocateInstance(self):
13295
    """Add relocate instance data to allocator structure.
13296

13297
    This, in combination with _ComputeClusterData, will create the
13297
    correct structure needed as input for the allocator.
13299

13300
    The checks for the completeness of the opcode must have already been
13301
    done.
13302

13303
    """
13304
    instance = self.cfg.GetInstanceInfo(self.name)
13305
    if instance is None:
13306
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13307
                                   " IAllocator" % self.name)
13308

    
13309
    if instance.disk_template not in constants.DTS_MIRRORED:
13310
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13311
                                 errors.ECODE_INVAL)
13312

    
13313
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13314
        len(instance.secondary_nodes) != 1:
13315
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13316
                                 errors.ECODE_STATE)
13317

    
13318
    self.required_nodes = 1
13319
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13320
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13321

    
13322
    request = {
13323
      "name": self.name,
13324
      "disk_space_total": disk_space,
13325
      "required_nodes": self.required_nodes,
13326
      "relocate_from": self.relocate_from,
13327
      }
13328
    return request
13329

    
13330
  def _AddNodeEvacuate(self):
13331
    """Get data for node-evacuate requests.
13332

13333
    """
13334
    return {
13335
      "instances": self.instances,
13336
      "evac_mode": self.evac_mode,
13337
      }
13338

    
13339
  def _AddChangeGroup(self):
13340
    """Get data for node-evacuate requests.
13341

13342
    """
13343
    return {
13344
      "instances": self.instances,
13345
      "target_groups": self.target_groups,
13346
      }
13347

    
13348
  def _BuildInputData(self, fn, keydata):
13349
    """Build input data structures.
13350

13351
    """
13352
    self._ComputeClusterData()
13353

    
13354
    request = fn()
13355
    request["type"] = self.mode
13356
    for keyname, keytype in keydata:
13357
      if keyname not in request:
13358
        raise errors.ProgrammerError("Request parameter %s is missing" %
13359
                                     keyname)
13360
      val = request[keyname]
13361
      if not keytype(val):
13362
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13363
                                     " validation, value %s, expected"
13364
                                     " type %s" % (keyname, val, keytype))
13365
    self.in_data["request"] = request
13366

    
13367
    self.in_text = serializer.Dump(self.in_data)
13368

    
13369
  _STRING_LIST = ht.TListOf(ht.TString)
13370
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13371
     # pylint: disable=E1101
13372
     # Class '...' has no 'OP_ID' member
13373
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13374
                          opcodes.OpInstanceMigrate.OP_ID,
13375
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13376
     })))
13377

    
13378
  _NEVAC_MOVED = \
13379
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13380
                       ht.TItems([ht.TNonEmptyString,
13381
                                  ht.TNonEmptyString,
13382
                                  ht.TListOf(ht.TNonEmptyString),
13383
                                 ])))
13384
  _NEVAC_FAILED = \
13385
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13386
                       ht.TItems([ht.TNonEmptyString,
13387
                                  ht.TMaybeString,
13388
                                 ])))
13389
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13390
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13391

    
13392
  _MODE_DATA = {
13393
    constants.IALLOCATOR_MODE_ALLOC:
13394
      (_AddNewInstance,
13395
       [
13396
        ("name", ht.TString),
13397
        ("memory", ht.TInt),
13398
        ("disks", ht.TListOf(ht.TDict)),
13399
        ("disk_template", ht.TString),
13400
        ("os", ht.TString),
13401
        ("tags", _STRING_LIST),
13402
        ("nics", ht.TListOf(ht.TDict)),
13403
        ("vcpus", ht.TInt),
13404
        ("hypervisor", ht.TString),
13405
        ], ht.TList),
13406
    constants.IALLOCATOR_MODE_RELOC:
13407
      (_AddRelocateInstance,
13408
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13409
       ht.TList),
13410
     constants.IALLOCATOR_MODE_NODE_EVAC:
13411
      (_AddNodeEvacuate, [
13412
        ("instances", _STRING_LIST),
13413
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13414
        ], _NEVAC_RESULT),
13415
     constants.IALLOCATOR_MODE_CHG_GROUP:
13416
      (_AddChangeGroup, [
13417
        ("instances", _STRING_LIST),
13418
        ("target_groups", _STRING_LIST),
13419
        ], _NEVAC_RESULT),
13420
    }
13421

    
13422
  def Run(self, name, validate=True, call_fn=None):
13423
    """Run an instance allocator and return the results.
13424

13425
    """
13426
    if call_fn is None:
13427
      call_fn = self.rpc.call_iallocator_runner
13428

    
13429
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13430
    result.Raise("Failure while running the iallocator script")
13431

    
13432
    self.out_text = result.payload
13433
    if validate:
13434
      self._ValidateResult()
13435

    
13436
  def _ValidateResult(self):
13437
    """Process the allocator results.
13438

13439
    This will process the result and, if successful, save it in
13440
    self.out_data and the other result attributes.
13441

13442
    """
13443
    try:
13444
      rdict = serializer.Load(self.out_text)
13445
    except Exception, err:
13446
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13447

    
13448
    if not isinstance(rdict, dict):
13449
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13450

    
13451
    # TODO: remove backwards compatibility in later versions
13452
    if "nodes" in rdict and "result" not in rdict:
13453
      rdict["result"] = rdict["nodes"]
13454
      del rdict["nodes"]
13455

    
13456
    for key in "success", "info", "result":
13457
      if key not in rdict:
13458
        raise errors.OpExecError("Can't parse iallocator results:"
13459
                                 " missing key '%s'" % key)
13460
      setattr(self, key, rdict[key])
13461

    
13462
    if not self._result_check(self.result):
13463
      raise errors.OpExecError("Iallocator returned invalid result,"
13464
                               " expected %s, got %s" %
13465
                               (self._result_check, self.result),
13466
                               errors.ECODE_INVAL)
13467

    
13468
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13469
      assert self.relocate_from is not None
13470
      assert self.required_nodes == 1
13471

    
13472
      node2group = dict((name, ndata["group"])
13473
                        for (name, ndata) in self.in_data["nodes"].items())
13474

    
13475
      fn = compat.partial(self._NodesToGroups, node2group,
13476
                          self.in_data["nodegroups"])
13477

    
13478
      instance = self.cfg.GetInstanceInfo(self.name)
13479
      request_groups = fn(self.relocate_from + [instance.primary_node])
13480
      result_groups = fn(rdict["result"] + [instance.primary_node])
13481

    
13482
      if self.success and not set(result_groups).issubset(request_groups):
13483
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13484
                                 " differ from original groups (%s)" %
13485
                                 (utils.CommaJoin(result_groups),
13486
                                  utils.CommaJoin(request_groups)))
13487

    
13488
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13489
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13490

    
13491
    self.out_data = rdict
13492

    
13493
  @staticmethod
13494
  def _NodesToGroups(node2group, groups, nodes):
13495
    """Returns a list of unique group names for a list of nodes.
13496

13497
    @type node2group: dict
13498
    @param node2group: Map from node name to group UUID
13499
    @type groups: dict
13500
    @param groups: Group information
13501
    @type nodes: list
13502
    @param nodes: Node names
13503

13504
    """
13505
    result = set()
13506

    
13507
    for node in nodes:
13508
      try:
13509
        group_uuid = node2group[node]
13510
      except KeyError:
13511
        # Ignore unknown node
13512
        pass
13513
      else:
13514
        try:
13515
          group = groups[group_uuid]
13516
        except KeyError:
13517
          # Can't find group, let's use UUID
13518
          group_name = group_uuid
13519
        else:
13520
          group_name = group["name"]
13521

    
13522
        result.add(group_name)
13523

    
13524
    return sorted(result)
13525

    
13526

    
13527
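# Illustrative sketch, not part of the original module: above,
# IAllocator._BuildInputData validates each request against a declarative
# (key name, type check) table taken from _MODE_DATA. The hypothetical helper
# below shows that pattern on its own, using plain KeyError/ValueError to
# stay self-contained.
def _SketchValidateRequest(request, keydata):
  """Check that a request dict has every declared key and passes its check.

  @param request: dict, e.g. as produced by one of the _Add* methods
  @param keydata: list of (key name, type-check callable) pairs

  """
  for (keyname, keytype) in keydata:
    if keyname not in request:
      raise KeyError("Request parameter %s is missing" % keyname)
    if not keytype(request[keyname]):
      raise ValueError("Request parameter %s failed validation: %r" %
                       (keyname, request[keyname]))

# Example use of the sketch:
#   _SketchValidateRequest({"name": "inst1.example.com"},
#                          [("name", lambda val: isinstance(val, str))])
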
class LUTestAllocator(NoHooksLU):
13528
  """Run allocator tests.
13529

13530
  This LU runs the allocator tests.
13531

13532
  """
13533
  def CheckPrereq(self):
13534
    """Check prerequisites.
13535

13536
    This checks the opcode parameters depending on the direction and mode of
    the test.
13537

13538
    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
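# The assert above ties this map to the set of resources that can be queried
# via opcodes: adding a resource to constants.QR_VIA_OP without registering
# an implementation here makes the assertion fail at import time (when
# assertions are enabled).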


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
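
# Illustrative usage: _GetQueryImplementation(constants.QR_INSTANCE) returns
# the _InstanceQuery class registered above; an unknown resource name is
# reported as an OpPrereqError with ECODE_INVAL.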