#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instance
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


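# Editor's note: the following is an illustrative sketch, not part of the
# original module.  It shows the shape of the C{jobs} argument expected by
# ResultWithJobs: a list of jobs, each job itself being a list of opcodes,
# which mcpu submits after the LU finishes.
def _ExampleBuildVerifyJobs(cfg):
  """Example only: build one cluster-verification job per node group."""
  jobs = [[opcodes.OpClusterVerifyGroup(group_name=group)]
          for group in cfg.GetNodeGroupList()]
  # Extra keyword arguments end up in the "other" attribute of the result
  return ResultWithJobs(jobs, submitted=len(jobs))

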
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"; that prefix will be added by the hooks runner. The hooks
      runner will extend the environment with additional variables. If no
      environment should be defined, an empty dictionary should be returned
      (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. "No nodes" should be returned as an
      empty list (and not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


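# Editor's note: illustrative sketch only, not part of the original module.
# A minimal tasklet; an LU would build such objects (e.g. in ExpandNames or
# CheckPrereq), store them in self.tasklets, and let the base
# LogicalUnit.CheckPrereq and LogicalUnit.Exec drive them in order.
class _ExampleNoopTasklet(Tasklet):
  """Example only: tasklet that checks a node and reports via feedback_fn."""
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    # _CheckNodeOnline is defined further down in this module
    _CheckNodeOnline(self.lu, self.node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do on node %s" % self.node_name)

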
class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


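# Editor's note: illustrative usage only, not part of the original module.
# A read-only LU can declare all of its locks as shared in ExpandNames::
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {
#     locking.LEVEL_NODE: locking.ALL_SET,
#     }

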
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


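# Editor's note: illustrative example only, not part of the original module.
# _GetUpdatedParams merges parameter dictionaries, e.g.::
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"b": constants.VALUE_DEFAULT, "c": 3})
#   # => {"a": 1, "c": 3}   ("b" is reset to its default, "c" is added)
#
# With use_none=True, passing None for a key removes it in the same way.

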
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we release the locks
  @type level: member of locking.LEVELS
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


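# Editor's note: illustrative usage only, not part of the original module,
# and the attribute names below are hypothetical.  Once an LU has narrowed
# down the nodes it actually works on, it can drop the node locks it no
# longer needs, e.g.::
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node])

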
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


820
  """Ensure that a given node is online.
821

822
  @param lu: the LU on behalf of which we make the check
823
  @param node: the node to check
824
  @param msg: if passed, should be a message to replace the default one
825
  @raise errors.OpPrereqError: if the node is offline
826

827
  """
828
  if msg is None:
829
    msg = "Can't use offline node"
830
  if lu.cfg.GetNodeInfo(node).offline:
831
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
832

    
833

    
834
def _CheckNodeNotDrained(lu, node):
835
  """Ensure that a given node is not drained.
836

837
  @param lu: the LU on behalf of which we make the check
838
  @param node: the node to check
839
  @raise errors.OpPrereqError: if the node is drained
840

841
  """
842
  if lu.cfg.GetNodeInfo(node).drained:
843
    raise errors.OpPrereqError("Can't use drained node %s" % node,
844
                               errors.ECODE_STATE)
845

    
846

    
847
def _CheckNodeVmCapable(lu, node):
848
  """Ensure that a given node is vm capable.
849

850
  @param lu: the LU on behalf of which we make the check
851
  @param node: the node to check
852
  @raise errors.OpPrereqError: if the node is not vm capable
853

854
  """
855
  if not lu.cfg.GetNodeInfo(node).vm_capable:
856
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
857
                               errors.ECODE_STATE)
858

    
859

    
860
def _CheckNodeHasOS(lu, node, os_name, force_variant):
861
  """Ensure that a node supports a given OS.
862

863
  @param lu: the LU on behalf of which we make the check
864
  @param node: the node to check
865
  @param os_name: the OS to query about
866
  @param force_variant: whether to ignore variant errors
867
  @raise errors.OpPrereqError: if the node is not supporting the OS
868

869
  """
870
  result = lu.rpc.call_os_get(node, os_name)
871
  result.Raise("OS '%s' not in supported OS list for node %s" %
872
               (os_name, node),
873
               prereq=True, ecode=errors.ECODE_INVAL)
874
  if not force_variant:
875
    _CheckOSVariant(result.payload, os_name)
876

    
877

    
878
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
879
  """Ensure that a node has the given secondary ip.
880

881
  @type lu: L{LogicalUnit}
882
  @param lu: the LU on behalf of which we make the check
883
  @type node: string
884
  @param node: the node to check
885
  @type secondary_ip: string
886
  @param secondary_ip: the ip to check
887
  @type prereq: boolean
888
  @param prereq: whether to throw a prerequisite or an execute error
889
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
890
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
891

892
  """
893
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
894
  result.Raise("Failure checking secondary ip on node %s" % node,
895
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
896
  if not result.payload:
897
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
898
           " please fix and re-run this command" % secondary_ip)
899
    if prereq:
900
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
901
    else:
902
      raise errors.OpExecError(msg)
903

    
904

    
905
def _GetClusterDomainSecret():
906
  """Reads the cluster domain secret.
907

908
  """
909
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
910
                               strict=True)
911

    
912

    
913
def _CheckInstanceState(lu, instance, req_states, msg=None):
914
  """Ensure that an instance is in one of the required states.
915

916
  @param lu: the LU on behalf of which we make the check
917
  @param instance: the instance to check
918
  @param msg: if passed, should be a message to replace the default one
919
  @raise errors.OpPrereqError: if the instance is not in the required state
920

921
  """
922
  if msg is None:
923
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
924
  if instance.admin_state not in req_states:
925
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
926
                               (instance, instance.admin_state, msg),
927
                               errors.ECODE_STATE)
928

    
929
  if constants.ADMINST_UP not in req_states:
930
    pnode = instance.primary_node
931
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
932
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
933
                prereq=True, ecode=errors.ECODE_ENVIRON)
934

    
935
    if instance.name in ins_l.payload:
936
      raise errors.OpPrereqError("Instance %s is running, %s" %
937
                                 (instance.name, msg), errors.ECODE_STATE)
938

    
939

    
940
def _ExpandItemName(fn, name, kind):
941
  """Expand an item name.
942

943
  @param fn: the function to use for expansion
944
  @param name: requested item name
945
  @param kind: text description ('Node' or 'Instance')
946
  @return: the resolved (full) name
947
  @raise errors.OpPrereqError: if the item is not found
948

949
  """
950
  full_name = fn(name)
951
  if full_name is None:
952
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
953
                               errors.ECODE_NOENT)
954
  return full_name
955

    
956

    
957
def _ExpandNodeName(cfg, name):
958
  """Wrapper over L{_ExpandItemName} for nodes."""
959
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
960

    
961

    
962
def _ExpandInstanceName(cfg, name):
963
  """Wrapper over L{_ExpandItemName} for instance."""
964
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
965

    
966

    
967
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
968
                          memory, vcpus, nics, disk_template, disks,
969
                          bep, hvp, hypervisor_name, tags):
970
  """Builds instance related env variables for hooks
971

972
  This builds the hook environment from individual variables.
973

974
  @type name: string
975
  @param name: the name of the instance
976
  @type primary_node: string
977
  @param primary_node: the name of the instance's primary node
978
  @type secondary_nodes: list
979
  @param secondary_nodes: list of secondary nodes as strings
980
  @type os_type: string
981
  @param os_type: the name of the instance's OS
982
  @type status: string
983
  @param status: the desired status of the instance
984
  @type memory: string
985
  @param memory: the memory size of the instance
986
  @type vcpus: string
987
  @param vcpus: the count of VCPUs the instance has
988
  @type nics: list
989
  @param nics: list of tuples (ip, mac, mode, link) representing
990
      the NICs the instance has
991
  @type disk_template: string
992
  @param disk_template: the disk template of the instance
993
  @type disks: list
994
  @param disks: the list of (size, mode) pairs
995
  @type bep: dict
996
  @param bep: the backend parameters for the instance
997
  @type hvp: dict
998
  @param hvp: the hypervisor parameters for the instance
999
  @type hypervisor_name: string
1000
  @param hypervisor_name: the hypervisor for the instance
1001
  @type tags: list
1002
  @param tags: list of instance tags as strings
1003
  @rtype: dict
1004
  @return: the hook environment for this instance
1005

1006
  """
1007
  env = {
1008
    "OP_TARGET": name,
1009
    "INSTANCE_NAME": name,
1010
    "INSTANCE_PRIMARY": primary_node,
1011
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1012
    "INSTANCE_OS_TYPE": os_type,
1013
    "INSTANCE_STATUS": status,
1014
    "INSTANCE_MEMORY": memory,
1015
    "INSTANCE_VCPUS": vcpus,
1016
    "INSTANCE_DISK_TEMPLATE": disk_template,
1017
    "INSTANCE_HYPERVISOR": hypervisor_name,
1018
  }
1019

    
1020
  if nics:
1021
    nic_count = len(nics)
1022
    for idx, (ip, mac, mode, link) in enumerate(nics):
1023
      if ip is None:
1024
        ip = ""
1025
      env["INSTANCE_NIC%d_IP" % idx] = ip
1026
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1027
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1028
      env["INSTANCE_NIC%d_LINK" % idx] = link
1029
      if mode == constants.NIC_MODE_BRIDGED:
1030
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1031
  else:
1032
    nic_count = 0
1033

    
1034
  env["INSTANCE_NIC_COUNT"] = nic_count
1035

    
1036
  if disks:
1037
    disk_count = len(disks)
1038
    for idx, (size, mode) in enumerate(disks):
1039
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1040
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1041
  else:
1042
    disk_count = 0
1043

    
1044
  env["INSTANCE_DISK_COUNT"] = disk_count
1045

    
1046
  if not tags:
1047
    tags = []
1048

    
1049
  env["INSTANCE_TAGS"] = " ".join(tags)
1050

    
1051
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1052
    for key, value in source.items():
1053
      env["INSTANCE_%s_%s" % (kind, key)] = value
1054

    
1055
  return env
1056

    
1057

    
1058
def _NICListToTuple(lu, nics):
1059
  """Build a list of nic information tuples.
1060

1061
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1062
  value in LUInstanceQueryData.
1063

1064
  @type lu:  L{LogicalUnit}
1065
  @param lu: the logical unit on whose behalf we execute
1066
  @type nics: list of L{objects.NIC}
1067
  @param nics: list of nics to convert to hooks tuples
1068

1069
  """
1070
  hooks_nics = []
1071
  cluster = lu.cfg.GetClusterInfo()
1072
  for nic in nics:
1073
    ip = nic.ip
1074
    mac = nic.mac
1075
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1076
    mode = filled_params[constants.NIC_MODE]
1077
    link = filled_params[constants.NIC_LINK]
1078
    hooks_nics.append((ip, mac, mode, link))
1079
  return hooks_nics
1080

    
1081

    
1082
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1083
  """Builds instance related env variables for hooks from an object.
1084

1085
  @type lu: L{LogicalUnit}
1086
  @param lu: the logical unit on whose behalf we execute
1087
  @type instance: L{objects.Instance}
1088
  @param instance: the instance for which we should build the
1089
      environment
1090
  @type override: dict
1091
  @param override: dictionary with key/values that will override
1092
      our values
1093
  @rtype: dict
1094
  @return: the hook environment dictionary
1095

1096
  """
1097
  cluster = lu.cfg.GetClusterInfo()
1098
  bep = cluster.FillBE(instance)
1099
  hvp = cluster.FillHV(instance)
1100
  args = {
1101
    "name": instance.name,
1102
    "primary_node": instance.primary_node,
1103
    "secondary_nodes": instance.secondary_nodes,
1104
    "os_type": instance.os,
1105
    "status": instance.admin_state,
1106
    "memory": bep[constants.BE_MEMORY],
1107
    "vcpus": bep[constants.BE_VCPUS],
1108
    "nics": _NICListToTuple(lu, instance.nics),
1109
    "disk_template": instance.disk_template,
1110
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1111
    "bep": bep,
1112
    "hvp": hvp,
1113
    "hypervisor_name": instance.hypervisor,
1114
    "tags": instance.tags,
1115
  }
1116
  if override:
1117
    args.update(override)
1118
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1119

    
1120

    
1121
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's disks that are faulty on a node.

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


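# Editor's note: illustrative usage only, not part of the original module;
# the slot names below are hypothetical.  An LU whose opcode has both an
# "iallocator" and a "pnode" slot would call::
#
#   _CheckIAllocatorOrNode(self, "iallocator", "pnode")

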
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values into a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed when only warnings are raised
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


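# Editor's note: illustrative usage only, not part of the original module.
# Inside the Exec path of a verify LU, problems are usually reported through
# the _ErrorIf helper from the mix-in above, e.g.::
#
#   self._ErrorIf(test_failed, constants.CV_ECLUSTERCFG, None,
#                 "unexpected value: %s", value)

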
class LUClusterVerify(NoHooksLU):
1519
  """Submits all jobs necessary to verify the cluster.
1520

1521
  """
1522
  REQ_BGL = False
1523

    
1524
  def ExpandNames(self):
1525
    self.needed_locks = {}
1526

    
1527
  def Exec(self, feedback_fn):
1528
    jobs = []
1529

    
1530
    if self.op.group_name:
1531
      groups = [self.op.group_name]
1532
      depends_fn = lambda: None
1533
    else:
1534
      groups = self.cfg.GetNodeGroupList()
1535

    
1536
      # Verify global configuration
1537
      jobs.append([
1538
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1539
        ])
1540

    
1541
      # Always depend on global verification
1542
      depends_fn = lambda: [(-len(jobs), [])]
1543

    
1544
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1545
                                            ignore_errors=self.op.ignore_errors,
1546
                                            depends=depends_fn())]
1547
                for group in groups)
1548

    
1549
    # Fix up all parameters
1550
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1551
      op.debug_simulate_errors = self.op.debug_simulate_errors
1552
      op.verbose = self.op.verbose
1553
      op.error_codes = self.op.error_codes
1554
      try:
1555
        op.skip_checks = self.op.skip_checks
1556
      except AttributeError:
1557
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1558

    
1559
    return ResultWithJobs(jobs)
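  # Illustrative sketch (not part of the original code): when no group name is
  # given, the submitted jobs end up roughly as
  #
  #   [[OpClusterVerifyConfig(...)],
  #    [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])], ...)],
  #    [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])], ...)]]
  #
  # i.e. every per-group job carries a relative dependency pointing back at
  # the global config-verification job; the group names are hypothetical.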
1560

    
1561

    
1562
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1563
  """Verifies the cluster config.
1564

1565
  """
1566
  REQ_BGL = True
1567

    
1568
  def _VerifyHVP(self, hvp_data):
1569
    """Verifies locally the syntax of the hypervisor parameters.
1570

1571
    """
1572
    for item, hv_name, hv_params in hvp_data:
1573
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1574
             (item, hv_name))
1575
      try:
1576
        hv_class = hypervisor.GetHypervisor(hv_name)
1577
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1578
        hv_class.CheckParameterSyntax(hv_params)
1579
      except errors.GenericError, err:
1580
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1581

    
1582
  def ExpandNames(self):
1583
    # Information can be safely retrieved as the BGL is acquired in exclusive
1584
    # mode
1585
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1586
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1587
    self.all_node_info = self.cfg.GetAllNodesInfo()
1588
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1589
    self.needed_locks = {}
1590

    
1591
  def Exec(self, feedback_fn):
1592
    """Verify integrity of cluster, performing various test on nodes.
1593

1594
    """
1595
    self.bad = False
1596
    self._feedback_fn = feedback_fn
1597

    
1598
    feedback_fn("* Verifying cluster config")
1599

    
1600
    for msg in self.cfg.VerifyConfig():
1601
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1602

    
1603
    feedback_fn("* Verifying cluster certificate files")
1604

    
1605
    for cert_filename in constants.ALL_CERT_FILES:
1606
      (errcode, msg) = _VerifyCertificate(cert_filename)
1607
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1608

    
1609
    feedback_fn("* Verifying hypervisor parameters")
1610

    
1611
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1612
                                                self.all_inst_info.values()))
1613

    
1614
    feedback_fn("* Verifying all nodes belong to an existing group")
1615

    
1616
    # We do this verification here because, should this bogus circumstance
1617
    # occur, it would never be caught by VerifyGroup, which only acts on
1618
    # nodes/instances reachable from existing node groups.
1619

    
1620
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1621
                         if node.group not in self.all_group_info)
1622

    
1623
    dangling_instances = {}
1624
    no_node_instances = []
1625

    
1626
    for inst in self.all_inst_info.values():
1627
      if inst.primary_node in dangling_nodes:
1628
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1629
      elif inst.primary_node not in self.all_node_info:
1630
        no_node_instances.append(inst.name)
1631

    
1632
    pretty_dangling = [
1633
        "%s (%s)" %
1634
        (node.name,
1635
         utils.CommaJoin(dangling_instances.get(node.name,
1636
                                                ["no instances"])))
1637
        for node in dangling_nodes]
1638

    
1639
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1640
                  None,
1641
                  "the following nodes (and their instances) belong to a non"
1642
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1643

    
1644
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1645
                  None,
1646
                  "the following instances have a non-existing primary-node:"
1647
                  " %s", utils.CommaJoin(no_node_instances))
1648

    
1649
    return not self.bad
1650

    
1651

    
1652
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1653
  """Verifies the status of a node group.
1654

1655
  """
1656
  HPATH = "cluster-verify"
1657
  HTYPE = constants.HTYPE_CLUSTER
1658
  REQ_BGL = False
1659

    
1660
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1661

    
1662
  class NodeImage(object):
1663
    """A class representing the logical and physical status of a node.
1664

1665
    @type name: string
1666
    @ivar name: the node name to which this object refers
1667
    @ivar volumes: a structure as returned from
1668
        L{ganeti.backend.GetVolumeList} (runtime)
1669
    @ivar instances: a list of running instances (runtime)
1670
    @ivar pinst: list of configured primary instances (config)
1671
    @ivar sinst: list of configured secondary instances (config)
1672
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1673
        instances for which this node is secondary (config)
1674
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1675
    @ivar dfree: free disk, as reported by the node (runtime)
1676
    @ivar offline: the offline status (config)
1677
    @type rpc_fail: boolean
1678
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1679
        not whether the individual keys were correct) (runtime)
1680
    @type lvm_fail: boolean
1681
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1682
    @type hyp_fail: boolean
1683
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1684
    @type ghost: boolean
1685
    @ivar ghost: whether this is a known node or not (config)
1686
    @type os_fail: boolean
1687
    @ivar os_fail: whether the RPC call didn't return valid OS data
1688
    @type oslist: list
1689
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1690
    @type vm_capable: boolean
1691
    @ivar vm_capable: whether the node can host instances
1692

1693
    """
1694
    def __init__(self, offline=False, name=None, vm_capable=True):
1695
      self.name = name
1696
      self.volumes = {}
1697
      self.instances = []
1698
      self.pinst = []
1699
      self.sinst = []
1700
      self.sbp = {}
1701
      self.mfree = 0
1702
      self.dfree = 0
1703
      self.offline = offline
1704
      self.vm_capable = vm_capable
1705
      self.rpc_fail = False
1706
      self.lvm_fail = False
1707
      self.hyp_fail = False
1708
      self.ghost = False
1709
      self.os_fail = False
1710
      self.oslist = {}
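    # Illustrative sketch (not part of the original code): Exec() creates one
    # image per node, e.g.
    #   nimg = self.NodeImage(offline=False, name="node1.example.com",
    #                         vm_capable=True)
    # and the _UpdateNode*() helpers later fill in the runtime fields
    # (volumes, instances, mfree, dfree) from the RPC results; the node name
    # is hypothetical.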
1711

    
1712
  def ExpandNames(self):
1713
    # This raises errors.OpPrereqError on its own:
1714
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1715

    
1716
    # Get instances in node group; this is unsafe and needs verification later
1717
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1718

    
1719
    self.needed_locks = {
1720
      locking.LEVEL_INSTANCE: inst_names,
1721
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1722
      locking.LEVEL_NODE: [],
1723
      }
1724

    
1725
    self.share_locks = _ShareAll()
1726

    
1727
  def DeclareLocks(self, level):
1728
    if level == locking.LEVEL_NODE:
1729
      # Get members of node group; this is unsafe and needs verification later
1730
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1731

    
1732
      all_inst_info = self.cfg.GetAllInstancesInfo()
1733

    
1734
      # In Exec(), we warn about mirrored instances that have primary and
1735
      # secondary living in separate node groups. To fully verify that
1736
      # volumes for these instances are healthy, we will need to do an
1737
      # extra call to their secondaries. We ensure here those nodes will
1738
      # be locked.
1739
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1740
        # Important: access only the instances whose lock is owned
1741
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1742
          nodes.update(all_inst_info[inst].secondary_nodes)
1743

    
1744
      self.needed_locks[locking.LEVEL_NODE] = nodes
1745

    
1746
  def CheckPrereq(self):
1747
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1748
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1749

    
1750
    group_nodes = set(self.group_info.members)
1751
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1752

    
1753
    unlocked_nodes = \
1754
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1755

    
1756
    unlocked_instances = \
1757
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1758

    
1759
    if unlocked_nodes:
1760
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1761
                                 utils.CommaJoin(unlocked_nodes))
1762

    
1763
    if unlocked_instances:
1764
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1765
                                 utils.CommaJoin(unlocked_instances))
1766

    
1767
    self.all_node_info = self.cfg.GetAllNodesInfo()
1768
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1769

    
1770
    self.my_node_names = utils.NiceSort(group_nodes)
1771
    self.my_inst_names = utils.NiceSort(group_instances)
1772

    
1773
    self.my_node_info = dict((name, self.all_node_info[name])
1774
                             for name in self.my_node_names)
1775

    
1776
    self.my_inst_info = dict((name, self.all_inst_info[name])
1777
                             for name in self.my_inst_names)
1778

    
1779
    # We detect here the nodes that will need the extra RPC calls for verifying
1780
    # split LV volumes; they should be locked.
1781
    extra_lv_nodes = set()
1782

    
1783
    for inst in self.my_inst_info.values():
1784
      if inst.disk_template in constants.DTS_INT_MIRROR:
1785
        group = self.my_node_info[inst.primary_node].group
1786
        for nname in inst.secondary_nodes:
1787
          if self.all_node_info[nname].group != group:
1788
            extra_lv_nodes.add(nname)
1789

    
1790
    unlocked_lv_nodes = \
1791
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1792

    
1793
    if unlocked_lv_nodes:
1794
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1795
                                 utils.CommaJoin(unlocked_lv_nodes))
1796
    self.extra_lv_nodes = list(extra_lv_nodes)
1797

    
1798
  def _VerifyNode(self, ninfo, nresult):
1799
    """Perform some basic validation on data returned from a node.
1800

1801
      - check the result data structure is well formed and has all the
1802
        mandatory fields
1803
      - check ganeti version
1804

1805
    @type ninfo: L{objects.Node}
1806
    @param ninfo: the node to check
1807
    @param nresult: the results from the node
1808
    @rtype: boolean
1809
    @return: whether overall this call was successful (and we can expect
1810
         reasonable values in the response)
1811

1812
    """
1813
    node = ninfo.name
1814
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1815

    
1816
    # main result, nresult should be a non-empty dict
1817
    test = not nresult or not isinstance(nresult, dict)
1818
    _ErrorIf(test, constants.CV_ENODERPC, node,
1819
                  "unable to verify node: no data returned")
1820
    if test:
1821
      return False
1822

    
1823
    # compares ganeti version
1824
    local_version = constants.PROTOCOL_VERSION
1825
    remote_version = nresult.get("version", None)
1826
    test = not (remote_version and
1827
                isinstance(remote_version, (list, tuple)) and
1828
                len(remote_version) == 2)
1829
    _ErrorIf(test, constants.CV_ENODERPC, node,
1830
             "connection to node returned invalid data")
1831
    if test:
1832
      return False
1833

    
1834
    test = local_version != remote_version[0]
1835
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1836
             "incompatible protocol versions: master %s,"
1837
             " node %s", local_version, remote_version[0])
1838
    if test:
1839
      return False
1840

    
1841
    # node seems compatible, we can actually try to look into its results
1842

    
1843
    # full package version
1844
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1845
                  constants.CV_ENODEVERSION, node,
1846
                  "software version mismatch: master %s, node %s",
1847
                  constants.RELEASE_VERSION, remote_version[1],
1848
                  code=self.ETYPE_WARNING)
1849

    
1850
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1851
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1852
      for hv_name, hv_result in hyp_result.iteritems():
1853
        test = hv_result is not None
1854
        _ErrorIf(test, constants.CV_ENODEHV, node,
1855
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1856

    
1857
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1858
    if ninfo.vm_capable and isinstance(hvp_result, list):
1859
      for item, hv_name, hv_result in hvp_result:
1860
        _ErrorIf(True, constants.CV_ENODEHV, node,
1861
                 "hypervisor %s parameter verify failure (source %s): %s",
1862
                 hv_name, item, hv_result)
1863

    
1864
    test = nresult.get(constants.NV_NODESETUP,
1865
                       ["Missing NODESETUP results"])
1866
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1867
             "; ".join(test))
1868

    
1869
    return True
1870

    
1871
  def _VerifyNodeTime(self, ninfo, nresult,
1872
                      nvinfo_starttime, nvinfo_endtime):
1873
    """Check the node time.
1874

1875
    @type ninfo: L{objects.Node}
1876
    @param ninfo: the node to check
1877
    @param nresult: the remote results for the node
1878
    @param nvinfo_starttime: the start time of the RPC call
1879
    @param nvinfo_endtime: the end time of the RPC call
1880

1881
    """
1882
    node = ninfo.name
1883
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1884

    
1885
    ntime = nresult.get(constants.NV_TIME, None)
1886
    try:
1887
      ntime_merged = utils.MergeTime(ntime)
1888
    except (ValueError, TypeError):
1889
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1890
      return
1891

    
1892
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1893
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1894
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1895
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1896
    else:
1897
      ntime_diff = None
1898

    
1899
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1900
             "Node time diverges by at least %s from master node time",
1901
             ntime_diff)
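  # Illustrative sketch (not part of the original code): with a clock-skew
  # allowance of e.g. 150 seconds, a node whose merged time is 200 seconds
  # behind nvinfo_starttime is reported with ntime_diff == "200.0s", while
  # any node time inside [starttime - skew, endtime + skew] passes silently.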
1902

    
1903
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1904
    """Check the node LVM results.
1905

1906
    @type ninfo: L{objects.Node}
1907
    @param ninfo: the node to check
1908
    @param nresult: the remote results for the node
1909
    @param vg_name: the configured VG name
1910

1911
    """
1912
    if vg_name is None:
1913
      return
1914

    
1915
    node = ninfo.name
1916
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1917

    
1918
    # checks vg existence and size > 20G
1919
    vglist = nresult.get(constants.NV_VGLIST, None)
1920
    test = not vglist
1921
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1922
    if not test:
1923
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1924
                                            constants.MIN_VG_SIZE)
1925
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1926

    
1927
    # check pv names
1928
    pvlist = nresult.get(constants.NV_PVLIST, None)
1929
    test = pvlist is None
1930
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1931
    if not test:
1932
      # check that ':' is not present in PV names, since it's a
1933
      # special character for lvcreate (denotes the range of PEs to
1934
      # use on the PV)
1935
      for _, pvname, owner_vg in pvlist:
1936
        test = ":" in pvname
1937
        _ErrorIf(test, constants.CV_ENODELVM, node,
1938
                 "Invalid character ':' in PV '%s' of VG '%s'",
1939
                 pvname, owner_vg)
1940

    
1941
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1942
    """Check the node bridges.
1943

1944
    @type ninfo: L{objects.Node}
1945
    @param ninfo: the node to check
1946
    @param nresult: the remote results for the node
1947
    @param bridges: the expected list of bridges
1948

1949
    """
1950
    if not bridges:
1951
      return
1952

    
1953
    node = ninfo.name
1954
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1955

    
1956
    missing = nresult.get(constants.NV_BRIDGES, None)
1957
    test = not isinstance(missing, list)
1958
    _ErrorIf(test, constants.CV_ENODENET, node,
1959
             "did not return valid bridge information")
1960
    if not test:
1961
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1962
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1963

    
1964
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1965
    """Check the results of user scripts presence and executability on the node
1966

1967
    @type ninfo: L{objects.Node}
1968
    @param ninfo: the node to check
1969
    @param nresult: the remote results for the node
1970

1971
    """
1972
    node = ninfo.name
1973

    
1974
    test = constants.NV_USERSCRIPTS not in nresult
1975
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1976
                  "did not return user scripts information")
1977

    
1978
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1979
    if not test:
1980
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1981
                    "user scripts not present or not executable: %s" %
1982
                    utils.CommaJoin(sorted(broken_scripts)))
1983

    
1984
  def _VerifyNodeNetwork(self, ninfo, nresult):
1985
    """Check the node network connectivity results.
1986

1987
    @type ninfo: L{objects.Node}
1988
    @param ninfo: the node to check
1989
    @param nresult: the remote results for the node
1990

1991
    """
1992
    node = ninfo.name
1993
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1994

    
1995
    test = constants.NV_NODELIST not in nresult
1996
    _ErrorIf(test, constants.CV_ENODESSH, node,
1997
             "node hasn't returned node ssh connectivity data")
1998
    if not test:
1999
      if nresult[constants.NV_NODELIST]:
2000
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2001
          _ErrorIf(True, constants.CV_ENODESSH, node,
2002
                   "ssh communication with node '%s': %s", a_node, a_msg)
2003

    
2004
    test = constants.NV_NODENETTEST not in nresult
2005
    _ErrorIf(test, constants.CV_ENODENET, node,
2006
             "node hasn't returned node tcp connectivity data")
2007
    if not test:
2008
      if nresult[constants.NV_NODENETTEST]:
2009
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2010
        for anode in nlist:
2011
          _ErrorIf(True, constants.CV_ENODENET, node,
2012
                   "tcp communication with node '%s': %s",
2013
                   anode, nresult[constants.NV_NODENETTEST][anode])
2014

    
2015
    test = constants.NV_MASTERIP not in nresult
2016
    _ErrorIf(test, constants.CV_ENODENET, node,
2017
             "node hasn't returned node master IP reachability data")
2018
    if not test:
2019
      if not nresult[constants.NV_MASTERIP]:
2020
        if node == self.master_node:
2021
          msg = "the master node cannot reach the master IP (not configured?)"
2022
        else:
2023
          msg = "cannot reach the master IP"
2024
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2025

    
2026
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2027
                      diskstatus):
2028
    """Verify an instance.
2029

2030
    This function checks to see if the required block devices are
2031
    available on the instance's node.
2032

2033
    """
2034
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2035
    node_current = instanceconfig.primary_node
2036

    
2037
    node_vol_should = {}
2038
    instanceconfig.MapLVsByNode(node_vol_should)
2039

    
2040
    for node in node_vol_should:
2041
      n_img = node_image[node]
2042
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2043
        # ignore missing volumes on offline or broken nodes
2044
        continue
2045
      for volume in node_vol_should[node]:
2046
        test = volume not in n_img.volumes
2047
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2048
                 "volume %s missing on node %s", volume, node)
2049

    
2050
    if instanceconfig.admin_state == constants.ADMINST_UP:
2051
      pri_img = node_image[node_current]
2052
      test = instance not in pri_img.instances and not pri_img.offline
2053
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2054
               "instance not running on its primary node %s",
2055
               node_current)
2056

    
2057
    diskdata = [(nname, success, status, idx)
2058
                for (nname, disks) in diskstatus.items()
2059
                for idx, (success, status) in enumerate(disks)]
2060

    
2061
    for nname, success, bdev_status, idx in diskdata:
2062
      # the 'ghost node' construction in Exec() ensures that we have a
2063
      # node here
2064
      snode = node_image[nname]
2065
      bad_snode = snode.ghost or snode.offline
2066
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2067
               not success and not bad_snode,
2068
               constants.CV_EINSTANCEFAULTYDISK, instance,
2069
               "couldn't retrieve status for disk/%s on %s: %s",
2070
               idx, nname, bdev_status)
2071
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2072
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2073
               constants.CV_EINSTANCEFAULTYDISK, instance,
2074
               "disk/%s on %s is faulty", idx, nname)
2075

    
2076
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2077
    """Verify if there are any unknown volumes in the cluster.
2078

2079
    The .os, .swap and backup volumes are ignored. All other volumes are
2080
    reported as unknown.
2081

2082
    @type reserved: L{ganeti.utils.FieldSet}
2083
    @param reserved: a FieldSet of reserved volume names
2084

2085
    """
2086
    for node, n_img in node_image.items():
2087
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2088
        # skip non-healthy nodes
2089
        continue
2090
      for volume in n_img.volumes:
2091
        test = ((node not in node_vol_should or
2092
                volume not in node_vol_should[node]) and
2093
                not reserved.Matches(volume))
2094
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2095
                      "volume %s is unknown", volume)
2096

    
2097
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2098
    """Verify N+1 Memory Resilience.
2099

2100
    Check that if one single node dies we can still start all the
2101
    instances it was primary for.
2102

2103
    """
2104
    cluster_info = self.cfg.GetClusterInfo()
2105
    for node, n_img in node_image.items():
2106
      # This code checks that every node which is now listed as
2107
      # secondary has enough memory to host all instances it is
2108
      # supposed to should a single other node in the cluster fail.
2109
      # FIXME: not ready for failover to an arbitrary node
2110
      # FIXME: does not support file-backed instances
2111
      # WARNING: we currently take into account down instances as well
2112
      # as up ones, considering that even if they're down someone
2113
      # might want to start them even in the event of a node failure.
2114
      if n_img.offline:
2115
        # we're skipping offline nodes from the N+1 warning, since
2116
        # most likely we don't have good memory information from them;
2117
        # we already list instances living on such nodes, and that's
2118
        # enough warning
2119
        continue
2120
      for prinode, instances in n_img.sbp.items():
2121
        needed_mem = 0
2122
        for instance in instances:
2123
          bep = cluster_info.FillBE(instance_cfg[instance])
2124
          if bep[constants.BE_AUTO_BALANCE]:
2125
            needed_mem += bep[constants.BE_MEMORY]
2126
        test = n_img.mfree < needed_mem
2127
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2128
                      "not enough memory to accomodate instance failovers"
2129
                      " should node %s fail (%dMiB needed, %dMiB available)",
2130
                      prinode, needed_mem, n_img.mfree)
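  # Illustrative sketch (not part of the original code): if node A is
  # secondary for two auto-balanced instances whose primary is node B, with
  # BE_MEMORY values of 1024 and 2048 MiB, needed_mem for the (A, B) pair is
  # 3072 MiB and an N+1 error is flagged when A reports less free memory
  # than that; the node names are hypothetical.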
2131

    
2132
  @classmethod
2133
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2134
                   (files_all, files_opt, files_mc, files_vm)):
2135
    """Verifies file checksums collected from all nodes.
2136

2137
    @param errorif: Callback for reporting errors
2138
    @param nodeinfo: List of L{objects.Node} objects
2139
    @param master_node: Name of master node
2140
    @param all_nvinfo: RPC results
2141

2142
    """
2143
    # Define functions determining which nodes to consider for a file
2144
    files2nodefn = [
2145
      (files_all, None),
2146
      (files_mc, lambda node: (node.master_candidate or
2147
                               node.name == master_node)),
2148
      (files_vm, lambda node: node.vm_capable),
2149
      ]
2150

    
2151
    # Build mapping from filename to list of nodes which should have the file
2152
    nodefiles = {}
2153
    for (files, fn) in files2nodefn:
2154
      if fn is None:
2155
        filenodes = nodeinfo
2156
      else:
2157
        filenodes = filter(fn, nodeinfo)
2158
      nodefiles.update((filename,
2159
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2160
                       for filename in files)
2161

    
2162
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2163

    
2164
    fileinfo = dict((filename, {}) for filename in nodefiles)
2165
    ignore_nodes = set()
2166

    
2167
    for node in nodeinfo:
2168
      if node.offline:
2169
        ignore_nodes.add(node.name)
2170
        continue
2171

    
2172
      nresult = all_nvinfo[node.name]
2173

    
2174
      if nresult.fail_msg or not nresult.payload:
2175
        node_files = None
2176
      else:
2177
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2178

    
2179
      test = not (node_files and isinstance(node_files, dict))
2180
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2181
              "Node did not return file checksum data")
2182
      if test:
2183
        ignore_nodes.add(node.name)
2184
        continue
2185

    
2186
      # Build per-checksum mapping from filename to nodes having it
2187
      for (filename, checksum) in node_files.items():
2188
        assert filename in nodefiles
2189
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2190

    
2191
    for (filename, checksums) in fileinfo.items():
2192
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2193

    
2194
      # Nodes having the file
2195
      with_file = frozenset(node_name
2196
                            for nodes in fileinfo[filename].values()
2197
                            for node_name in nodes) - ignore_nodes
2198

    
2199
      expected_nodes = nodefiles[filename] - ignore_nodes
2200

    
2201
      # Nodes missing file
2202
      missing_file = expected_nodes - with_file
2203

    
2204
      if filename in files_opt:
2205
        # All or no nodes
2206
        errorif(missing_file and missing_file != expected_nodes,
2207
                constants.CV_ECLUSTERFILECHECK, None,
2208
                "File %s is optional, but it must exist on all or no"
2209
                " nodes (not found on %s)",
2210
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2211
      else:
2212
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2213
                "File %s is missing from node(s) %s", filename,
2214
                utils.CommaJoin(utils.NiceSort(missing_file)))
2215

    
2216
        # Warn if a node has a file it shouldn't
2217
        unexpected = with_file - expected_nodes
2218
        errorif(unexpected,
2219
                constants.CV_ECLUSTERFILECHECK, None,
2220
                "File %s should not exist on node(s) %s",
2221
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2222

    
2223
      # See if there are multiple versions of the file
2224
      test = len(checksums) > 1
2225
      if test:
2226
        variants = ["variant %s on %s" %
2227
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2228
                    for (idx, (checksum, nodes)) in
2229
                      enumerate(sorted(checksums.items()))]
2230
      else:
2231
        variants = []
2232

    
2233
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2234
              "File %s found with %s different checksums (%s)",
2235
              filename, len(checksums), "; ".join(variants))
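  # Illustrative sketch (not part of the original code): fileinfo maps each
  # filename to a {checksum: set(node names)} dictionary, e.g.
  #   {"/var/lib/ganeti/config.data": {"0123abcd...": {"node1", "node2"}}}
  # so a file with more than one checksum key has diverging versions, and
  # expected nodes absent from every checksum set are reported as missing;
  # the path, checksum and node names are hypothetical.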
2236

    
2237
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2238
                      drbd_map):
2239
    """Verifies and the node DRBD status.
2240

2241
    @type ninfo: L{objects.Node}
2242
    @param ninfo: the node to check
2243
    @param nresult: the remote results for the node
2244
    @param instanceinfo: the dict of instances
2245
    @param drbd_helper: the configured DRBD usermode helper
2246
    @param drbd_map: the DRBD map as returned by
2247
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2248

2249
    """
2250
    node = ninfo.name
2251
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2252

    
2253
    if drbd_helper:
2254
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2255
      test = (helper_result is None)
2256
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2257
               "no drbd usermode helper returned")
2258
      if helper_result:
2259
        status, payload = helper_result
2260
        test = not status
2261
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2262
                 "drbd usermode helper check unsuccessful: %s", payload)
2263
        test = status and (payload != drbd_helper)
2264
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2265
                 "wrong drbd usermode helper: %s", payload)
2266

    
2267
    # compute the DRBD minors
2268
    node_drbd = {}
2269
    for minor, instance in drbd_map[node].items():
2270
      test = instance not in instanceinfo
2271
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2272
               "ghost instance '%s' in temporary DRBD map", instance)
2273
        # ghost instance should not be running, but otherwise we
2274
        # don't give double warnings (both ghost instance and
2275
        # unallocated minor in use)
2276
      if test:
2277
        node_drbd[minor] = (instance, False)
2278
      else:
2279
        instance = instanceinfo[instance]
2280
        node_drbd[minor] = (instance.name,
2281
                            instance.admin_state == constants.ADMINST_UP)
2282

    
2283
    # and now check them
2284
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2285
    test = not isinstance(used_minors, (tuple, list))
2286
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2287
             "cannot parse drbd status file: %s", str(used_minors))
2288
    if test:
2289
      # we cannot check drbd status
2290
      return
2291

    
2292
    for minor, (iname, must_exist) in node_drbd.items():
2293
      test = minor not in used_minors and must_exist
2294
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2295
               "drbd minor %d of instance %s is not active", minor, iname)
2296
    for minor in used_minors:
2297
      test = minor not in node_drbd
2298
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2299
               "unallocated drbd minor %d is in use", minor)
2300

    
2301
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2302
    """Builds the node OS structures.
2303

2304
    @type ninfo: L{objects.Node}
2305
    @param ninfo: the node to check
2306
    @param nresult: the remote results for the node
2307
    @param nimg: the node image object
2308

2309
    """
2310
    node = ninfo.name
2311
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2312

    
2313
    remote_os = nresult.get(constants.NV_OSLIST, None)
2314
    test = (not isinstance(remote_os, list) or
2315
            not compat.all(isinstance(v, list) and len(v) == 7
2316
                           for v in remote_os))
2317

    
2318
    _ErrorIf(test, constants.CV_ENODEOS, node,
2319
             "node hasn't returned valid OS data")
2320

    
2321
    nimg.os_fail = test
2322

    
2323
    if test:
2324
      return
2325

    
2326
    os_dict = {}
2327

    
2328
    for (name, os_path, status, diagnose,
2329
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2330

    
2331
      if name not in os_dict:
2332
        os_dict[name] = []
2333

    
2334
      # parameters is a list of lists instead of list of tuples due to
2335
      # JSON lacking a real tuple type, fix it:
2336
      parameters = [tuple(v) for v in parameters]
2337
      os_dict[name].append((os_path, status, diagnose,
2338
                            set(variants), set(parameters), set(api_ver)))
2339

    
2340
    nimg.oslist = os_dict
2341

    
2342
  def _VerifyNodeOS(self, ninfo, nimg, base):
2343
    """Verifies the node OS list.
2344

2345
    @type ninfo: L{objects.Node}
2346
    @param ninfo: the node to check
2347
    @param nimg: the node image object
2348
    @param base: the 'template' node we match against (e.g. from the master)
2349

2350
    """
2351
    node = ninfo.name
2352
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2353

    
2354
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2355

    
2356
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2357
    for os_name, os_data in nimg.oslist.items():
2358
      assert os_data, "Empty OS status for OS %s?!" % os_name
2359
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2360
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2361
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2362
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2363
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2364
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2365
      # comparisons with the 'base' image
2366
      test = os_name not in base.oslist
2367
      _ErrorIf(test, constants.CV_ENODEOS, node,
2368
               "Extra OS %s not present on reference node (%s)",
2369
               os_name, base.name)
2370
      if test:
2371
        continue
2372
      assert base.oslist[os_name], "Base node has empty OS status?"
2373
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2374
      if not b_status:
2375
        # base OS is invalid, skipping
2376
        continue
2377
      for kind, a, b in [("API version", f_api, b_api),
2378
                         ("variants list", f_var, b_var),
2379
                         ("parameters", beautify_params(f_param),
2380
                          beautify_params(b_param))]:
2381
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2382
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2383
                 kind, os_name, base.name,
2384
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2385

    
2386
    # check any missing OSes
2387
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2388
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2389
             "OSes present on reference node %s but missing on this node: %s",
2390
             base.name, utils.CommaJoin(missing))
2391

    
2392
  def _VerifyOob(self, ninfo, nresult):
2393
    """Verifies out of band functionality of a node.
2394

2395
    @type ninfo: L{objects.Node}
2396
    @param ninfo: the node to check
2397
    @param nresult: the remote results for the node
2398

2399
    """
2400
    node = ninfo.name
2401
    # We just have to verify the paths on master and/or master candidates
2402
    # as the oob helper is invoked on the master
2403
    if ((ninfo.master_candidate or ninfo.master_capable) and
2404
        constants.NV_OOB_PATHS in nresult):
2405
      for path_result in nresult[constants.NV_OOB_PATHS]:
2406
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2407

    
2408
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2409
    """Verifies and updates the node volume data.
2410

2411
    This function will update a L{NodeImage}'s internal structures
2412
    with data from the remote call.
2413

2414
    @type ninfo: L{objects.Node}
2415
    @param ninfo: the node to check
2416
    @param nresult: the remote results for the node
2417
    @param nimg: the node image object
2418
    @param vg_name: the configured VG name
2419

2420
    """
2421
    node = ninfo.name
2422
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423

    
2424
    nimg.lvm_fail = True
2425
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2426
    if vg_name is None:
2427
      pass
2428
    elif isinstance(lvdata, basestring):
2429
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2430
               utils.SafeEncode(lvdata))
2431
    elif not isinstance(lvdata, dict):
2432
      _ErrorIf(True, constants.CV_ENODELVM, node,
2433
               "rpc call to node failed (lvlist)")
2434
    else:
2435
      nimg.volumes = lvdata
2436
      nimg.lvm_fail = False
2437

    
2438
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2439
    """Verifies and updates the node instance list.
2440

2441
    If the listing was successful, then updates this node's instance
2442
    list. Otherwise, it marks the RPC call as failed for the instance
2443
    list key.
2444

2445
    @type ninfo: L{objects.Node}
2446
    @param ninfo: the node to check
2447
    @param nresult: the remote results for the node
2448
    @param nimg: the node image object
2449

2450
    """
2451
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2452
    test = not isinstance(idata, list)
2453
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2454
                  "rpc call to node failed (instancelist): %s",
2455
                  utils.SafeEncode(str(idata)))
2456
    if test:
2457
      nimg.hyp_fail = True
2458
    else:
2459
      nimg.instances = idata
2460

    
2461
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2462
    """Verifies and computes a node information map
2463

2464
    @type ninfo: L{objects.Node}
2465
    @param ninfo: the node to check
2466
    @param nresult: the remote results for the node
2467
    @param nimg: the node image object
2468
    @param vg_name: the configured VG name
2469

2470
    """
2471
    node = ninfo.name
2472
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2473

    
2474
    # try to read free memory (from the hypervisor)
2475
    hv_info = nresult.get(constants.NV_HVINFO, None)
2476
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2477
    _ErrorIf(test, constants.CV_ENODEHV, node,
2478
             "rpc call to node failed (hvinfo)")
2479
    if not test:
2480
      try:
2481
        nimg.mfree = int(hv_info["memory_free"])
2482
      except (ValueError, TypeError):
2483
        _ErrorIf(True, constants.CV_ENODERPC, node,
2484
                 "node returned invalid nodeinfo, check hypervisor")
2485

    
2486
    # FIXME: devise a free space model for file based instances as well
2487
    if vg_name is not None:
2488
      test = (constants.NV_VGLIST not in nresult or
2489
              vg_name not in nresult[constants.NV_VGLIST])
2490
      _ErrorIf(test, constants.CV_ENODELVM, node,
2491
               "node didn't return data for the volume group '%s'"
2492
               " - it is either missing or broken", vg_name)
2493
      if not test:
2494
        try:
2495
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2496
        except (ValueError, TypeError):
2497
          _ErrorIf(True, constants.CV_ENODERPC, node,
2498
                   "node returned invalid LVM info, check LVM status")
2499

    
2500
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2501
    """Gets per-disk status information for all instances.
2502

2503
    @type nodelist: list of strings
2504
    @param nodelist: Node names
2505
    @type node_image: dict of (name, L{NodeImage})
2506
    @param node_image: Node objects
2507
    @type instanceinfo: dict of (name, L{objects.Instance})
2508
    @param instanceinfo: Instance objects
2509
    @rtype: {instance: {node: [(success, payload)]}}
2510
    @return: a dictionary of per-instance dictionaries with nodes as
2511
        keys and disk information as values; the disk information is a
2512
        list of tuples (success, payload)
2513

2514
    """
2515
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2516

    
2517
    node_disks = {}
2518
    node_disks_devonly = {}
2519
    diskless_instances = set()
2520
    diskless = constants.DT_DISKLESS
2521

    
2522
    for nname in nodelist:
2523
      node_instances = list(itertools.chain(node_image[nname].pinst,
2524
                                            node_image[nname].sinst))
2525
      diskless_instances.update(inst for inst in node_instances
2526
                                if instanceinfo[inst].disk_template == diskless)
2527
      disks = [(inst, disk)
2528
               for inst in node_instances
2529
               for disk in instanceinfo[inst].disks]
2530

    
2531
      if not disks:
2532
        # No need to collect data
2533
        continue
2534

    
2535
      node_disks[nname] = disks
2536

    
2537
      # Creating copies as SetDiskID below will modify the objects and that can
2538
      # lead to incorrect data returned from nodes
2539
      devonly = [dev.Copy() for (_, dev) in disks]
2540

    
2541
      for dev in devonly:
2542
        self.cfg.SetDiskID(dev, nname)
2543

    
2544
      node_disks_devonly[nname] = devonly
2545

    
2546
    assert len(node_disks) == len(node_disks_devonly)
2547

    
2548
    # Collect data from all nodes with disks
2549
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2550
                                                          node_disks_devonly)
2551

    
2552
    assert len(result) == len(node_disks)
2553

    
2554
    instdisk = {}
2555

    
2556
    for (nname, nres) in result.items():
2557
      disks = node_disks[nname]
2558

    
2559
      if nres.offline:
2560
        # No data from this node
2561
        data = len(disks) * [(False, "node offline")]
2562
      else:
2563
        msg = nres.fail_msg
2564
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2565
                 "while getting disk information: %s", msg)
2566
        if msg:
2567
          # No data from this node
2568
          data = len(disks) * [(False, msg)]
2569
        else:
2570
          data = []
2571
          for idx, i in enumerate(nres.payload):
2572
            if isinstance(i, (tuple, list)) and len(i) == 2:
2573
              data.append(i)
2574
            else:
2575
              logging.warning("Invalid result from node %s, entry %d: %s",
2576
                              nname, idx, i)
2577
              data.append((False, "Invalid result from the remote node"))
2578

    
2579
      for ((inst, _), status) in zip(disks, data):
2580
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2581

    
2582
    # Add empty entries for diskless instances.
2583
    for inst in diskless_instances:
2584
      assert inst not in instdisk
2585
      instdisk[inst] = {}
2586

    
2587
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2588
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2589
                      compat.all(isinstance(s, (tuple, list)) and
2590
                                 len(s) == 2 for s in statuses)
2591
                      for inst, nnames in instdisk.items()
2592
                      for nname, statuses in nnames.items())
2593
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2594

    
2595
    return instdisk
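  # Illustrative sketch (not part of the original code): for a mirrored
  # instance with a single disk the returned mapping looks roughly like
  #   {"inst1.example.com": {"node1": [(True, status_payload)],
  #                          "node2": [(False, "node offline")]}}
  # while diskless instances map to an empty dictionary; all names are
  # hypothetical.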
2596

    
2597
  @staticmethod
2598
  def _SshNodeSelector(group_uuid, all_nodes):
2599
    """Create endless iterators for all potential SSH check hosts.
2600

2601
    """
2602
    nodes = [node for node in all_nodes
2603
             if (node.group != group_uuid and
2604
                 not node.offline)]
2605
    keyfunc = operator.attrgetter("group")
2606

    
2607
    return map(itertools.cycle,
2608
               [sorted(map(operator.attrgetter("name"), names))
2609
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2610
                                                  keyfunc)])
2611

    
2612
  @classmethod
2613
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2614
    """Choose which nodes should talk to which other nodes.
2615

2616
    We will make nodes contact all nodes in their group, and one node from
2617
    every other group.
2618

2619
    @warning: This algorithm has a known issue if one node group is much
2620
      smaller than others (e.g. just one node). In such a case all other
2621
      nodes will talk to the single node.
2622

2623
    """
2624
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2625
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2626

    
2627
    return (online_nodes,
2628
            dict((name, sorted([i.next() for i in sel]))
2629
                 for name in online_nodes))
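  # Illustrative sketch (not part of the original code): for a group whose
  # online nodes are ["node1", "node2"] and a single foreign group containing
  # "node9", the method returns roughly
  #   (["node1", "node2"], {"node1": ["node9"], "node2": ["node9"]})
  # i.e. each online node is told to SSH-check one node per foreign group;
  # the node names are hypothetical.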
2630

    
2631
  def BuildHooksEnv(self):
2632
    """Build hooks env.
2633

2634
    Cluster-Verify hooks just ran in the post phase and their failure makes
2635
    the output be logged in the verify output and the verification to fail.
2636

2637
    """
2638
    env = {
2639
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2640
      }
2641

    
2642
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2643
               for node in self.my_node_info.values())
2644

    
2645
    return env
2646

    
2647
  def BuildHooksNodes(self):
2648
    """Build hooks nodes.
2649

2650
    """
2651
    return ([], self.my_node_names)
2652

    
2653
  def Exec(self, feedback_fn):
2654
    """Verify integrity of the node group, performing various test on nodes.
2655

2656
    """
2657
    # This method has too many local variables. pylint: disable=R0914
2658
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2659

    
2660
    if not self.my_node_names:
2661
      # empty node group
2662
      feedback_fn("* Empty node group, skipping verification")
2663
      return True
2664

    
2665
    self.bad = False
2666
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2667
    verbose = self.op.verbose
2668
    self._feedback_fn = feedback_fn
2669

    
2670
    vg_name = self.cfg.GetVGName()
2671
    drbd_helper = self.cfg.GetDRBDHelper()
2672
    cluster = self.cfg.GetClusterInfo()
2673
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2674
    hypervisors = cluster.enabled_hypervisors
2675
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2676

    
2677
    i_non_redundant = [] # Non redundant instances
2678
    i_non_a_balanced = [] # Non auto-balanced instances
2679
    i_offline = 0 # Count of offline instances
2680
    n_offline = 0 # Count of offline nodes
2681
    n_drained = 0 # Count of nodes being drained
2682
    node_vol_should = {}
2683

    
2684
    # FIXME: verify OS list
2685

    
2686
    # File verification
2687
    filemap = _ComputeAncillaryFiles(cluster, False)
2688

    
2689
    # do local checksums
2690
    master_node = self.master_node = self.cfg.GetMasterNode()
2691
    master_ip = self.cfg.GetMasterIP()
2692

    
2693
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2694

    
2695
    user_scripts = []
2696
    if self.cfg.GetUseExternalMipScript():
2697
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2698

    
2699
    node_verify_param = {
2700
      constants.NV_FILELIST:
2701
        utils.UniqueSequence(filename
2702
                             for files in filemap
2703
                             for filename in files),
2704
      constants.NV_NODELIST:
2705
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2706
                                  self.all_node_info.values()),
2707
      constants.NV_HYPERVISOR: hypervisors,
2708
      constants.NV_HVPARAMS:
2709
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2710
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2711
                                 for node in node_data_list
2712
                                 if not node.offline],
2713
      constants.NV_INSTANCELIST: hypervisors,
2714
      constants.NV_VERSION: None,
2715
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2716
      constants.NV_NODESETUP: None,
2717
      constants.NV_TIME: None,
2718
      constants.NV_MASTERIP: (master_node, master_ip),
2719
      constants.NV_OSLIST: None,
2720
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2721
      constants.NV_USERSCRIPTS: user_scripts,
2722
      }
2723

    
2724
    if vg_name is not None:
2725
      node_verify_param[constants.NV_VGLIST] = None
2726
      node_verify_param[constants.NV_LVLIST] = vg_name
2727
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2728
      node_verify_param[constants.NV_DRBDLIST] = None
2729

    
2730
    if drbd_helper:
2731
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2732

    
2733
    # bridge checks
2734
    # FIXME: this needs to be changed per node-group, not cluster-wide
2735
    bridges = set()
2736
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2737
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2738
      bridges.add(default_nicpp[constants.NIC_LINK])
2739
    for instance in self.my_inst_info.values():
2740
      for nic in instance.nics:
2741
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2742
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2743
          bridges.add(full_nic[constants.NIC_LINK])
2744

    
2745
    if bridges:
2746
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2747

    
2748
    # Build our expected cluster state
2749
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2750
                                                 name=node.name,
2751
                                                 vm_capable=node.vm_capable))
2752
                      for node in node_data_list)
2753

    
2754
    # Gather OOB paths
2755
    oob_paths = []
2756
    for node in self.all_node_info.values():
2757
      path = _SupportsOob(self.cfg, node)
2758
      if path and path not in oob_paths:
2759
        oob_paths.append(path)
2760

    
2761
    if oob_paths:
2762
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2763

    
2764
    for instance in self.my_inst_names:
2765
      inst_config = self.my_inst_info[instance]
2766

    
2767
      for nname in inst_config.all_nodes:
2768
        if nname not in node_image:
2769
          gnode = self.NodeImage(name=nname)
2770
          gnode.ghost = (nname not in self.all_node_info)
2771
          node_image[nname] = gnode
2772

    
2773
      inst_config.MapLVsByNode(node_vol_should)
2774

    
2775
      pnode = inst_config.primary_node
2776
      node_image[pnode].pinst.append(instance)
2777

    
2778
      for snode in inst_config.secondary_nodes:
2779
        nimg = node_image[snode]
2780
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          # FIXME: investigate best way to handle offline insts
          if inst.admin_state == constants.ADMINST_OFFLINE:
            if verbose:
              feedback_fn("* Skipping offline instance %s" % inst.name)
            i_offline += 1
            continue
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.
3182

3183
    @rtype: tuple of three items
3184
    @return: a tuple of (dict of node-to-node_error, list of instances
3185
        which need activate-disks, dict of instance: (node, volume) for
3186
        missing volumes
3187

3188
    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3318
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # sizes in result.payload are in bytes; convert to MiB, the unit
        # used for disk.size in the configuration
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
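      # Illustrative note (not part of the original source): "mods" is a list
      # of (action, os_name) pairs, where the action is constants.DDM_ADD or
      # constants.DDM_REMOVE, e.g. [(constants.DDM_ADD, "debian-image")].
      # The OS name above is a made-up example.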
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    self.rpc.call_node_activate_master_ip(master_params.name,
                                          master_params, ems)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
                                            ems)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REG_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
4257
    errs = []
4258
    if self.op.command == constants.OOB_HEALTH:
4259
      if not isinstance(result.payload, list):
4260
        errs.append("command 'health' is expected to return a list but got %s" %
4261
                    type(result.payload))
4262
      else:
4263
        for item, status in result.payload:
4264
          if status not in constants.OOB_STATUSES:
4265
            errs.append("health item '%s' has invalid status '%s'" %
4266
                        (item, status))
4267

    
4268
    if self.op.command == constants.OOB_POWER_STATUS:
4269
      if not isinstance(result.payload, dict):
4270
        errs.append("power-status is expected to return a dict but got %s" %
4271
                    type(result.payload))
4272

    
4273
    if self.op.command in [
4274
        constants.OOB_POWER_ON,
4275
        constants.OOB_POWER_OFF,
4276
        constants.OOB_POWER_CYCLE,
4277
        ]:
4278
      if result.payload is not None:
4279
        errs.append("%s is expected to not return payload but got '%s'" %
4280
                    (self.op.command, result.payload))
4281

    
4282
    if errs:
4283
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4284
                               utils.CommaJoin(errs))
4285

    
4286

    
4287
class _OsQuery(_QueryBase):
4288
  FIELDS = query.OS_FIELDS
4289

    
4290
  def ExpandNames(self, lu):
4291
    # Lock all nodes in shared mode
4292
    # Temporary removal of locks, should be reverted later
4293
    # TODO: reintroduce locks when they are lighter-weight
4294
    lu.needed_locks = {}
4295
    #self.share_locks[locking.LEVEL_NODE] = 1
4296
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4297

    
4298
    # The following variables interact with _QueryBase._GetNames
4299
    if self.names:
4300
      self.wanted = self.names
4301
    else:
4302
      self.wanted = locking.ALL_SET
4303

    
4304
    self.do_locking = self.use_locking
4305

    
4306
  def DeclareLocks(self, lu, level):
4307
    pass
4308

    
4309
  @staticmethod
4310
  def _DiagnoseByOS(rlist):
4311
    """Remaps a per-node return list into an a per-os per-node dictionary
4312

4313
    @param rlist: a map with node names as keys and OS objects as values
4314

4315
    @rtype: dict
4316
    @return: a dictionary with osnames as keys and as value another
4317
        map, with nodes as keys and tuples of (path, status, diagnose,
4318
        variants, parameters, api_versions) as values, eg::
4319

4320
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4321
                                     (/srv/..., False, "invalid api")],
4322
                           "node2": [(/srv/..., True, "", [], [])]}
4323
          }
4324

4325
    """
4326
    all_os = {}
4327
    # we build here the list of nodes that didn't fail the RPC (at RPC
4328
    # level), so that nodes with a non-responding node daemon don't
4329
    # make all OSes invalid
4330
    good_nodes = [node_name for node_name in rlist
4331
                  if not rlist[node_name].fail_msg]
4332
    for node_name, nr in rlist.items():
4333
      if nr.fail_msg or not nr.payload:
4334
        continue
4335
      for (name, path, status, diagnose, variants,
4336
           params, api_versions) in nr.payload:
4337
        if name not in all_os:
4338
          # build a list of nodes for this os containing empty lists
4339
          # for each node in node_list
4340
          all_os[name] = {}
4341
          for nname in good_nodes:
4342
            all_os[name][nname] = []
4343
        # convert params from [name, help] to (name, help)
4344
        params = [tuple(v) for v in params]
4345
        all_os[name][node_name].append((path, status, diagnose,
4346
                                        variants, params, api_versions))
4347
    return all_os
4348

    
4349
  def _GetQueryData(self, lu):
4350
    """Computes the list of nodes and their attributes.
4351

4352
    """
4353
    # Locking is not used
4354
    assert not (compat.any(lu.glm.is_owned(level)
4355
                           for level in locking.LEVELS
4356
                           if level != locking.LEVEL_CLUSTER) or
4357
                self.do_locking or self.use_locking)
4358

    
4359
    valid_nodes = [node.name
4360
                   for node in lu.cfg.GetAllNodesInfo().values()
4361
                   if not node.offline and node.vm_capable]
4362
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4363
    cluster = lu.cfg.GetClusterInfo()
4364

    
4365
    data = {}
4366

    
4367
    for (os_name, os_data) in pol.items():
4368
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4369
                          hidden=(os_name in cluster.hidden_os),
4370
                          blacklisted=(os_name in cluster.blacklisted_os))
4371

    
4372
      variants = set()
4373
      parameters = set()
4374
      api_versions = set()
4375

    
4376
      for idx, osl in enumerate(os_data.values()):
4377
        info.valid = bool(info.valid and osl and osl[0][1])
4378
        if not info.valid:
4379
          break
4380

    
4381
        (node_variants, node_params, node_api) = osl[0][3:6]
4382
        if idx == 0:
4383
          # First entry
4384
          variants.update(node_variants)
4385
          parameters.update(node_params)
4386
          api_versions.update(node_api)
4387
        else:
4388
          # Filter out inconsistent values
4389
          variants.intersection_update(node_variants)
4390
          parameters.intersection_update(node_params)
4391
          api_versions.intersection_update(node_api)
4392

    
4393
      info.variants = list(variants)
4394
      info.parameters = list(parameters)
4395
      info.api_versions = list(api_versions)
4396

    
4397
      data[os_name] = info
4398

    
4399
    # Prepare data in requested order
4400
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4401
            if name in data]
4402

    
4403

    
4404
class LUOsDiagnose(NoHooksLU):
4405
  """Logical unit for OS diagnose/query.
4406

4407
  """
4408
  REQ_BGL = False
4409

    
4410
  @staticmethod
4411
  def _BuildFilter(fields, names):
4412
    """Builds a filter for querying OSes.
4413

4414
    """
4415
    name_filter = qlang.MakeSimpleFilter("name", names)
4416

    
4417
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4418
    # respective field is not requested
4419
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4420
                     for fname in ["hidden", "blacklisted"]
4421
                     if fname not in fields]
4422
    if "valid" not in fields:
4423
      status_filter.append([qlang.OP_TRUE, "valid"])
4424

    
4425
    if status_filter:
4426
      status_filter.insert(0, qlang.OP_AND)
4427
    else:
4428
      status_filter = None
4429

    
4430
    if name_filter and status_filter:
4431
      return [qlang.OP_AND, name_filter, status_filter]
4432
    elif name_filter:
4433
      return name_filter
4434
    else:
4435
      return status_filter
4436

    
4437
  def CheckArguments(self):
4438
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4439
                       self.op.output_fields, False)
4440

    
4441
  def ExpandNames(self):
4442
    self.oq.ExpandNames(self)
4443

    
4444
  def Exec(self, feedback_fn):
4445
    return self.oq.OldStyleQuery(self)
4446

    
4447

    
4448
class LUNodeRemove(LogicalUnit):
4449
  """Logical unit for removing a node.
4450

4451
  """
4452
  HPATH = "node-remove"
4453
  HTYPE = constants.HTYPE_NODE
4454

    
4455
  def BuildHooksEnv(self):
4456
    """Build hooks env.
4457

4458
    This doesn't run on the target node in the pre phase as a failed
4459
    node would then be impossible to remove.
4460

4461
    """
4462
    return {
4463
      "OP_TARGET": self.op.node_name,
4464
      "NODE_NAME": self.op.node_name,
4465
      }
4466

    
4467
  def BuildHooksNodes(self):
4468
    """Build hooks nodes.
4469

4470
    """
4471
    all_nodes = self.cfg.GetNodeList()
4472
    try:
4473
      all_nodes.remove(self.op.node_name)
4474
    except ValueError:
4475
      logging.warning("Node '%s', which is about to be removed, was not found"
4476
                      " in the list of all nodes", self.op.node_name)
4477
    return (all_nodes, all_nodes)
4478

    
4479
  def CheckPrereq(self):
4480
    """Check prerequisites.
4481

4482
    This checks:
4483
     - the node exists in the configuration
4484
     - it does not have primary or secondary instances
4485
     - it's not the master
4486

4487
    Any errors are signaled by raising errors.OpPrereqError.
4488

4489
    """
4490
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4491
    node = self.cfg.GetNodeInfo(self.op.node_name)
4492
    assert node is not None
4493

    
4494
    masternode = self.cfg.GetMasterNode()
4495
    if node.name == masternode:
4496
      raise errors.OpPrereqError("Node is the master node, failover to another"
4497
                                 " node is required", errors.ECODE_INVAL)
4498

    
4499
    for instance_name, instance in self.cfg.GetAllInstancesInfo():
4500
      if node.name in instance.all_nodes:
4501
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4502
                                   " please remove first" % instance_name,
4503
                                   errors.ECODE_INVAL)
4504
    self.op.node_name = node.name
4505
    self.node = node
4506

    
4507
  def Exec(self, feedback_fn):
4508
    """Removes the node from the cluster.
4509

4510
    """
4511
    node = self.node
4512
    logging.info("Stopping the node daemon and removing configs from node %s",
4513
                 node.name)
4514

    
4515
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4516

    
4517
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4518
      "Not owning BGL"
4519

    
4520
    # Promote nodes to master candidate as needed
4521
    _AdjustCandidatePool(self, exceptions=[node.name])
4522
    self.context.RemoveNode(node.name)
4523

    
4524
    # Run post hooks on the node before it's removed
4525
    _RunPostHook(self, node.name)
4526

    
4527
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4528
    msg = result.fail_msg
4529
    if msg:
4530
      self.LogWarning("Errors encountered on the remote node while leaving"
4531
                      " the cluster: %s", msg)
4532

    
4533
    # Remove node from our /etc/hosts
4534
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4535
      master_node = self.cfg.GetMasterNode()
4536
      result = self.rpc.call_etc_hosts_modify(master_node,
4537
                                              constants.ETC_HOSTS_REMOVE,
4538
                                              node.name, None)
4539
      result.Raise("Can't update hosts file with new host data")
4540
      _RedistributeAncillaryFiles(self)
4541

    
4542

    
4543
class _NodeQuery(_QueryBase):
4544
  FIELDS = query.NODE_FIELDS
4545

    
4546
  def ExpandNames(self, lu):
4547
    lu.needed_locks = {}
4548
    lu.share_locks = _ShareAll()
4549

    
4550
    if self.names:
4551
      self.wanted = _GetWantedNodes(lu, self.names)
4552
    else:
4553
      self.wanted = locking.ALL_SET
4554

    
4555
    self.do_locking = (self.use_locking and
4556
                       query.NQ_LIVE in self.requested_data)
4557

    
4558
    if self.do_locking:
4559
      # If any non-static field is requested we need to lock the nodes
4560
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4561

    
4562
  def DeclareLocks(self, lu, level):
4563
    pass
4564

    
4565
  def _GetQueryData(self, lu):
4566
    """Computes the list of nodes and their attributes.
4567

4568
    """
4569
    all_info = lu.cfg.GetAllNodesInfo()
4570

    
4571
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4572

    
4573
    # Gather data as requested
4574
    if query.NQ_LIVE in self.requested_data:
4575
      # filter out non-vm_capable nodes
4576
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4577

    
4578
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4579
                                        lu.cfg.GetHypervisorType())
4580
      live_data = dict((name, nresult.payload)
4581
                       for (name, nresult) in node_data.items()
4582
                       if not nresult.fail_msg and nresult.payload)
4583
    else:
4584
      live_data = None
4585

    
4586
    if query.NQ_INST in self.requested_data:
4587
      node_to_primary = dict([(name, set()) for name in nodenames])
4588
      node_to_secondary = dict([(name, set()) for name in nodenames])
4589

    
4590
      inst_data = lu.cfg.GetAllInstancesInfo()
4591

    
4592
      for inst in inst_data.values():
4593
        if inst.primary_node in node_to_primary:
4594
          node_to_primary[inst.primary_node].add(inst.name)
4595
        for secnode in inst.secondary_nodes:
4596
          if secnode in node_to_secondary:
4597
            node_to_secondary[secnode].add(inst.name)
4598
    else:
4599
      node_to_primary = None
4600
      node_to_secondary = None
4601

    
4602
    if query.NQ_OOB in self.requested_data:
4603
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4604
                         for name, node in all_info.iteritems())
4605
    else:
4606
      oob_support = None
4607

    
4608
    if query.NQ_GROUP in self.requested_data:
4609
      groups = lu.cfg.GetAllNodeGroupsInfo()
4610
    else:
4611
      groups = {}
4612

    
4613
    return query.NodeQueryData([all_info[name] for name in nodenames],
4614
                               live_data, lu.cfg.GetMasterNode(),
4615
                               node_to_primary, node_to_secondary, groups,
4616
                               oob_support, lu.cfg.GetClusterInfo())
4617

    
4618

    
4619
class LUNodeQuery(NoHooksLU):
4620
  """Logical unit for querying nodes.
4621

4622
  """
4623
  # pylint: disable=W0142
4624
  REQ_BGL = False
4625

    
4626
  def CheckArguments(self):
4627
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4628
                         self.op.output_fields, self.op.use_locking)
4629

    
4630
  def ExpandNames(self):
4631
    self.nq.ExpandNames(self)
4632

    
4633
  def DeclareLocks(self, level):
4634
    self.nq.DeclareLocks(self, level)
4635

    
4636
  def Exec(self, feedback_fn):
4637
    return self.nq.OldStyleQuery(self)
4638

    
4639

    
4640
class LUNodeQueryvols(NoHooksLU):
4641
  """Logical unit for getting volumes on node(s).
4642

4643
  """
4644
  REQ_BGL = False
4645
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4646
  _FIELDS_STATIC = utils.FieldSet("node")
4647

    
4648
  def CheckArguments(self):
4649
    _CheckOutputFields(static=self._FIELDS_STATIC,
4650
                       dynamic=self._FIELDS_DYNAMIC,
4651
                       selected=self.op.output_fields)
4652

    
4653
  def ExpandNames(self):
4654
    self.share_locks = _ShareAll()
4655
    self.needed_locks = {}
4656

    
4657
    if not self.op.nodes:
4658
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4659
    else:
4660
      self.needed_locks[locking.LEVEL_NODE] = \
4661
        _GetWantedNodes(self, self.op.nodes)
4662

    
4663
  def Exec(self, feedback_fn):
4664
    """Computes the list of nodes and their attributes.
4665

4666
    """
4667
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4668
    volumes = self.rpc.call_node_volumes(nodenames)
4669

    
4670
    ilist = self.cfg.GetAllInstancesInfo()
4671
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4672

    
4673
    output = []
4674
    for node in nodenames:
4675
      nresult = volumes[node]
4676
      if nresult.offline:
4677
        continue
4678
      msg = nresult.fail_msg
4679
      if msg:
4680
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4681
        continue
4682

    
4683
      node_vols = sorted(nresult.payload,
4684
                         key=operator.itemgetter("dev"))
4685

    
4686
      for vol in node_vols:
4687
        node_output = []
4688
        for field in self.op.output_fields:
4689
          if field == "node":
4690
            val = node
4691
          elif field == "phys":
4692
            val = vol["dev"]
4693
          elif field == "vg":
4694
            val = vol["vg"]
4695
          elif field == "name":
4696
            val = vol["name"]
4697
          elif field == "size":
4698
            val = int(float(vol["size"]))
4699
          elif field == "instance":
4700
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4701
          else:
4702
            raise errors.ParameterError(field)
4703
          node_output.append(str(val))
4704

    
4705
        output.append(node_output)
4706

    
4707
    return output
4708

    
4709

    
4710
class LUNodeQueryStorage(NoHooksLU):
4711
  """Logical unit for getting information on storage units on node(s).
4712

4713
  """
4714
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4715
  REQ_BGL = False
4716

    
4717
  def CheckArguments(self):
4718
    _CheckOutputFields(static=self._FIELDS_STATIC,
4719
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4720
                       selected=self.op.output_fields)
4721

    
4722
  def ExpandNames(self):
4723
    self.share_locks = _ShareAll()
4724
    self.needed_locks = {}
4725

    
4726
    if self.op.nodes:
4727
      self.needed_locks[locking.LEVEL_NODE] = \
4728
        _GetWantedNodes(self, self.op.nodes)
4729
    else:
4730
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4731

    
4732
  def Exec(self, feedback_fn):
4733
    """Computes the list of nodes and their attributes.
4734

4735
    """
4736
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4737

    
4738
    # Always get name to sort by
4739
    if constants.SF_NAME in self.op.output_fields:
4740
      fields = self.op.output_fields[:]
4741
    else:
4742
      fields = [constants.SF_NAME] + self.op.output_fields
4743

    
4744
    # Never ask for node or type as it's only known to the LU
4745
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4746
      while extra in fields:
4747
        fields.remove(extra)
4748

    
4749
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4750
    name_idx = field_idx[constants.SF_NAME]
4751

    
4752
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4753
    data = self.rpc.call_storage_list(self.nodes,
4754
                                      self.op.storage_type, st_args,
4755
                                      self.op.name, fields)
4756

    
4757
    result = []
4758

    
4759
    for node in utils.NiceSort(self.nodes):
4760
      nresult = data[node]
4761
      if nresult.offline:
4762
        continue
4763

    
4764
      msg = nresult.fail_msg
4765
      if msg:
4766
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4767
        continue
4768

    
4769
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4770

    
4771
      for name in utils.NiceSort(rows.keys()):
4772
        row = rows[name]
4773

    
4774
        out = []
4775

    
4776
        for field in self.op.output_fields:
4777
          if field == constants.SF_NODE:
4778
            val = node
4779
          elif field == constants.SF_TYPE:
4780
            val = self.op.storage_type
4781
          elif field in field_idx:
4782
            val = row[field_idx[field]]
4783
          else:
4784
            raise errors.ParameterError(field)
4785

    
4786
          out.append(val)
4787

    
4788
        result.append(out)
4789

    
4790
    return result
4791

    
4792

    
4793
class _InstanceQuery(_QueryBase):
4794
  FIELDS = query.INSTANCE_FIELDS
4795

    
4796
  def ExpandNames(self, lu):
4797
    lu.needed_locks = {}
4798
    lu.share_locks = _ShareAll()
4799

    
4800
    if self.names:
4801
      self.wanted = _GetWantedInstances(lu, self.names)
4802
    else:
4803
      self.wanted = locking.ALL_SET
4804

    
4805
    self.do_locking = (self.use_locking and
4806
                       query.IQ_LIVE in self.requested_data)
4807
    if self.do_locking:
4808
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4809
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4810
      lu.needed_locks[locking.LEVEL_NODE] = []
4811
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4812

    
4813
    self.do_grouplocks = (self.do_locking and
4814
                          query.IQ_NODES in self.requested_data)
4815

    
4816
  def DeclareLocks(self, lu, level):
4817
    if self.do_locking:
4818
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4819
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4820

    
4821
        # Lock all groups used by instances optimistically; this requires going
4822
        # via the node before it's locked, requiring verification later on
4823
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4824
          set(group_uuid
4825
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4826
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4827
      elif level == locking.LEVEL_NODE:
4828
        lu._LockInstancesNodes() # pylint: disable=W0212
4829

    
4830
  @staticmethod
4831
  def _CheckGroupLocks(lu):
4832
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4833
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4834

    
4835
    # Check if node groups for locked instances are still correct
4836
    for instance_name in owned_instances:
4837
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4838

    
4839
  def _GetQueryData(self, lu):
4840
    """Computes the list of instances and their attributes.
4841

4842
    """
4843
    if self.do_grouplocks:
4844
      self._CheckGroupLocks(lu)
4845

    
4846
    cluster = lu.cfg.GetClusterInfo()
4847
    all_info = lu.cfg.GetAllInstancesInfo()
4848

    
4849
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4850

    
4851
    instance_list = [all_info[name] for name in instance_names]
4852
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4853
                                        for inst in instance_list)))
4854
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4855
    bad_nodes = []
4856
    offline_nodes = []
4857
    wrongnode_inst = set()
4858

    
4859
    # Gather data as requested
4860
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4861
      live_data = {}
4862
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4863
      for name in nodes:
4864
        result = node_data[name]
4865
        if result.offline:
4866
          # offline nodes will be in both lists
4867
          assert result.fail_msg
4868
          offline_nodes.append(name)
4869
        if result.fail_msg:
4870
          bad_nodes.append(name)
4871
        elif result.payload:
4872
          for inst in result.payload:
4873
            if inst in all_info:
4874
              if all_info[inst].primary_node == name:
4875
                live_data.update(result.payload)
4876
              else:
4877
                wrongnode_inst.add(inst)
4878
            else:
4879
              # orphan instance; we don't list it here as we don't
4880
              # handle this case yet in the output of instance listing
4881
              logging.warning("Orphan instance '%s' found on node %s",
4882
                              inst, name)
4883
        # else no instance is alive
4884
    else:
4885
      live_data = {}
4886

    
4887
    if query.IQ_DISKUSAGE in self.requested_data:
4888
      disk_usage = dict((inst.name,
4889
                         _ComputeDiskSize(inst.disk_template,
4890
                                          [{constants.IDISK_SIZE: disk.size}
4891
                                           for disk in inst.disks]))
4892
                        for inst in instance_list)
4893
    else:
4894
      disk_usage = None
4895

    
4896
    if query.IQ_CONSOLE in self.requested_data:
4897
      consinfo = {}
4898
      for inst in instance_list:
4899
        if inst.name in live_data:
4900
          # Instance is running
4901
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4902
        else:
4903
          consinfo[inst.name] = None
4904
      assert set(consinfo.keys()) == set(instance_names)
4905
    else:
4906
      consinfo = None
4907

    
4908
    if query.IQ_NODES in self.requested_data:
4909
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4910
                                            instance_list)))
4911
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4912
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4913
                    for uuid in set(map(operator.attrgetter("group"),
4914
                                        nodes.values())))
4915
    else:
4916
      nodes = None
4917
      groups = None
4918

    
4919
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4920
                                   disk_usage, offline_nodes, bad_nodes,
4921
                                   live_data, wrongnode_inst, consinfo,
4922
                                   nodes, groups)
4923

    
4924

    
4925
class LUQuery(NoHooksLU):
4926
  """Query for resources/items of a certain kind.
4927

4928
  """
4929
  # pylint: disable=W0142
4930
  REQ_BGL = False
4931

    
4932
  def CheckArguments(self):
4933
    qcls = _GetQueryImplementation(self.op.what)
4934

    
4935
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4936

    
4937
  def ExpandNames(self):
4938
    self.impl.ExpandNames(self)
4939

    
4940
  def DeclareLocks(self, level):
4941
    self.impl.DeclareLocks(self, level)
4942

    
4943
  def Exec(self, feedback_fn):
4944
    return self.impl.NewStyleQuery(self)
4945

    
4946

    
4947
class LUQueryFields(NoHooksLU):
4948
  """Query for resources/items of a certain kind.
4949

4950
  """
4951
  # pylint: disable=W0142
4952
  REQ_BGL = False
4953

    
4954
  def CheckArguments(self):
4955
    self.qcls = _GetQueryImplementation(self.op.what)
4956

    
4957
  def ExpandNames(self):
4958
    self.needed_locks = {}
4959

    
4960
  def Exec(self, feedback_fn):
4961
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4962

    
4963

    
4964
class LUNodeModifyStorage(NoHooksLU):
4965
  """Logical unit for modifying a storage volume on a node.
4966

4967
  """
4968
  REQ_BGL = False
4969

    
4970
  def CheckArguments(self):
4971
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4972

    
4973
    storage_type = self.op.storage_type
4974

    
4975
    try:
4976
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4977
    except KeyError:
4978
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4979
                                 " modified" % storage_type,
4980
                                 errors.ECODE_INVAL)
4981

    
4982
    diff = set(self.op.changes.keys()) - modifiable
4983
    if diff:
4984
      raise errors.OpPrereqError("The following fields can not be modified for"
4985
                                 " storage units of type '%s': %r" %
4986
                                 (storage_type, list(diff)),
4987
                                 errors.ECODE_INVAL)
4988

    
4989
  def ExpandNames(self):
4990
    self.needed_locks = {
4991
      locking.LEVEL_NODE: self.op.node_name,
4992
      }
4993

    
4994
  def Exec(self, feedback_fn):
4995
    """Computes the list of nodes and their attributes.
4996

4997
    """
4998
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4999
    result = self.rpc.call_storage_modify(self.op.node_name,
5000
                                          self.op.storage_type, st_args,
5001
                                          self.op.name, self.op.changes)
5002
    result.Raise("Failed to modify storage unit '%s' on %s" %
5003
                 (self.op.name, self.op.node_name))
5004

    
5005

    
5006
class LUNodeAdd(LogicalUnit):
5007
  """Logical unit for adding node to the cluster.
5008

5009
  """
5010
  HPATH = "node-add"
5011
  HTYPE = constants.HTYPE_NODE
5012
  _NFLAGS = ["master_capable", "vm_capable"]
5013

    
5014
  def CheckArguments(self):
5015
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5016
    # validate/normalize the node name
5017
    self.hostname = netutils.GetHostname(name=self.op.node_name,
5018
                                         family=self.primary_ip_family)
5019
    self.op.node_name = self.hostname.name
5020

    
5021
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5022
      raise errors.OpPrereqError("Cannot readd the master node",
5023
                                 errors.ECODE_STATE)
5024

    
5025
    if self.op.readd and self.op.group:
5026
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
5027
                                 " being readded", errors.ECODE_INVAL)
5028

    
5029
  def BuildHooksEnv(self):
5030
    """Build hooks env.
5031

5032
    This will run on all nodes before, and on all nodes + the new node after.
5033

5034
    """
5035
    return {
5036
      "OP_TARGET": self.op.node_name,
5037
      "NODE_NAME": self.op.node_name,
5038
      "NODE_PIP": self.op.primary_ip,
5039
      "NODE_SIP": self.op.secondary_ip,
5040
      "MASTER_CAPABLE": str(self.op.master_capable),
5041
      "VM_CAPABLE": str(self.op.vm_capable),
5042
      }
5043

    
5044
  def BuildHooksNodes(self):
5045
    """Build hooks nodes.
5046

5047
    """
5048
    # Exclude added node
5049
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5050
    post_nodes = pre_nodes + [self.op.node_name, ]
5051

    
5052
    return (pre_nodes, post_nodes)
5053

    
5054
  def CheckPrereq(self):
5055
    """Check prerequisites.
5056

5057
    This checks:
5058
     - the new node is not already in the config
5059
     - it is resolvable
5060
     - its parameters (single/dual homed) matches the cluster
5061

5062
    Any errors are signaled by raising errors.OpPrereqError.
5063

5064
    """
5065
    cfg = self.cfg
5066
    hostname = self.hostname
5067
    node = hostname.name
5068
    primary_ip = self.op.primary_ip = hostname.ip
5069
    if self.op.secondary_ip is None:
5070
      if self.primary_ip_family == netutils.IP6Address.family:
5071
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5072
                                   " IPv4 address must be given as secondary",
5073
                                   errors.ECODE_INVAL)
5074
      self.op.secondary_ip = primary_ip
5075

    
5076
    secondary_ip = self.op.secondary_ip
5077
    if not netutils.IP4Address.IsValid(secondary_ip):
5078
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5079
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5080

    
5081
    node_list = cfg.GetNodeList()
5082
    if not self.op.readd and node in node_list:
5083
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5084
                                 node, errors.ECODE_EXISTS)
5085
    elif self.op.readd and node not in node_list:
5086
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5087
                                 errors.ECODE_NOENT)
5088

    
5089
    self.changed_primary_ip = False
5090

    
5091
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5092
      if self.op.readd and node == existing_node_name:
5093
        if existing_node.secondary_ip != secondary_ip:
5094
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5095
                                     " address configuration as before",
5096
                                     errors.ECODE_INVAL)
5097
        if existing_node.primary_ip != primary_ip:
5098
          self.changed_primary_ip = True
5099

    
5100
        continue
5101

    
5102
      if (existing_node.primary_ip == primary_ip or
5103
          existing_node.secondary_ip == primary_ip or
5104
          existing_node.primary_ip == secondary_ip or
5105
          existing_node.secondary_ip == secondary_ip):
5106
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5107
                                   " existing node %s" % existing_node.name,
5108
                                   errors.ECODE_NOTUNIQUE)
5109

    
5110
    # After this 'if' block, None is no longer a valid value for the
5111
    # _capable op attributes
5112
    if self.op.readd:
5113
      old_node = self.cfg.GetNodeInfo(node)
5114
      assert old_node is not None, "Can't retrieve locked node %s" % node
5115
      for attr in self._NFLAGS:
5116
        if getattr(self.op, attr) is None:
5117
          setattr(self.op, attr, getattr(old_node, attr))
5118
    else:
5119
      for attr in self._NFLAGS:
5120
        if getattr(self.op, attr) is None:
5121
          setattr(self.op, attr, True)
5122

    
5123
    if self.op.readd and not self.op.vm_capable:
5124
      pri, sec = cfg.GetNodeInstances(node)
5125
      if pri or sec:
5126
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5127
                                   " flag set to false, but it already holds"
5128
                                   " instances" % node,
5129
                                   errors.ECODE_STATE)
5130

    
5131
    # check that the type of the node (single versus dual homed) is the
5132
    # same as for the master
5133
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5134
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5135
    newbie_singlehomed = secondary_ip == primary_ip
5136
    if master_singlehomed != newbie_singlehomed:
5137
      if master_singlehomed:
5138
        raise errors.OpPrereqError("The master has no secondary ip but the"
5139
                                   " new node has one",
5140
                                   errors.ECODE_INVAL)
5141
      else:
5142
        raise errors.OpPrereqError("The master has a secondary ip but the"
5143
                                   " new node doesn't have one",
5144
                                   errors.ECODE_INVAL)
5145

    
5146
    # checks reachability
5147
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5148
      raise errors.OpPrereqError("Node not reachable by ping",
5149
                                 errors.ECODE_ENVIRON)
5150

    
5151
    if not newbie_singlehomed:
5152
      # check reachability from my secondary ip to newbie's secondary ip
5153
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5154
                           source=myself.secondary_ip):
5155
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5156
                                   " based ping to node daemon port",
5157
                                   errors.ECODE_ENVIRON)
5158

    
5159
    if self.op.readd:
5160
      exceptions = [node]
5161
    else:
5162
      exceptions = []
5163

    
5164
    if self.op.master_capable:
5165
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5166
    else:
5167
      self.master_candidate = False
5168

    
5169
    if self.op.readd:
5170
      self.new_node = old_node
5171
    else:
5172
      node_group = cfg.LookupNodeGroup(self.op.group)
5173
      self.new_node = objects.Node(name=node,
5174
                                   primary_ip=primary_ip,
5175
                                   secondary_ip=secondary_ip,
5176
                                   master_candidate=self.master_candidate,
5177
                                   offline=False, drained=False,
5178
                                   group=node_group)
5179

    
5180
    if self.op.ndparams:
5181
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5182

    
5183
  def Exec(self, feedback_fn):
5184
    """Adds the new node to the cluster.
5185

5186
    """
5187
    new_node = self.new_node
5188
    node = new_node.name
5189

    
5190
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5191
      "Not owning BGL"
5192

    
5193
    # We adding a new node so we assume it's powered
5194
    new_node.powered = True
5195

    
5196
    # for re-adds, reset the offline/drained/master-candidate flags;
5197
    # we need to reset here, otherwise offline would prevent RPC calls
5198
    # later in the procedure; this also means that if the re-add
5199
    # fails, we are left with a non-offlined, broken node
5200
    if self.op.readd:
5201
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5202
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5203
      # if we demote the node, we do cleanup later in the procedure
5204
      new_node.master_candidate = self.master_candidate
5205
      if self.changed_primary_ip:
5206
        new_node.primary_ip = self.op.primary_ip
5207

    
5208
    # copy the master/vm_capable flags
5209
    for attr in self._NFLAGS:
5210
      setattr(new_node, attr, getattr(self.op, attr))
5211

    
5212
    # notify the user about any possible mc promotion
5213
    if new_node.master_candidate:
5214
      self.LogInfo("Node will be a master candidate")
5215

    
5216
    if self.op.ndparams:
5217
      new_node.ndparams = self.op.ndparams
5218
    else:
5219
      new_node.ndparams = {}
5220

    
5221
    # check connectivity
5222
    result = self.rpc.call_version([node])[node]
5223
    result.Raise("Can't get version information from node %s" % node)
5224
    if constants.PROTOCOL_VERSION == result.payload:
5225
      logging.info("Communication to node %s fine, sw version %s match",
5226
                   node, result.payload)
5227
    else:
5228
      raise errors.OpExecError("Version mismatch master version %s,"
5229
                               " node version %s" %
5230
                               (constants.PROTOCOL_VERSION, result.payload))
5231

    
5232
    # Add node to our /etc/hosts, and add key to known_hosts
5233
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5234
      master_node = self.cfg.GetMasterNode()
5235
      result = self.rpc.call_etc_hosts_modify(master_node,
5236
                                              constants.ETC_HOSTS_ADD,
5237
                                              self.hostname.name,
5238
                                              self.hostname.ip)
5239
      result.Raise("Can't update hosts file with new host data")
5240

    
5241
    if new_node.secondary_ip != new_node.primary_ip:
5242
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5243
                               False)
5244

    
5245
    node_verify_list = [self.cfg.GetMasterNode()]
5246
    node_verify_param = {
5247
      constants.NV_NODELIST: ([node], {}),
5248
      # TODO: do a node-net-test as well?
5249
    }
5250

    
5251
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5252
                                       self.cfg.GetClusterName())
5253
    for verifier in node_verify_list:
5254
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5255
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5256
      if nl_payload:
5257
        for failed in nl_payload:
5258
          feedback_fn("ssh/hostname verification failed"
5259
                      " (checking from %s): %s" %
5260
                      (verifier, nl_payload[failed]))
5261
        raise errors.OpExecError("ssh/hostname verification failed")
5262

    
5263
    if self.op.readd:
5264
      _RedistributeAncillaryFiles(self)
5265
      self.context.ReaddNode(new_node)
5266
      # make sure we redistribute the config
5267
      self.cfg.Update(new_node, feedback_fn)
5268
      # and make sure the new node will not have old files around
5269
      if not new_node.master_candidate:
5270
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5271
        msg = result.fail_msg
5272
        if msg:
5273
          self.LogWarning("Node failed to demote itself from master"
5274
                          " candidate status: %s" % msg)
5275
    else:
5276
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5277
                                  additional_vm=self.op.vm_capable)
5278
      self.context.AddNode(new_node, self.proc.GetECId())
5279

    
5280

    
5281
class LUNodeSetParams(LogicalUnit):
5282
  """Modifies the parameters of a node.
5283

5284
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5285
      to the node role (as _ROLE_*)
5286
  @cvar _R2F: a dictionary from node role to tuples of flags
5287
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5288

5289
  """
5290
  HPATH = "node-modify"
5291
  HTYPE = constants.HTYPE_NODE
5292
  REQ_BGL = False
5293
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5294
  _F2R = {
5295
    (True, False, False): _ROLE_CANDIDATE,
5296
    (False, True, False): _ROLE_DRAINED,
5297
    (False, False, True): _ROLE_OFFLINE,
5298
    (False, False, False): _ROLE_REGULAR,
5299
    }
5300
  _R2F = dict((v, k) for k, v in _F2R.items())
5301
  _FLAGS = ["master_candidate", "drained", "offline"]
5302

    
5303
  def CheckArguments(self):
5304
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5305
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5306
                self.op.master_capable, self.op.vm_capable,
5307
                self.op.secondary_ip, self.op.ndparams]
5308
    if all_mods.count(None) == len(all_mods):
5309
      raise errors.OpPrereqError("Please pass at least one modification",
5310
                                 errors.ECODE_INVAL)
5311
    if all_mods.count(True) > 1:
5312
      raise errors.OpPrereqError("Can't set the node into more than one"
5313
                                 " state at the same time",
5314
                                 errors.ECODE_INVAL)
5315

    
5316
    # Boolean value that tells us whether we might be demoting from MC
5317
    self.might_demote = (self.op.master_candidate == False or
5318
                         self.op.offline == True or
5319
                         self.op.drained == True or
5320
                         self.op.master_capable == False)
5321

    
5322
    if self.op.secondary_ip:
5323
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5324
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5325
                                   " address" % self.op.secondary_ip,
5326
                                   errors.ECODE_INVAL)
5327

    
5328
    self.lock_all = self.op.auto_promote and self.might_demote
5329
    self.lock_instances = self.op.secondary_ip is not None
5330

    
5331
  def _InstanceFilter(self, instance):
5332
    """Filter for getting affected instances.
5333

5334
    """
5335
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5336
            self.op.node_name in instance.all_nodes)
5337

    
5338
  def ExpandNames(self):
5339
    if self.lock_all:
5340
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5341
    else:
5342
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5343

    
5344
    # Since modifying a node can have severe effects on currently running
5345
    # operations the resource lock is at least acquired in shared mode
5346
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5347
      self.needed_locks[locking.LEVEL_NODE]
5348

    
5349
    # Get node resource and instance locks in shared mode; they are not used
5350
    # for anything but read-only access
5351
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5352
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5353

    
5354
    if self.lock_instances:
5355
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5356
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5357

    
5358
  def BuildHooksEnv(self):
5359
    """Build hooks env.
5360

5361
    This runs on the master node.
5362

5363
    """
5364
    return {
5365
      "OP_TARGET": self.op.node_name,
5366
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5367
      "OFFLINE": str(self.op.offline),
5368
      "DRAINED": str(self.op.drained),
5369
      "MASTER_CAPABLE": str(self.op.master_capable),
5370
      "VM_CAPABLE": str(self.op.vm_capable),
5371
      }
5372

    
5373
  def BuildHooksNodes(self):
5374
    """Build hooks nodes.
5375

5376
    """
5377
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5378
    return (nl, nl)
5379

    
5380
  def CheckPrereq(self):
5381
    """Check prerequisites.
5382

5383
    This only checks the instance list against the existing names.
5384

5385
    """
5386
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5387

    
5388
    if self.lock_instances:
5389
      affected_instances = \
5390
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5391

    
5392
      # Verify instance locks
5393
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5394
      wanted_instances = frozenset(affected_instances.keys())
5395
      if wanted_instances - owned_instances:
5396
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5397
                                   " secondary IP address have changed since"
5398
                                   " locks were acquired, wanted '%s', have"
5399
                                   " '%s'; retry the operation" %
5400
                                   (self.op.node_name,
5401
                                    utils.CommaJoin(wanted_instances),
5402
                                    utils.CommaJoin(owned_instances)),
5403
                                   errors.ECODE_STATE)
5404
    else:
5405
      affected_instances = None
5406

    
5407
    if (self.op.master_candidate is not None or
5408
        self.op.drained is not None or
5409
        self.op.offline is not None):
5410
      # we can't change the master's node flags
5411
      if self.op.node_name == self.cfg.GetMasterNode():
5412
        raise errors.OpPrereqError("The master role can be changed"
5413
                                   " only via master-failover",
5414
                                   errors.ECODE_INVAL)
5415

    
5416
    if self.op.master_candidate and not node.master_capable:
5417
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5418
                                 " it a master candidate" % node.name,
5419
                                 errors.ECODE_STATE)
5420

    
5421
    if self.op.vm_capable == False:
5422
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5423
      if ipri or isec:
5424
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5425
                                   " the vm_capable flag" % node.name,
5426
                                   errors.ECODE_STATE)
5427

    
5428
    if node.master_candidate and self.might_demote and not self.lock_all:
5429
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5430
      # check if after removing the current node, we're missing master
5431
      # candidates
5432
      (mc_remaining, mc_should, _) = \
5433
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5434
      if mc_remaining < mc_should:
5435
        raise errors.OpPrereqError("Not enough master candidates, please"
5436
                                   " pass auto promote option to allow"
5437
                                   " promotion", errors.ECODE_STATE)
5438

    
5439
    self.old_flags = old_flags = (node.master_candidate,
5440
                                  node.drained, node.offline)
5441
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5442
    self.old_role = old_role = self._F2R[old_flags]
5443

    
5444
    # Check for ineffective changes
5445
    for attr in self._FLAGS:
5446
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5447
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5448
        setattr(self.op, attr, None)
5449

    
5450
    # Past this point, any flag change to False means a transition
5451
    # away from the respective state, as only real changes are kept
5452

    
5453
    # TODO: We might query the real power state if it supports OOB
5454
    if _SupportsOob(self.cfg, node):
5455
      if self.op.offline is False and not (node.powered or
5456
                                           self.op.powered == True):
5457
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5458
                                    " offline status can be reset") %
5459
                                   self.op.node_name)
5460
    elif self.op.powered is not None:
5461
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5462
                                  " as it does not support out-of-band"
5463
                                  " handling") % self.op.node_name)
5464

    
5465
    # If we're being deofflined/drained, we'll MC ourself if needed
5466
    if (self.op.drained == False or self.op.offline == False or
5467
        (self.op.master_capable and not node.master_capable)):
5468
      if _DecideSelfPromotion(self):
5469
        self.op.master_candidate = True
5470
        self.LogInfo("Auto-promoting node to master candidate")
5471

    
5472
    # If we're no longer master capable, we'll demote ourselves from MC
5473
    if self.op.master_capable == False and node.master_candidate:
5474
      self.LogInfo("Demoting from master candidate")
5475
      self.op.master_candidate = False
5476

    
5477
    # Compute new role
5478
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5479
    if self.op.master_candidate:
5480
      new_role = self._ROLE_CANDIDATE
5481
    elif self.op.drained:
5482
      new_role = self._ROLE_DRAINED
5483
    elif self.op.offline:
5484
      new_role = self._ROLE_OFFLINE
5485
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5486
      # False is still in new flags, which means we're un-setting (the
5487
      # only) True flag
5488
      new_role = self._ROLE_REGULAR
5489
    else: # no new flags, nothing, keep old role
5490
      new_role = old_role
5491

    
5492
    self.new_role = new_role
5493

    
5494
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5495
      # Trying to transition out of offline status
5496
      # TODO: Use standard RPC runner, but make sure it works when the node is
5497
      # still marked offline
5498
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5499
      if result.fail_msg:
5500
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5501
                                   " to report its version: %s" %
5502
                                   (node.name, result.fail_msg),
5503
                                   errors.ECODE_STATE)
5504
      else:
5505
        self.LogWarning("Transitioning node from offline to online state"
5506
                        " without using re-add. Please make sure the node"
5507
                        " is healthy!")
5508

    
5509
    if self.op.secondary_ip:
5510
      # Ok even without locking, because this can't be changed by any LU
5511
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5512
      master_singlehomed = master.secondary_ip == master.primary_ip
5513
      if master_singlehomed and self.op.secondary_ip:
5514
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5515
                                   " homed cluster", errors.ECODE_INVAL)
5516

    
5517
      assert not (frozenset(affected_instances) -
5518
                  self.owned_locks(locking.LEVEL_INSTANCE))
5519

    
5520
      if node.offline:
5521
        if affected_instances:
5522
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5523
                                     " offline node has instances (%s)"
5524
                                     " configured to use it" %
5525
                                     utils.CommaJoin(affected_instances.keys()))
5526
      else:
5527
        # On online nodes, check that no instances are running, and that
5528
        # the node has the new ip and we can reach it.
5529
        for instance in affected_instances.values():
5530
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
5531
                              msg="cannot change secondary ip")
5532

    
5533
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5534
        if master.name != node.name:
5535
          # check reachability from master secondary ip to new secondary ip
5536
          if not netutils.TcpPing(self.op.secondary_ip,
5537
                                  constants.DEFAULT_NODED_PORT,
5538
                                  source=master.secondary_ip):
5539
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5540
                                       " based ping to node daemon port",
5541
                                       errors.ECODE_ENVIRON)
5542

    
5543
    if self.op.ndparams:
5544
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5545
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5546
      self.new_ndparams = new_ndparams
5547

    
5548
  def Exec(self, feedback_fn):
5549
    """Modifies a node.
5550

5551
    """
5552
    node = self.node
5553
    old_role = self.old_role
5554
    new_role = self.new_role
5555

    
5556
    result = []
5557

    
5558
    if self.op.ndparams:
5559
      node.ndparams = self.new_ndparams
5560

    
5561
    if self.op.powered is not None:
5562
      node.powered = self.op.powered
5563

    
5564
    for attr in ["master_capable", "vm_capable"]:
5565
      val = getattr(self.op, attr)
5566
      if val is not None:
5567
        setattr(node, attr, val)
5568
        result.append((attr, str(val)))
5569

    
5570
    if new_role != old_role:
5571
      # Tell the node to demote itself, if no longer MC and not offline
5572
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5573
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5574
        if msg:
5575
          self.LogWarning("Node failed to demote itself: %s", msg)
5576

    
5577
      new_flags = self._R2F[new_role]
5578
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5579
        if of != nf:
5580
          result.append((desc, str(nf)))
5581
      (node.master_candidate, node.drained, node.offline) = new_flags
5582

    
5583
      # we locked all nodes, we adjust the CP before updating this node
5584
      if self.lock_all:
5585
        _AdjustCandidatePool(self, [node.name])
5586

    
5587
    if self.op.secondary_ip:
5588
      node.secondary_ip = self.op.secondary_ip
5589
      result.append(("secondary_ip", self.op.secondary_ip))
5590

    
5591
    # this will trigger configuration file update, if needed
5592
    self.cfg.Update(node, feedback_fn)
5593

    
5594
    # this will trigger job queue propagation or cleanup if the mc
5595
    # flag changed
5596
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5597
      self.context.ReaddNode(node)
5598

    
5599
    return result
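
# Illustrative sketch, not part of the original module: the role handling
# above relies on a role-to-flags mapping (the class attributes _R2F and
# _FLAGS referenced in Exec, defined earlier in this class).  The keys
# below are made-up labels standing in for the _ROLE_* constants; each
# role maps to one (master_candidate, drained, offline) tuple that Exec
# unpacks onto the node object.
_EXAMPLE_NODE_ROLE_FLAGS = {
  "candidate": (True, False, False),
  "drained": (False, True, False),
  "offline": (False, False, True),
  "regular": (False, False, False),
  }


def _ExampleApplyNodeRole(node, role):
  """Example only: apply a role's flag tuple to a node object."""
  (node.master_candidate, node.drained, node.offline) = \
    _EXAMPLE_NODE_ROLE_FLAGS[role]
  return node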
5600

    
5601

    
5602
class LUNodePowercycle(NoHooksLU):
5603
  """Powercycles a node.
5604

5605
  """
5606
  REQ_BGL = False
5607

    
5608
  def CheckArguments(self):
5609
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5610
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5611
      raise errors.OpPrereqError("The node is the master and the force"
5612
                                 " parameter was not set",
5613
                                 errors.ECODE_INVAL)
5614

    
5615
  def ExpandNames(self):
5616
    """Locking for PowercycleNode.
5617

5618
    This is a last-resort option and shouldn't block on other
5619
    jobs. Therefore, we grab no locks.
5620

5621
    """
5622
    self.needed_locks = {}
5623

    
5624
  def Exec(self, feedback_fn):
5625
    """Reboots a node.
5626

5627
    """
5628
    result = self.rpc.call_node_powercycle(self.op.node_name,
5629
                                           self.cfg.GetHypervisorType())
5630
    result.Raise("Failed to schedule the reboot")
5631
    return result.payload
5632

    
5633

    
5634
class LUClusterQuery(NoHooksLU):
5635
  """Query cluster configuration.
5636

5637
  """
5638
  REQ_BGL = False
5639

    
5640
  def ExpandNames(self):
5641
    self.needed_locks = {}
5642

    
5643
  def Exec(self, feedback_fn):
5644
    """Return cluster config.
5645

5646
    """
5647
    cluster = self.cfg.GetClusterInfo()
5648
    os_hvp = {}
5649

    
5650
    # Filter just for enabled hypervisors
5651
    for os_name, hv_dict in cluster.os_hvp.items():
5652
      os_hvp[os_name] = {}
5653
      for hv_name, hv_params in hv_dict.items():
5654
        if hv_name in cluster.enabled_hypervisors:
5655
          os_hvp[os_name][hv_name] = hv_params
5656

    
5657
    # Convert ip_family to ip_version
5658
    primary_ip_version = constants.IP4_VERSION
5659
    if cluster.primary_ip_family == netutils.IP6Address.family:
5660
      primary_ip_version = constants.IP6_VERSION
5661

    
5662
    result = {
5663
      "software_version": constants.RELEASE_VERSION,
5664
      "protocol_version": constants.PROTOCOL_VERSION,
5665
      "config_version": constants.CONFIG_VERSION,
5666
      "os_api_version": max(constants.OS_API_VERSIONS),
5667
      "export_version": constants.EXPORT_VERSION,
5668
      "architecture": (platform.architecture()[0], platform.machine()),
5669
      "name": cluster.cluster_name,
5670
      "master": cluster.master_node,
5671
      "default_hypervisor": cluster.enabled_hypervisors[0],
5672
      "enabled_hypervisors": cluster.enabled_hypervisors,
5673
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5674
                        for hypervisor_name in cluster.enabled_hypervisors]),
5675
      "os_hvp": os_hvp,
5676
      "beparams": cluster.beparams,
5677
      "osparams": cluster.osparams,
5678
      "nicparams": cluster.nicparams,
5679
      "ndparams": cluster.ndparams,
5680
      "candidate_pool_size": cluster.candidate_pool_size,
5681
      "master_netdev": cluster.master_netdev,
5682
      "master_netmask": cluster.master_netmask,
5683
      "use_external_mip_script": cluster.use_external_mip_script,
5684
      "volume_group_name": cluster.volume_group_name,
5685
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5686
      "file_storage_dir": cluster.file_storage_dir,
5687
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5688
      "maintain_node_health": cluster.maintain_node_health,
5689
      "ctime": cluster.ctime,
5690
      "mtime": cluster.mtime,
5691
      "uuid": cluster.uuid,
5692
      "tags": list(cluster.GetTags()),
5693
      "uid_pool": cluster.uid_pool,
5694
      "default_iallocator": cluster.default_iallocator,
5695
      "reserved_lvs": cluster.reserved_lvs,
5696
      "primary_ip_version": primary_ip_version,
5697
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5698
      "hidden_os": cluster.hidden_os,
5699
      "blacklisted_os": cluster.blacklisted_os,
5700
      }
5701

    
5702
    return result
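
# Illustrative sketch, not part of the original module: the os_hvp
# filtering above keeps, for every OS, only the per-hypervisor parameters
# of hypervisors that are enabled on the cluster.  A standalone version
# of that loop, working on plain dicts, looks like this:
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  """Example only: drop settings for hypervisors that are not enabled."""
  filtered = {}
  for os_name, hv_dict in os_hvp.items():
    filtered[os_name] = dict((hv_name, hv_params)
                             for hv_name, hv_params in hv_dict.items()
                             if hv_name in enabled_hypervisors)
  return filtered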
5703

    
5704

    
5705
class LUClusterConfigQuery(NoHooksLU):
5706
  """Return configuration values.
5707

5708
  """
5709
  REQ_BGL = False
5710
  _FIELDS_DYNAMIC = utils.FieldSet()
5711
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5712
                                  "watcher_pause", "volume_group_name")
5713

    
5714
  def CheckArguments(self):
5715
    _CheckOutputFields(static=self._FIELDS_STATIC,
5716
                       dynamic=self._FIELDS_DYNAMIC,
5717
                       selected=self.op.output_fields)
5718

    
5719
  def ExpandNames(self):
5720
    self.needed_locks = {}
5721

    
5722
  def Exec(self, feedback_fn):
5723
    """Dump a representation of the cluster config to the standard output.
5724

5725
    """
5726
    values = []
5727
    for field in self.op.output_fields:
5728
      if field == "cluster_name":
5729
        entry = self.cfg.GetClusterName()
5730
      elif field == "master_node":
5731
        entry = self.cfg.GetMasterNode()
5732
      elif field == "drain_flag":
5733
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5734
      elif field == "watcher_pause":
5735
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5736
      elif field == "volume_group_name":
5737
        entry = self.cfg.GetVGName()
5738
      else:
5739
        raise errors.ParameterError(field)
5740
      values.append(entry)
5741
    return values
5742

    
5743

    
5744
class LUInstanceActivateDisks(NoHooksLU):
5745
  """Bring up an instance's disks.
5746

5747
  """
5748
  REQ_BGL = False
5749

    
5750
  def ExpandNames(self):
5751
    self._ExpandAndLockInstance()
5752
    self.needed_locks[locking.LEVEL_NODE] = []
5753
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5754

    
5755
  def DeclareLocks(self, level):
5756
    if level == locking.LEVEL_NODE:
5757
      self._LockInstancesNodes()
5758

    
5759
  def CheckPrereq(self):
5760
    """Check prerequisites.
5761

5762
    This checks that the instance is in the cluster.
5763

5764
    """
5765
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5766
    assert self.instance is not None, \
5767
      "Cannot retrieve locked instance %s" % self.op.instance_name
5768
    _CheckNodeOnline(self, self.instance.primary_node)
5769

    
5770
  def Exec(self, feedback_fn):
5771
    """Activate the disks.
5772

5773
    """
5774
    disks_ok, disks_info = \
5775
              _AssembleInstanceDisks(self, self.instance,
5776
                                     ignore_size=self.op.ignore_size)
5777
    if not disks_ok:
5778
      raise errors.OpExecError("Cannot activate block devices")
5779

    
5780
    return disks_info
5781

    
5782

    
5783
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5784
                           ignore_size=False):
5785
  """Prepare the block devices for an instance.
5786

5787
  This sets up the block devices on all nodes.
5788

5789
  @type lu: L{LogicalUnit}
5790
  @param lu: the logical unit on whose behalf we execute
5791
  @type instance: L{objects.Instance}
5792
  @param instance: the instance for whose disks we assemble
5793
  @type disks: list of L{objects.Disk} or None
5794
  @param disks: which disks to assemble (or all, if None)
5795
  @type ignore_secondaries: boolean
5796
  @param ignore_secondaries: if true, errors on secondary nodes
5797
      won't result in an error return from the function
5798
  @type ignore_size: boolean
5799
  @param ignore_size: if true, the current known size of the disk
5800
      will not be used during the disk activation, useful for cases
5801
      when the size is wrong
5802
  @return: a tuple of (disks_ok, device_info), where device_info is a
5803
      list of (node, instance_visible_name, node_visible_name) tuples
5804
      with the mapping from node devices to instance devices
5805

5806
  """
5807
  device_info = []
5808
  disks_ok = True
5809
  iname = instance.name
5810
  disks = _ExpandCheckDisks(instance, disks)
5811

    
5812
  # With the two-pass mechanism we try to reduce the window of
5813
  # opportunity for the race condition of switching DRBD to primary
5814
  # before handshaking occurred, but we do not eliminate it
5815

    
5816
  # The proper fix would be to wait (with some limits) until the
5817
  # connection has been made and drbd transitions from WFConnection
5818
  # into any other network-connected state (Connected, SyncTarget,
5819
  # SyncSource, etc.)
5820

    
5821
  # 1st pass, assemble on all nodes in secondary mode
5822
  for idx, inst_disk in enumerate(disks):
5823
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5824
      if ignore_size:
5825
        node_disk = node_disk.Copy()
5826
        node_disk.UnsetSize()
5827
      lu.cfg.SetDiskID(node_disk, node)
5828
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5829
      msg = result.fail_msg
5830
      if msg:
5831
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5832
                           " (is_primary=False, pass=1): %s",
5833
                           inst_disk.iv_name, node, msg)
5834
        if not ignore_secondaries:
5835
          disks_ok = False
5836

    
5837
  # FIXME: race condition on drbd migration to primary
5838

    
5839
  # 2nd pass, do only the primary node
5840
  for idx, inst_disk in enumerate(disks):
5841
    dev_path = None
5842

    
5843
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5844
      if node != instance.primary_node:
5845
        continue
5846
      if ignore_size:
5847
        node_disk = node_disk.Copy()
5848
        node_disk.UnsetSize()
5849
      lu.cfg.SetDiskID(node_disk, node)
5850
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5851
      msg = result.fail_msg
5852
      if msg:
5853
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5854
                           " (is_primary=True, pass=2): %s",
5855
                           inst_disk.iv_name, node, msg)
5856
        disks_ok = False
5857
      else:
5858
        dev_path = result.payload
5859

    
5860
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5861

    
5862
  # leave the disks configured for the primary node
5863
  # this is a workaround that would be fixed better by
5864
  # improving the logical/physical id handling
5865
  for disk in disks:
5866
    lu.cfg.SetDiskID(disk, instance.primary_node)
5867

    
5868
  return disks_ok, device_info
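
# Illustrative sketch, not part of the original module: how a caller is
# expected to consume the (disks_ok, device_info) pair returned above.
# The "lu" and "instance" arguments are assumed to be a LogicalUnit and
# an objects.Instance already owned by the caller.
def _ExampleActivateAndLogDisks(lu, instance):
  """Example only: assemble the disks and log where they became visible."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    lu.LogInfo("Disk %s of %s is visible on node %s as %s",
               iv_name, instance.name, node, dev_path)
  return device_info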
5869

    
5870

    
5871
def _StartInstanceDisks(lu, instance, force):
5872
  """Start the disks of an instance.
5873

5874
  """
5875
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5876
                                           ignore_secondaries=force)
5877
  if not disks_ok:
5878
    _ShutdownInstanceDisks(lu, instance)
5879
    if force is not None and not force:
5880
      lu.proc.LogWarning("", hint="If the message above refers to a"
5881
                         " secondary node,"
5882
                         " you can retry the operation using '--force'.")
5883
    raise errors.OpExecError("Disk consistency error")
5884

    
5885

    
5886
class LUInstanceDeactivateDisks(NoHooksLU):
5887
  """Shutdown an instance's disks.
5888

5889
  """
5890
  REQ_BGL = False
5891

    
5892
  def ExpandNames(self):
5893
    self._ExpandAndLockInstance()
5894
    self.needed_locks[locking.LEVEL_NODE] = []
5895
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5896

    
5897
  def DeclareLocks(self, level):
5898
    if level == locking.LEVEL_NODE:
5899
      self._LockInstancesNodes()
5900

    
5901
  def CheckPrereq(self):
5902
    """Check prerequisites.
5903

5904
    This checks that the instance is in the cluster.
5905

5906
    """
5907
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5908
    assert self.instance is not None, \
5909
      "Cannot retrieve locked instance %s" % self.op.instance_name
5910

    
5911
  def Exec(self, feedback_fn):
5912
    """Deactivate the disks
5913

5914
    """
5915
    instance = self.instance
5916
    if self.op.force:
5917
      _ShutdownInstanceDisks(self, instance)
5918
    else:
5919
      _SafeShutdownInstanceDisks(self, instance)
5920

    
5921

    
5922
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5923
  """Shutdown block devices of an instance.
5924

5925
  This function checks that the instance is not running before calling
5926
  _ShutdownInstanceDisks.
5927

5928
  """
5929
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5930
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5931

    
5932

    
5933
def _ExpandCheckDisks(instance, disks):
5934
  """Return the instance disks selected by the disks list
5935

5936
  @type disks: list of L{objects.Disk} or None
5937
  @param disks: selected disks
5938
  @rtype: list of L{objects.Disk}
5939
  @return: selected instance disks to act on
5940

5941
  """
5942
  if disks is None:
5943
    return instance.disks
5944
  else:
5945
    if not set(disks).issubset(instance.disks):
5946
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5947
                                   " target instance")
5948
    return disks
5949

    
5950

    
5951
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5952
  """Shutdown block devices of an instance.
5953

5954
  This does the shutdown on all nodes of the instance.
5955

5956
  If ignore_primary is false, errors on the primary node are not
5957
  ignored and make the function return failure.
5958

5959
  """
5960
  all_result = True
5961
  disks = _ExpandCheckDisks(instance, disks)
5962

    
5963
  for disk in disks:
5964
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5965
      lu.cfg.SetDiskID(top_disk, node)
5966
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5967
      msg = result.fail_msg
5968
      if msg:
5969
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5970
                      disk.iv_name, node, msg)
5971
        if ((node == instance.primary_node and not ignore_primary) or
5972
            (node != instance.primary_node and not result.offline)):
5973
          all_result = False
5974
  return all_result
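
# Illustrative sketch, not part of the original module: what the
# ignore_primary flag above changes when the primary node reports an
# error.  "lu" and "instance" are assumed to be owned by the caller.
def _ExampleShutdownIgnoringPrimary(lu, instance):
  """Example only: tolerate shutdown errors on the primary node."""
  if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
    # with ignore_primary=True this is only reachable for errors
    # reported by online secondary nodes
    lu.LogWarning("Some block devices could not be shut down cleanly")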
5975

    
5976

    
5977
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5978
  """Checks if a node has enough free memory.
5979

5980
  This function checks if a given node has the needed amount of free
5981
  memory. In case the node has less memory or we cannot get the
5982
  information from the node, this function raises an OpPrereqError
5983
  exception.
5984

5985
  @type lu: C{LogicalUnit}
5986
  @param lu: a logical unit from which we get configuration data
5987
  @type node: C{str}
5988
  @param node: the node to check
5989
  @type reason: C{str}
5990
  @param reason: string to use in the error message
5991
  @type requested: C{int}
5992
  @param requested: the amount of memory in MiB to check for
5993
  @type hypervisor_name: C{str}
5994
  @param hypervisor_name: the hypervisor to ask for memory stats
5995
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5996
      we cannot check the node
5997

5998
  """
5999
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
6000
  nodeinfo[node].Raise("Can't get data from node %s" % node,
6001
                       prereq=True, ecode=errors.ECODE_ENVIRON)
6002
  free_mem = nodeinfo[node].payload.get("memory_free", None)
6003
  if not isinstance(free_mem, int):
6004
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6005
                               " was '%s'" % (node, free_mem),
6006
                               errors.ECODE_ENVIRON)
6007
  if requested > free_mem:
6008
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6009
                               " needed %s MiB, available %s MiB" %
6010
                               (node, reason, requested, free_mem),
6011
                               errors.ECODE_NORES)
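
# Illustrative sketch, not part of the original module: the typical call
# pattern for the memory check above, mirroring what LUInstanceStartup
# does; "lu" and "instance" are assumed to be owned by the caller.
def _ExampleCheckStartupMemory(lu, instance):
  """Example only: verify the primary node can host the instance."""
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)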
6012

    
6013

    
6014
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6015
  """Checks if nodes have enough free disk space in the all VGs.
6016

6017
  This function checks if all given nodes have the needed amount of
6018
  free disk. In case any node has less disk or we cannot get the
6019
  information from the node, this function raises an OpPrereqError
6020
  exception.
6021

6022
  @type lu: C{LogicalUnit}
6023
  @param lu: a logical unit from which we get configuration data
6024
  @type nodenames: C{list}
6025
  @param nodenames: the list of node names to check
6026
  @type req_sizes: C{dict}
6027
  @param req_sizes: the hash of vg and corresponding amount of disk in
6028
      MiB to check for
6029
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6030
      or we cannot check the node
6031

6032
  """
6033
  for vg, req_size in req_sizes.items():
6034
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
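
# Illustrative sketch, not part of the original module: req_sizes maps a
# volume group name to the space required from it, so a caller that
# needs 10 GiB in "xenvg" and 2 GiB in "backupvg" on two nodes would do
# something like (names below are made up for the example):
#
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com",
#                                   "node2.example.com"],
#                            {"xenvg": 10240, "backupvg": 2048})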
6035

    
6036

    
6037
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6038
  """Checks if nodes have enough free disk space in the specified VG.
6039

6040
  This function checks if all given nodes have the needed amount of
6041
  free disk. In case any node has less disk or we cannot get the
6042
  information from the node, this function raises an OpPrereqError
6043
  exception.
6044

6045
  @type lu: C{LogicalUnit}
6046
  @param lu: a logical unit from which we get configuration data
6047
  @type nodenames: C{list}
6048
  @param nodenames: the list of node names to check
6049
  @type vg: C{str}
6050
  @param vg: the volume group to check
6051
  @type requested: C{int}
6052
  @param requested: the amount of disk in MiB to check for
6053
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6054
      or we cannot check the node
6055

6056
  """
6057
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6058
  for node in nodenames:
6059
    info = nodeinfo[node]
6060
    info.Raise("Cannot get current information from node %s" % node,
6061
               prereq=True, ecode=errors.ECODE_ENVIRON)
6062
    vg_free = info.payload.get("vg_free", None)
6063
    if not isinstance(vg_free, int):
6064
      raise errors.OpPrereqError("Can't compute free disk space on node"
6065
                                 " %s for vg %s, result was '%s'" %
6066
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6067
    if requested > vg_free:
6068
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6069
                                 " vg %s: required %d MiB, available %d MiB" %
6070
                                 (node, vg, requested, vg_free),
6071
                                 errors.ECODE_NORES)
6072

    
6073

    
6074
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6075
  """Checks if nodes have enough physical CPUs
6076

6077
  This function checks if all given nodes have the needed number of
6078
  physical CPUs. In case any node has fewer CPUs or we cannot get the
6079
  information from the node, this function raises an OpPrereqError
6080
  exception.
6081

6082
  @type lu: C{LogicalUnit}
6083
  @param lu: a logical unit from which we get configuration data
6084
  @type nodenames: C{list}
6085
  @param nodenames: the list of node names to check
6086
  @type requested: C{int}
6087
  @param requested: the minimum acceptable number of physical CPUs
6088
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6089
      or we cannot check the node
6090

6091
  """
6092
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6093
  for node in nodenames:
6094
    info = nodeinfo[node]
6095
    info.Raise("Cannot get current information from node %s" % node,
6096
               prereq=True, ecode=errors.ECODE_ENVIRON)
6097
    num_cpus = info.payload.get("cpu_total", None)
6098
    if not isinstance(num_cpus, int):
6099
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6100
                                 " on node %s, result was '%s'" %
6101
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6102
    if requested > num_cpus:
6103
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6104
                                 "required" % (node, num_cpus, requested),
6105
                                 errors.ECODE_NORES)
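
# Illustrative sketch, not part of the original module: the node checks
# above all query node_info and validate a single integer field of the
# payload.  A condensed form of that pattern, with the payload key as a
# parameter, could look like this (the helper name is an assumption, not
# an existing function):
def _ExampleCheckNodeInfoField(lu, node, field, minimum, hypervisor_name):
  """Example only: require a minimum integer value in the node payload."""
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Cannot get current information from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  value = nodeinfo[node].payload.get(field, None)
  if not isinstance(value, int):
    raise errors.OpPrereqError("Can't read %s on node %s, result was '%s'" %
                               (field, node, value), errors.ECODE_ENVIRON)
  if value < minimum:
    raise errors.OpPrereqError("Node %s has %s=%s, but at least %s is"
                               " required" % (node, field, value, minimum),
                               errors.ECODE_NORES)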
6106

    
6107

    
6108
class LUInstanceStartup(LogicalUnit):
6109
  """Starts an instance.
6110

6111
  """
6112
  HPATH = "instance-start"
6113
  HTYPE = constants.HTYPE_INSTANCE
6114
  REQ_BGL = False
6115

    
6116
  def CheckArguments(self):
6117
    # extra beparams
6118
    if self.op.beparams:
6119
      # fill the beparams dict
6120
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6121

    
6122
  def ExpandNames(self):
6123
    self._ExpandAndLockInstance()
6124

    
6125
  def BuildHooksEnv(self):
6126
    """Build hooks env.
6127

6128
    This runs on master, primary and secondary nodes of the instance.
6129

6130
    """
6131
    env = {
6132
      "FORCE": self.op.force,
6133
      }
6134

    
6135
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6136

    
6137
    return env
6138

    
6139
  def BuildHooksNodes(self):
6140
    """Build hooks nodes.
6141

6142
    """
6143
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6144
    return (nl, nl)
6145

    
6146
  def CheckPrereq(self):
6147
    """Check prerequisites.
6148

6149
    This checks that the instance is in the cluster.
6150

6151
    """
6152
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6153
    assert self.instance is not None, \
6154
      "Cannot retrieve locked instance %s" % self.op.instance_name
6155

    
6156
    # extra hvparams
6157
    if self.op.hvparams:
6158
      # check hypervisor parameter syntax (locally)
6159
      cluster = self.cfg.GetClusterInfo()
6160
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6161
      filled_hvp = cluster.FillHV(instance)
6162
      filled_hvp.update(self.op.hvparams)
6163
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6164
      hv_type.CheckParameterSyntax(filled_hvp)
6165
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6166

    
6167
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6168

    
6169
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6170

    
6171
    if self.primary_offline and self.op.ignore_offline_nodes:
6172
      self.proc.LogWarning("Ignoring offline primary node")
6173

    
6174
      if self.op.hvparams or self.op.beparams:
6175
        self.proc.LogWarning("Overridden parameters are ignored")
6176
    else:
6177
      _CheckNodeOnline(self, instance.primary_node)
6178

    
6179
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6180

    
6181
      # check bridges existence
6182
      _CheckInstanceBridgesExist(self, instance)
6183

    
6184
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6185
                                                instance.name,
6186
                                                instance.hypervisor)
6187
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6188
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6189
      if not remote_info.payload: # not running already
6190
        _CheckNodeFreeMemory(self, instance.primary_node,
6191
                             "starting instance %s" % instance.name,
6192
                             bep[constants.BE_MEMORY], instance.hypervisor)
6193

    
6194
  def Exec(self, feedback_fn):
6195
    """Start the instance.
6196

6197
    """
6198
    instance = self.instance
6199
    force = self.op.force
6200

    
6201
    if not self.op.no_remember:
6202
      self.cfg.MarkInstanceUp(instance.name)
6203

    
6204
    if self.primary_offline:
6205
      assert self.op.ignore_offline_nodes
6206
      self.proc.LogInfo("Primary node offline, marked instance as started")
6207
    else:
6208
      node_current = instance.primary_node
6209

    
6210
      _StartInstanceDisks(self, instance, force)
6211

    
6212
      result = \
6213
        self.rpc.call_instance_start(node_current,
6214
                                     (instance, self.op.hvparams,
6215
                                      self.op.beparams),
6216
                                     self.op.startup_paused)
6217
      msg = result.fail_msg
6218
      if msg:
6219
        _ShutdownInstanceDisks(self, instance)
6220
        raise errors.OpExecError("Could not start instance: %s" % msg)
6221

    
6222

    
6223
class LUInstanceReboot(LogicalUnit):
6224
  """Reboot an instance.
6225

6226
  """
6227
  HPATH = "instance-reboot"
6228
  HTYPE = constants.HTYPE_INSTANCE
6229
  REQ_BGL = False
6230

    
6231
  def ExpandNames(self):
6232
    self._ExpandAndLockInstance()
6233

    
6234
  def BuildHooksEnv(self):
6235
    """Build hooks env.
6236

6237
    This runs on master, primary and secondary nodes of the instance.
6238

6239
    """
6240
    env = {
6241
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6242
      "REBOOT_TYPE": self.op.reboot_type,
6243
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6244
      }
6245

    
6246
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6247

    
6248
    return env
6249

    
6250
  def BuildHooksNodes(self):
6251
    """Build hooks nodes.
6252

6253
    """
6254
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6255
    return (nl, nl)
6256

    
6257
  def CheckPrereq(self):
6258
    """Check prerequisites.
6259

6260
    This checks that the instance is in the cluster.
6261

6262
    """
6263
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6264
    assert self.instance is not None, \
6265
      "Cannot retrieve locked instance %s" % self.op.instance_name
6266
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6267
    _CheckNodeOnline(self, instance.primary_node)
6268

    
6269
    # check bridges existence
6270
    _CheckInstanceBridgesExist(self, instance)
6271

    
6272
  def Exec(self, feedback_fn):
6273
    """Reboot the instance.
6274

6275
    """
6276
    instance = self.instance
6277
    ignore_secondaries = self.op.ignore_secondaries
6278
    reboot_type = self.op.reboot_type
6279

    
6280
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6281
                                              instance.name,
6282
                                              instance.hypervisor)
6283
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6284
    instance_running = bool(remote_info.payload)
6285

    
6286
    node_current = instance.primary_node
6287

    
6288
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6289
                                            constants.INSTANCE_REBOOT_HARD]:
6290
      for disk in instance.disks:
6291
        self.cfg.SetDiskID(disk, node_current)
6292
      result = self.rpc.call_instance_reboot(node_current, instance,
6293
                                             reboot_type,
6294
                                             self.op.shutdown_timeout)
6295
      result.Raise("Could not reboot instance")
6296
    else:
6297
      if instance_running:
6298
        result = self.rpc.call_instance_shutdown(node_current, instance,
6299
                                                 self.op.shutdown_timeout)
6300
        result.Raise("Could not shutdown instance for full reboot")
6301
        _ShutdownInstanceDisks(self, instance)
6302
      else:
6303
        self.LogInfo("Instance %s was already stopped, starting now",
6304
                     instance.name)
6305
      _StartInstanceDisks(self, instance, ignore_secondaries)
6306
      result = self.rpc.call_instance_start(node_current,
6307
                                            (instance, None, None), False)
6308
      msg = result.fail_msg
6309
      if msg:
6310
        _ShutdownInstanceDisks(self, instance)
6311
        raise errors.OpExecError("Could not start instance for"
6312
                                 " full reboot: %s" % msg)
6313

    
6314
    self.cfg.MarkInstanceUp(instance.name)
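
# Illustrative sketch, not part of the original module: the decision
# taken by LUInstanceReboot.Exec above, reduced to a pure function.  The
# returned strings are made-up labels, not constants used elsewhere.
def _ExampleRebootStrategy(instance_running, reboot_type):
  """Example only: classify how a reboot request will be carried out."""
  if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                          constants.INSTANCE_REBOOT_HARD]:
    return "in-place reboot through the hypervisor"
  elif instance_running:
    return "full cycle: shutdown, disk restart, fresh start"
  else:
    return "instance already stopped: plain disk and instance start"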
6315

    
6316

    
6317
class LUInstanceShutdown(LogicalUnit):
6318
  """Shutdown an instance.
6319

6320
  """
6321
  HPATH = "instance-stop"
6322
  HTYPE = constants.HTYPE_INSTANCE
6323
  REQ_BGL = False
6324

    
6325
  def ExpandNames(self):
6326
    self._ExpandAndLockInstance()
6327

    
6328
  def BuildHooksEnv(self):
6329
    """Build hooks env.
6330

6331
    This runs on master, primary and secondary nodes of the instance.
6332

6333
    """
6334
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6335
    env["TIMEOUT"] = self.op.timeout
6336
    return env
6337

    
6338
  def BuildHooksNodes(self):
6339
    """Build hooks nodes.
6340

6341
    """
6342
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6343
    return (nl, nl)
6344

    
6345
  def CheckPrereq(self):
6346
    """Check prerequisites.
6347

6348
    This checks that the instance is in the cluster.
6349

6350
    """
6351
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6352
    assert self.instance is not None, \
6353
      "Cannot retrieve locked instance %s" % self.op.instance_name
6354

    
6355
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6356

    
6357
    self.primary_offline = \
6358
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6359

    
6360
    if self.primary_offline and self.op.ignore_offline_nodes:
6361
      self.proc.LogWarning("Ignoring offline primary node")
6362
    else:
6363
      _CheckNodeOnline(self, self.instance.primary_node)
6364

    
6365
  def Exec(self, feedback_fn):
6366
    """Shutdown the instance.
6367

6368
    """
6369
    instance = self.instance
6370
    node_current = instance.primary_node
6371
    timeout = self.op.timeout
6372

    
6373
    if not self.op.no_remember:
6374
      self.cfg.MarkInstanceDown(instance.name)
6375

    
6376
    if self.primary_offline:
6377
      assert self.op.ignore_offline_nodes
6378
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6379
    else:
6380
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6381
      msg = result.fail_msg
6382
      if msg:
6383
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6384

    
6385
      _ShutdownInstanceDisks(self, instance)
6386

    
6387

    
6388
class LUInstanceReinstall(LogicalUnit):
6389
  """Reinstall an instance.
6390

6391
  """
6392
  HPATH = "instance-reinstall"
6393
  HTYPE = constants.HTYPE_INSTANCE
6394
  REQ_BGL = False
6395

    
6396
  def ExpandNames(self):
6397
    self._ExpandAndLockInstance()
6398

    
6399
  def BuildHooksEnv(self):
6400
    """Build hooks env.
6401

6402
    This runs on master, primary and secondary nodes of the instance.
6403

6404
    """
6405
    return _BuildInstanceHookEnvByObject(self, self.instance)
6406

    
6407
  def BuildHooksNodes(self):
6408
    """Build hooks nodes.
6409

6410
    """
6411
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6412
    return (nl, nl)
6413

    
6414
  def CheckPrereq(self):
6415
    """Check prerequisites.
6416

6417
    This checks that the instance is in the cluster and is not running.
6418

6419
    """
6420
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6421
    assert instance is not None, \
6422
      "Cannot retrieve locked instance %s" % self.op.instance_name
6423
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6424
                     " offline, cannot reinstall")
6425
    for node in instance.secondary_nodes:
6426
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6427
                       " cannot reinstall")
6428

    
6429
    if instance.disk_template == constants.DT_DISKLESS:
6430
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6431
                                 self.op.instance_name,
6432
                                 errors.ECODE_INVAL)
6433
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6434

    
6435
    if self.op.os_type is not None:
6436
      # OS verification
6437
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6438
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6439
      instance_os = self.op.os_type
6440
    else:
6441
      instance_os = instance.os
6442

    
6443
    nodelist = list(instance.all_nodes)
6444

    
6445
    if self.op.osparams:
6446
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6447
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6448
      self.os_inst = i_osdict # the new dict (without defaults)
6449
    else:
6450
      self.os_inst = None
6451

    
6452
    self.instance = instance
6453

    
6454
  def Exec(self, feedback_fn):
6455
    """Reinstall the instance.
6456

6457
    """
6458
    inst = self.instance
6459

    
6460
    if self.op.os_type is not None:
6461
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6462
      inst.os = self.op.os_type
6463
      # Write to configuration
6464
      self.cfg.Update(inst, feedback_fn)
6465

    
6466
    _StartInstanceDisks(self, inst, None)
6467
    try:
6468
      feedback_fn("Running the instance OS create scripts...")
6469
      # FIXME: pass debug option from opcode to backend
6470
      result = self.rpc.call_instance_os_add(inst.primary_node,
6471
                                             (inst, self.os_inst), True,
6472
                                             self.op.debug_level)
6473
      result.Raise("Could not install OS for instance %s on node %s" %
6474
                   (inst.name, inst.primary_node))
6475
    finally:
6476
      _ShutdownInstanceDisks(self, inst)
6477

    
6478

    
6479
class LUInstanceRecreateDisks(LogicalUnit):
6480
  """Recreate an instance's missing disks.
6481

6482
  """
6483
  HPATH = "instance-recreate-disks"
6484
  HTYPE = constants.HTYPE_INSTANCE
6485
  REQ_BGL = False
6486

    
6487
  def CheckArguments(self):
6488
    # normalise the disk list
6489
    self.op.disks = sorted(frozenset(self.op.disks))
6490

    
6491
  def ExpandNames(self):
6492
    self._ExpandAndLockInstance()
6493
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6494
    if self.op.nodes:
6495
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6496
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6497
    else:
6498
      self.needed_locks[locking.LEVEL_NODE] = []
6499

    
6500
  def DeclareLocks(self, level):
6501
    if level == locking.LEVEL_NODE:
6502
      # if we replace the nodes, we only need to lock the old primary,
6503
      # otherwise we need to lock all nodes for disk re-creation
6504
      primary_only = bool(self.op.nodes)
6505
      self._LockInstancesNodes(primary_only=primary_only)
6506
    elif level == locking.LEVEL_NODE_RES:
6507
      # Copy node locks
6508
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6509
        self.needed_locks[locking.LEVEL_NODE][:]
6510

    
6511
  def BuildHooksEnv(self):
6512
    """Build hooks env.
6513

6514
    This runs on master, primary and secondary nodes of the instance.
6515

6516
    """
6517
    return _BuildInstanceHookEnvByObject(self, self.instance)
6518

    
6519
  def BuildHooksNodes(self):
6520
    """Build hooks nodes.
6521

6522
    """
6523
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6524
    return (nl, nl)
6525

    
6526
  def CheckPrereq(self):
6527
    """Check prerequisites.
6528

6529
    This checks that the instance is in the cluster and is not running.
6530

6531
    """
6532
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6533
    assert instance is not None, \
6534
      "Cannot retrieve locked instance %s" % self.op.instance_name
6535
    if self.op.nodes:
6536
      if len(self.op.nodes) != len(instance.all_nodes):
6537
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6538
                                   " %d replacement nodes were specified" %
6539
                                   (instance.name, len(instance.all_nodes),
6540
                                    len(self.op.nodes)),
6541
                                   errors.ECODE_INVAL)
6542
      assert instance.disk_template != constants.DT_DRBD8 or \
6543
          len(self.op.nodes) == 2
6544
      assert instance.disk_template != constants.DT_PLAIN or \
6545
          len(self.op.nodes) == 1
6546
      primary_node = self.op.nodes[0]
6547
    else:
6548
      primary_node = instance.primary_node
6549
    _CheckNodeOnline(self, primary_node)
6550

    
6551
    if instance.disk_template == constants.DT_DISKLESS:
6552
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6553
                                 self.op.instance_name, errors.ECODE_INVAL)
6554
    # if we replace nodes *and* the old primary is offline, we don't
6555
    # check
6556
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6557
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6558
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6559
    if not (self.op.nodes and old_pnode.offline):
6560
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6561
                          msg="cannot recreate disks")
6562

    
6563
    if not self.op.disks:
6564
      self.op.disks = range(len(instance.disks))
6565
    else:
6566
      for idx in self.op.disks:
6567
        if idx >= len(instance.disks):
6568
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6569
                                     errors.ECODE_INVAL)
6570
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6571
      raise errors.OpPrereqError("Can't recreate disks partially and"
6572
                                 " change the nodes at the same time",
6573
                                 errors.ECODE_INVAL)
6574
    self.instance = instance
6575

    
6576
  def Exec(self, feedback_fn):
6577
    """Recreate the disks.
6578

6579
    """
6580
    instance = self.instance
6581

    
6582
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6583
            self.owned_locks(locking.LEVEL_NODE_RES))
6584

    
6585
    to_skip = []
6586
    mods = [] # keeps track of needed logical_id changes
6587

    
6588
    for idx, disk in enumerate(instance.disks):
6589
      if idx not in self.op.disks: # disk idx has not been passed in
6590
        to_skip.append(idx)
6591
        continue
6592
      # update secondaries for disks, if needed
6593
      if self.op.nodes:
6594
        if disk.dev_type == constants.LD_DRBD8:
6595
          # need to update the nodes and minors
6596
          assert len(self.op.nodes) == 2
6597
          assert len(disk.logical_id) == 6 # otherwise disk internals
6598
                                           # have changed
6599
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6600
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6601
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6602
                    new_minors[0], new_minors[1], old_secret)
6603
          assert len(disk.logical_id) == len(new_id)
6604
          mods.append((idx, new_id))
6605

    
6606
    # now that we have passed all asserts above, we can apply the mods
6607
    # in a single run (to avoid partial changes)
6608
    for idx, new_id in mods:
6609
      instance.disks[idx].logical_id = new_id
6610

    
6611
    # change primary node, if needed
6612
    if self.op.nodes:
6613
      instance.primary_node = self.op.nodes[0]
6614
      self.LogWarning("Changing the instance's nodes, you will have to"
6615
                      " remove any disks left on the older nodes manually")
6616

    
6617
    if self.op.nodes:
6618
      self.cfg.Update(instance, feedback_fn)
6619

    
6620
    _CreateDisks(self, instance, to_skip=to_skip)
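
# Illustrative sketch, not part of the original module: the DRBD
# logical_id rewritten above is a 6-tuple of the form
#
#   (node_a, node_b, port, minor_a, minor_b, secret)
#
# When the disks are recreated on new nodes, only the node names and the
# newly allocated minors change; the port and the shared secret are
# carried over from the old logical_id.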
6621

    
6622

    
6623
class LUInstanceRename(LogicalUnit):
6624
  """Rename an instance.
6625

6626
  """
6627
  HPATH = "instance-rename"
6628
  HTYPE = constants.HTYPE_INSTANCE
6629

    
6630
  def CheckArguments(self):
6631
    """Check arguments.
6632

6633
    """
6634
    if self.op.ip_check and not self.op.name_check:
6635
      # TODO: make the ip check more flexible and not depend on the name check
6636
      raise errors.OpPrereqError("IP address check requires a name check",
6637
                                 errors.ECODE_INVAL)
6638

    
6639
  def BuildHooksEnv(self):
6640
    """Build hooks env.
6641

6642
    This runs on master, primary and secondary nodes of the instance.
6643

6644
    """
6645
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6646
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6647
    return env
6648

    
6649
  def BuildHooksNodes(self):
6650
    """Build hooks nodes.
6651

6652
    """
6653
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6654
    return (nl, nl)
6655

    
6656
  def CheckPrereq(self):
6657
    """Check prerequisites.
6658

6659
    This checks that the instance is in the cluster and is not running.
6660

6661
    """
6662
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6663
                                                self.op.instance_name)
6664
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6665
    assert instance is not None
6666
    _CheckNodeOnline(self, instance.primary_node)
6667
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6668
                        msg="cannot rename")
6669
    self.instance = instance
6670

    
6671
    new_name = self.op.new_name
6672
    if self.op.name_check:
6673
      hostname = netutils.GetHostname(name=new_name)
6674
      if hostname != new_name:
6675
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6676
                     hostname.name)
6677
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6678
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6679
                                    " same as given hostname '%s'") %
6680
                                    (hostname.name, self.op.new_name),
6681
                                    errors.ECODE_INVAL)
6682
      new_name = self.op.new_name = hostname.name
6683
      if (self.op.ip_check and
6684
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6685
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6686
                                   (hostname.ip, new_name),
6687
                                   errors.ECODE_NOTUNIQUE)
6688

    
6689
    instance_list = self.cfg.GetInstanceList()
6690
    if new_name in instance_list and new_name != instance.name:
6691
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6692
                                 new_name, errors.ECODE_EXISTS)
6693

    
6694
  def Exec(self, feedback_fn):
6695
    """Rename the instance.
6696

6697
    """
6698
    inst = self.instance
6699
    old_name = inst.name
6700

    
6701
    rename_file_storage = False
6702
    if (inst.disk_template in constants.DTS_FILEBASED and
6703
        self.op.new_name != inst.name):
6704
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6705
      rename_file_storage = True
6706

    
6707
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6708
    # Change the instance lock. This is definitely safe while we hold the BGL.
6709
    # Otherwise the new lock would have to be added in acquired mode.
6710
    assert self.REQ_BGL
6711
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6712
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6713

    
6714
    # re-read the instance from the configuration after rename
6715
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6716

    
6717
    if rename_file_storage:
6718
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6719
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6720
                                                     old_file_storage_dir,
6721
                                                     new_file_storage_dir)
6722
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6723
                   " (but the instance has been renamed in Ganeti)" %
6724
                   (inst.primary_node, old_file_storage_dir,
6725
                    new_file_storage_dir))
6726

    
6727
    _StartInstanceDisks(self, inst, None)
6728
    try:
6729
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6730
                                                 old_name, self.op.debug_level)
6731
      msg = result.fail_msg
6732
      if msg:
6733
        msg = ("Could not run OS rename script for instance %s on node %s"
6734
               " (but the instance has been renamed in Ganeti): %s" %
6735
               (inst.name, inst.primary_node, msg))
6736
        self.proc.LogWarning(msg)
6737
    finally:
6738
      _ShutdownInstanceDisks(self, inst)
6739

    
6740
    return inst.name
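
# Illustrative sketch, not part of the original module: the name check
# performed in CheckPrereq above, extracted into a helper; it resolves
# the requested name and refuses anything the resolver does not agree
# with.
def _ExampleResolveNewName(new_name):
  """Example only: resolve and validate a new instance name."""
  hostname = netutils.GetHostname(name=new_name)
  if not utils.MatchNameComponent(new_name, [hostname.name]):
    raise errors.OpPrereqError("Resolved hostname '%s' does not look the"
                               " same as given hostname '%s'" %
                               (hostname.name, new_name), errors.ECODE_INVAL)
  return hostname.name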
6741

    
6742

    
6743
class LUInstanceRemove(LogicalUnit):
6744
  """Remove an instance.
6745

6746
  """
6747
  HPATH = "instance-remove"
6748
  HTYPE = constants.HTYPE_INSTANCE
6749
  REQ_BGL = False
6750

    
6751
  def ExpandNames(self):
6752
    self._ExpandAndLockInstance()
6753
    self.needed_locks[locking.LEVEL_NODE] = []
6754
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6755
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6756

    
6757
  def DeclareLocks(self, level):
6758
    if level == locking.LEVEL_NODE:
6759
      self._LockInstancesNodes()
6760
    elif level == locking.LEVEL_NODE_RES:
6761
      # Copy node locks
6762
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6763
        self.needed_locks[locking.LEVEL_NODE][:]
6764

    
6765
  def BuildHooksEnv(self):
6766
    """Build hooks env.
6767

6768
    This runs on master, primary and secondary nodes of the instance.
6769

6770
    """
6771
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6772
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6773
    return env
6774

    
6775
  def BuildHooksNodes(self):
6776
    """Build hooks nodes.
6777

6778
    """
6779
    nl = [self.cfg.GetMasterNode()]
6780
    nl_post = list(self.instance.all_nodes) + nl
6781
    return (nl, nl_post)
6782

    
6783
  def CheckPrereq(self):
6784
    """Check prerequisites.
6785

6786
    This checks that the instance is in the cluster.
6787

6788
    """
6789
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6790
    assert self.instance is not None, \
6791
      "Cannot retrieve locked instance %s" % self.op.instance_name
6792

    
6793
  def Exec(self, feedback_fn):
6794
    """Remove the instance.
6795

6796
    """
6797
    instance = self.instance
6798
    logging.info("Shutting down instance %s on node %s",
6799
                 instance.name, instance.primary_node)
6800

    
6801
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6802
                                             self.op.shutdown_timeout)
6803
    msg = result.fail_msg
6804
    if msg:
6805
      if self.op.ignore_failures:
6806
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6807
      else:
6808
        raise errors.OpExecError("Could not shutdown instance %s on"
6809
                                 " node %s: %s" %
6810
                                 (instance.name, instance.primary_node, msg))
6811

    
6812
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6813
            self.owned_locks(locking.LEVEL_NODE_RES))
6814
    assert not (set(instance.all_nodes) -
6815
                self.owned_locks(locking.LEVEL_NODE)), \
6816
      "Not owning correct locks"
6817

    
6818
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6819

    
6820

    
6821
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6822
  """Utility function to remove an instance.
6823

6824
  """
6825
  logging.info("Removing block devices for instance %s", instance.name)
6826

    
6827
  if not _RemoveDisks(lu, instance):
6828
    if not ignore_failures:
6829
      raise errors.OpExecError("Can't remove instance's disks")
6830
    feedback_fn("Warning: can't remove instance's disks")
6831

    
6832
  logging.info("Removing instance %s out of cluster config", instance.name)
6833

    
6834
  lu.cfg.RemoveInstance(instance.name)
6835

    
6836
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6837
    "Instance lock removal conflict"
6838

    
6839
  # Remove lock for the instance
6840
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6841

    
6842

    
6843
class LUInstanceQuery(NoHooksLU):
6844
  """Logical unit for querying instances.
6845

6846
  """
6847
  # pylint: disable=W0142
6848
  REQ_BGL = False
6849

    
6850
  def CheckArguments(self):
6851
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6852
                             self.op.output_fields, self.op.use_locking)
6853

    
6854
  def ExpandNames(self):
6855
    self.iq.ExpandNames(self)
6856

    
6857
  def DeclareLocks(self, level):
6858
    self.iq.DeclareLocks(self, level)
6859

    
6860
  def Exec(self, feedback_fn):
6861
    return self.iq.OldStyleQuery(self)
6862

    
6863

    
6864
class LUInstanceFailover(LogicalUnit):
6865
  """Failover an instance.
6866

6867
  """
6868
  HPATH = "instance-failover"
6869
  HTYPE = constants.HTYPE_INSTANCE
6870
  REQ_BGL = False
6871

    
6872
  def CheckArguments(self):
6873
    """Check the arguments.
6874

6875
    """
6876
    self.iallocator = getattr(self.op, "iallocator", None)
6877
    self.target_node = getattr(self.op, "target_node", None)
6878

    
6879
  def ExpandNames(self):
6880
    self._ExpandAndLockInstance()
6881

    
6882
    if self.op.target_node is not None:
6883
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6884

    
6885
    self.needed_locks[locking.LEVEL_NODE] = []
6886
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6887

    
6888
    ignore_consistency = self.op.ignore_consistency
6889
    shutdown_timeout = self.op.shutdown_timeout
6890
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6891
                                       cleanup=False,
6892
                                       failover=True,
6893
                                       ignore_consistency=ignore_consistency,
6894
                                       shutdown_timeout=shutdown_timeout)
6895
    self.tasklets = [self._migrater]
6896

    
6897
  def DeclareLocks(self, level):
6898
    if level == locking.LEVEL_NODE:
6899
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6900
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6901
        if self.op.target_node is None:
6902
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6903
        else:
6904
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6905
                                                   self.op.target_node]
6906
        del self.recalculate_locks[locking.LEVEL_NODE]
6907
      else:
6908
        self._LockInstancesNodes()
6909

    
6910
  def BuildHooksEnv(self):
6911
    """Build hooks env.
6912

6913
    This runs on master, primary and secondary nodes of the instance.
6914

6915
    """
6916
    instance = self._migrater.instance
6917
    source_node = instance.primary_node
6918
    target_node = self.op.target_node
6919
    env = {
6920
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6921
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6922
      "OLD_PRIMARY": source_node,
6923
      "NEW_PRIMARY": target_node,
6924
      }
6925

    
6926
    if instance.disk_template in constants.DTS_INT_MIRROR:
6927
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6928
      env["NEW_SECONDARY"] = source_node
6929
    else:
6930
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6931

    
6932
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6933

    
6934
    return env
6935

    
6936
  def BuildHooksNodes(self):
6937
    """Build hooks nodes.
6938

6939
    """
6940
    instance = self._migrater.instance
6941
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6942
    return (nl, nl + [instance.primary_node])
6943

    
6944

    
6945
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
7019
  """Move an instance by data-copying.
7020

7021
  """
7022
  HPATH = "instance-move"
7023
  HTYPE = constants.HTYPE_INSTANCE
7024
  REQ_BGL = False
7025

    
7026
  def ExpandNames(self):
7027
    self._ExpandAndLockInstance()
7028
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7029
    self.op.target_node = target_node
7030
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
7031
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7032
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7033

    
7034
  def DeclareLocks(self, level):
7035
    if level == locking.LEVEL_NODE:
7036
      self._LockInstancesNodes(primary_only=True)
7037
    elif level == locking.LEVEL_NODE_RES:
7038
      # Copy node locks
7039
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7040
        self.needed_locks[locking.LEVEL_NODE][:]
7041

    
7042
  def BuildHooksEnv(self):
7043
    """Build hooks env.
7044

7045
    This runs on master, primary and secondary nodes of the instance.
7046

7047
    """
7048
    env = {
7049
      "TARGET_NODE": self.op.target_node,
7050
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7051
      }
7052
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7053
    return env
7054

    
7055
  def BuildHooksNodes(self):
7056
    """Build hooks nodes.
7057

7058
    """
7059
    nl = [
7060
      self.cfg.GetMasterNode(),
7061
      self.instance.primary_node,
7062
      self.op.target_node,
7063
      ]
7064
    return (nl, nl)
7065

    
7066
  def CheckPrereq(self):
7067
    """Check prerequisites.
7068

7069
    This checks that the instance is in the cluster.
7070

7071
    """
7072
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7073
    assert self.instance is not None, \
7074
      "Cannot retrieve locked instance %s" % self.op.instance_name
7075

    
7076
    node = self.cfg.GetNodeInfo(self.op.target_node)
7077
    assert node is not None, \
7078
      "Cannot retrieve locked node %s" % self.op.target_node
7079

    
7080
    self.target_node = target_node = node.name
7081

    
7082
    if target_node == instance.primary_node:
7083
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7084
                                 (instance.name, target_node),
7085
                                 errors.ECODE_STATE)
7086

    
7087
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7088

    
7089
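    # The move copies raw device contents with blockdev_export, so only
    # simple LVM- or file-backed disks can be handled here; mirrored
    # templates (e.g. DRBD) are served by failover/migration instead.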
    for idx, dsk in enumerate(instance.disks):
7090
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7091
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7092
                                   " cannot copy" % idx, errors.ECODE_STATE)
7093

    
7094
    _CheckNodeOnline(self, target_node)
7095
    _CheckNodeNotDrained(self, target_node)
7096
    _CheckNodeVmCapable(self, target_node)
7097

    
7098
    if instance.admin_state == constants.ADMINST_UP:
7099
      # check memory requirements on the secondary node
7100
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7101
                           instance.name, bep[constants.BE_MEMORY],
7102
                           instance.hypervisor)
7103
    else:
7104
      self.LogInfo("Not checking memory on the secondary node as"
7105
                   " instance will not be started")
7106

    
7107
    # check bridge existence
7108
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7109

    
7110
  def Exec(self, feedback_fn):
7111
    """Move an instance.
7112

7113
    The move is done by shutting it down on its present node, copying
7114
    the data over (slow) and starting it on the new node.
7115

7116
    """
7117
    instance = self.instance
7118

    
7119
    source_node = instance.primary_node
7120
    target_node = self.target_node
7121

    
7122
    self.LogInfo("Shutting down instance %s on source node %s",
7123
                 instance.name, source_node)
7124

    
7125
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7126
            self.owned_locks(locking.LEVEL_NODE_RES))
7127

    
7128
    result = self.rpc.call_instance_shutdown(source_node, instance,
7129
                                             self.op.shutdown_timeout)
7130
    msg = result.fail_msg
7131
    if msg:
7132
      if self.op.ignore_consistency:
7133
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7134
                             " Proceeding anyway. Please make sure node"
7135
                             " %s is down. Error details: %s",
7136
                             instance.name, source_node, source_node, msg)
7137
      else:
7138
        raise errors.OpExecError("Could not shutdown instance %s on"
7139
                                 " node %s: %s" %
7140
                                 (instance.name, source_node, msg))
7141

    
7142
    # create the target disks
7143
    try:
7144
      _CreateDisks(self, instance, target_node=target_node)
7145
    except errors.OpExecError:
7146
      self.LogWarning("Device creation failed, reverting...")
7147
      try:
7148
        _RemoveDisks(self, instance, target_node=target_node)
7149
      finally:
7150
        self.cfg.ReleaseDRBDMinors(instance.name)
7151
        raise
7152

    
7153
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7154

    
7155
    errs = []
7156
    # activate, get path, copy the data over
7157
    for idx, disk in enumerate(instance.disks):
7158
      self.LogInfo("Copying data for disk %d", idx)
7159
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7160
                                               instance.name, True, idx)
7161
      if result.fail_msg:
7162
        self.LogWarning("Can't assemble newly created disk %d: %s",
7163
                        idx, result.fail_msg)
7164
        errs.append(result.fail_msg)
7165
        break
7166
      dev_path = result.payload
7167
      result = self.rpc.call_blockdev_export(source_node, disk,
7168
                                             target_node, dev_path,
7169
                                             cluster_name)
7170
      if result.fail_msg:
7171
        self.LogWarning("Can't copy data over for disk %d: %s",
7172
                        idx, result.fail_msg)
7173
        errs.append(result.fail_msg)
7174
        break
7175

    
7176
    if errs:
7177
      self.LogWarning("Some disks failed to copy, aborting")
7178
      try:
7179
        _RemoveDisks(self, instance, target_node=target_node)
7180
      finally:
7181
        self.cfg.ReleaseDRBDMinors(instance.name)
7182
        raise errors.OpExecError("Errors during disk copy: %s" %
7183
                                 (",".join(errs),))
7184

    
7185
    instance.primary_node = target_node
7186
    self.cfg.Update(instance, feedback_fn)
7187

    
7188
    self.LogInfo("Removing the disks on the original node")
7189
    _RemoveDisks(self, instance, target_node=source_node)
7190

    
7191
    # Only start the instance if it's marked as up
7192
    if instance.admin_state == constants.ADMINST_UP:
7193
      self.LogInfo("Starting instance %s on node %s",
7194
                   instance.name, target_node)
7195

    
7196
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7197
                                           ignore_secondaries=True)
7198
      if not disks_ok:
7199
        _ShutdownInstanceDisks(self, instance)
7200
        raise errors.OpExecError("Can't activate the instance's disks")
7201

    
7202
      result = self.rpc.call_instance_start(target_node,
7203
                                            (instance, None, None), False)
7204
      msg = result.fail_msg
7205
      if msg:
7206
        _ShutdownInstanceDisks(self, instance)
7207
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7208
                                 (instance.name, target_node, msg))
7209

    
7210

    
7211
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare one migration job per primary instance of this node
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
7320
    """Check prerequisites.
7321

7322
    This checks that the instance is in the cluster.
7323

7324
    """
7325
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7326
    instance = self.cfg.GetInstanceInfo(instance_name)
7327
    assert instance is not None
7328
    self.instance = instance
7329

    
7330
    if (not self.cleanup and
7331
        not instance.admin_state == constants.ADMINST_UP and
7332
        not self.failover and self.fallback):
7333
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7334
                      " switching to failover")
7335
      self.failover = True
7336

    
7337
    if instance.disk_template not in constants.DTS_MIRRORED:
7338
      if self.failover:
7339
        text = "failovers"
7340
      else:
7341
        text = "migrations"
7342
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7343
                                 " %s" % (instance.disk_template, text),
7344
                                 errors.ECODE_STATE)
7345

    
7346
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7347
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7348

    
7349
      if self.lu.op.iallocator:
7350
        self._RunAllocator()
7351
      else:
7352
        # We set self.target_node as it is required by
7353
        # BuildHooksEnv
7354
        self.target_node = self.lu.op.target_node
7355

    
7356
      # self.target_node is already populated, either directly or by the
7357
      # iallocator run
7358
      target_node = self.target_node
7359
      if self.target_node == instance.primary_node:
7360
        raise errors.OpPrereqError("Cannot migrate instance %s"
7361
                                   " to its primary (%s)" %
7362
                                   (instance.name, instance.primary_node))
7363

    
7364
      if len(self.lu.tasklets) == 1:
7365
        # It is safe to release locks only when we're the only tasklet
7366
        # in the LU
7367
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7368
                      keep=[instance.primary_node, self.target_node])
7369

    
7370
    else:
7371
      secondary_nodes = instance.secondary_nodes
7372
      if not secondary_nodes:
7373
        raise errors.ConfigurationError("No secondary node but using"
7374
                                        " %s disk template" %
7375
                                        instance.disk_template)
7376
      target_node = secondary_nodes[0]
7377
      if self.lu.op.iallocator or (self.lu.op.target_node and
7378
                                   self.lu.op.target_node != target_node):
7379
        if self.failover:
7380
          text = "failed over"
7381
        else:
7382
          text = "migrated"
7383
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7384
                                   " be %s to arbitrary nodes"
7385
                                   " (neither an iallocator nor a target"
7386
                                   " node can be passed)" %
7387
                                   (instance.disk_template, text),
7388
                                   errors.ECODE_INVAL)
7389

    
7390
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7391

    
7392
    # check memory requirements on the secondary node
7393
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7394
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7395
                           instance.name, i_be[constants.BE_MEMORY],
7396
                           instance.hypervisor)
7397
    else:
7398
      self.lu.LogInfo("Not checking memory on the secondary node as"
7399
                      " instance will not be started")
7400

    
7401
    # check bridge existence
7402
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7403

    
7404
    if not self.cleanup:
7405
      _CheckNodeNotDrained(self.lu, target_node)
7406
      if not self.failover:
7407
        result = self.rpc.call_instance_migratable(instance.primary_node,
7408
                                                   instance)
7409
        if result.fail_msg and self.fallback:
7410
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7411
                          " failover")
7412
          self.failover = True
7413
        else:
7414
          result.Raise("Can't migrate, please use failover",
7415
                       prereq=True, ecode=errors.ECODE_STATE)
7416

    
7417
    assert not (self.failover and self.cleanup)
7418

    
7419
    if not self.failover:
7420
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7421
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7422
                                   " parameters are accepted",
7423
                                   errors.ECODE_INVAL)
7424
      if self.lu.op.live is not None:
7425
        if self.lu.op.live:
7426
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7427
        else:
7428
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7429
        # reset the 'live' parameter to None so that repeated
7430
        # invocations of CheckPrereq do not raise an exception
7431
        self.lu.op.live = None
7432
      elif self.lu.op.mode is None:
7433
        # read the default value from the hypervisor
7434
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7435
                                                skip_globals=False)
7436
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7437

    
7438
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7439
    else:
7440
      # Failover is never live
7441
      self.live = False
7442

    
7443
  def _RunAllocator(self):
7444
    """Run the allocator based on input opcode.
7445

7446
    """
7447
    ial = IAllocator(self.cfg, self.rpc,
7448
                     mode=constants.IALLOCATOR_MODE_RELOC,
7449
                     name=self.instance_name,
7450
                     # TODO See why hail breaks with a single node below
7451
                     relocate_from=[self.instance.primary_node,
7452
                                    self.instance.primary_node],
7453
                     )
7454

    
7455
    ial.Run(self.lu.op.iallocator)
7456

    
7457
    if not ial.success:
7458
      raise errors.OpPrereqError("Can't compute nodes using"
7459
                                 " iallocator '%s': %s" %
7460
                                 (self.lu.op.iallocator, ial.info),
7461
                                 errors.ECODE_NORES)
7462
    if len(ial.result) != ial.required_nodes:
7463
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7464
                                 " of nodes (%s), required %s" %
7465
                                 (self.lu.op.iallocator, len(ial.result),
7466
                                  ial.required_nodes), errors.ECODE_FAULT)
7467
    self.target_node = ial.result[0]
7468
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7469
                 self.instance_name, self.lu.op.iallocator,
7470
                 utils.CommaJoin(ial.result))
7471

    
7472
  def _WaitUntilSync(self):
7473
    """Poll with custom rpc for disk sync.
7474

7475
    This uses our own step-based rpc call.
7476

7477
    """
7478
    self.feedback_fn("* wait until resync is done")
7479
    all_done = False
7480
    while not all_done:
7481
      all_done = True
7482
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7483
                                            self.nodes_ip,
7484
                                            self.instance.disks)
7485
      min_percent = 100
7486
      for node, nres in result.items():
7487
        nres.Raise("Cannot resync disks on node %s" % node)
7488
        node_done, node_percent = nres.payload
7489
        all_done = all_done and node_done
7490
        if node_percent is not None:
7491
          min_percent = min(min_percent, node_percent)
7492
      if not all_done:
7493
        if min_percent < 100:
7494
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7495
        time.sleep(2)
7496

    
7497
  def _EnsureSecondary(self, node):
7498
    """Demote a node to secondary.
7499

7500
    """
7501
    self.feedback_fn("* switching node %s to secondary mode" % node)
7502

    
7503
    for dev in self.instance.disks:
7504
      self.cfg.SetDiskID(dev, node)
7505

    
7506
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7507
                                          self.instance.disks)
7508
    result.Raise("Cannot change disk to secondary on node %s" % node)
7509

    
7510
  def _GoStandalone(self):
7511
    """Disconnect from the network.
7512

7513
    """
7514
    self.feedback_fn("* changing into standalone mode")
7515
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7516
                                               self.instance.disks)
7517
    for node, nres in result.items():
7518
      nres.Raise("Cannot disconnect disks node %s" % node)
7519

    
7520
  def _GoReconnect(self, multimaster):
7521
    """Reconnect to the network.
7522

7523
    """
7524
    if multimaster:
7525
      msg = "dual-master"
7526
    else:
7527
      msg = "single-master"
7528
    self.feedback_fn("* changing disks into %s mode" % msg)
7529
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7530
                                           self.instance.disks,
7531
                                           self.instance.name, multimaster)
7532
    for node, nres in result.items():
7533
      nres.Raise("Cannot change disks config on node %s" % node)
7534

    
7535
  def _ExecCleanup(self):
7536
    """Try to cleanup after a failed migration.
7537

7538
    The cleanup is done by:
7539
      - check that the instance is running only on one node
7540
        (and update the config if needed)
7541
      - change disks on its secondary node to secondary
7542
      - wait until disks are fully synchronized
7543
      - disconnect from the network
7544
      - change disks into single-master mode
7545
      - wait again until disks are fully synchronized
7546

7547
    """
7548
    instance = self.instance
7549
    target_node = self.target_node
7550
    source_node = self.source_node
7551

    
7552
    # check running on only one node
7553
    self.feedback_fn("* checking where the instance actually runs"
7554
                     " (if this hangs, the hypervisor might be in"
7555
                     " a bad state)")
7556
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7557
    for node, result in ins_l.items():
7558
      result.Raise("Can't contact node %s" % node)
7559

    
7560
    runningon_source = instance.name in ins_l[source_node].payload
7561
    runningon_target = instance.name in ins_l[target_node].payload
7562

    
7563
    if runningon_source and runningon_target:
7564
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7565
                               " or the hypervisor is confused; you will have"
7566
                               " to ensure manually that it runs only on one"
7567
                               " and restart this operation")
7568

    
7569
    if not (runningon_source or runningon_target):
7570
      raise errors.OpExecError("Instance does not seem to be running at all;"
7571
                               " in this case it's safer to repair by"
7572
                               " running 'gnt-instance stop' to ensure disk"
7573
                               " shutdown, and then restarting it")
7574

    
7575
    if runningon_target:
7576
      # the migration has actually succeeded, we need to update the config
7577
      self.feedback_fn("* instance running on secondary node (%s),"
7578
                       " updating config" % target_node)
7579
      instance.primary_node = target_node
7580
      self.cfg.Update(instance, self.feedback_fn)
7581
      demoted_node = source_node
7582
    else:
7583
      self.feedback_fn("* instance confirmed to be running on its"
7584
                       " primary node (%s)" % source_node)
7585
      demoted_node = target_node
7586

    
7587
    if instance.disk_template in constants.DTS_INT_MIRROR:
7588
      self._EnsureSecondary(demoted_node)
7589
      try:
7590
        self._WaitUntilSync()
7591
      except errors.OpExecError:
7592
        # we ignore here errors, since if the device is standalone, it
7593
        # won't be able to sync
7594
        pass
7595
      self._GoStandalone()
7596
      self._GoReconnect(False)
7597
      self._WaitUntilSync()
7598

    
7599
    self.feedback_fn("* done")
7600

    
7601
  def _RevertDiskStatus(self):
7602
    """Try to revert the disk status after a failed migration.
7603

7604
    """
7605
    target_node = self.target_node
7606
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7607
      return
7608

    
7609
    try:
7610
      self._EnsureSecondary(target_node)
7611
      self._GoStandalone()
7612
      self._GoReconnect(False)
7613
      self._WaitUntilSync()
7614
    except errors.OpExecError, err:
7615
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7616
                         " please try to recover the instance manually;"
7617
                         " error '%s'" % str(err))
7618

    
7619
  def _AbortMigration(self):
7620
    """Call the hypervisor code to abort a started migration.
7621

7622
    """
7623
    instance = self.instance
7624
    target_node = self.target_node
7625
    source_node = self.source_node
7626
    migration_info = self.migration_info
7627

    
7628
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7629
                                                                 instance,
7630
                                                                 migration_info,
7631
                                                                 False)
7632
    abort_msg = abort_result.fail_msg
7633
    if abort_msg:
7634
      logging.error("Aborting migration failed on target node %s: %s",
7635
                    target_node, abort_msg)
7636
      # Don't raise an exception here, as we still have to try to revert the
7637
      # disk status, even if this step failed.
7638

    
7639
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7640
        instance, False, self.live)
7641
    abort_msg = abort_result.fail_msg
7642
    if abort_msg:
7643
      logging.error("Aborting migration failed on source node %s: %s",
7644
                    source_node, abort_msg)
7645

    
7646
  def _ExecMigration(self):
7647
    """Migrate an instance.
7648

7649
    The migrate is done by:
7650
      - change the disks into dual-master mode
7651
      - wait until disks are fully synchronized again
7652
      - migrate the instance
7653
      - change disks on the new secondary node (the old primary) to secondary
7654
      - wait until disks are fully synchronized
7655
      - change disks into single-master mode
7656

7657
    """
7658
    instance = self.instance
7659
    target_node = self.target_node
7660
    source_node = self.source_node
7661

    
7662
    # Check for hypervisor version mismatch and warn the user.
7663
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7664
                                       None, self.instance.hypervisor)
7665
    src_info = nodeinfo[source_node]
7666
    dst_info = nodeinfo[target_node]
7667

    
7668
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7669
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7670
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7671
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7672
      if src_version != dst_version:
7673
        self.feedback_fn("* warning: hypervisor version mismatch between"
7674
                         " source (%s) and target (%s) node" %
7675
                         (src_version, dst_version))
7676

    
7677
    self.feedback_fn("* checking disk consistency between source and target")
7678
    for dev in instance.disks:
7679
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7680
        raise errors.OpExecError("Disk %s is degraded or not fully"
7681
                                 " synchronized on target node,"
7682
                                 " aborting migration" % dev.iv_name)
7683

    
7684
    # First get the migration information from the remote node
7685
    result = self.rpc.call_migration_info(source_node, instance)
7686
    msg = result.fail_msg
7687
    if msg:
7688
      log_err = ("Failed fetching source migration information from %s: %s" %
7689
                 (source_node, msg))
7690
      logging.error(log_err)
7691
      raise errors.OpExecError(log_err)
7692

    
7693
    self.migration_info = migration_info = result.payload
7694

    
7695
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7696
      # Then switch the disks to master/master mode
7697
      self._EnsureSecondary(target_node)
7698
      self._GoStandalone()
7699
      self._GoReconnect(True)
7700
      self._WaitUntilSync()
7701

    
7702
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7703
    result = self.rpc.call_accept_instance(target_node,
7704
                                           instance,
7705
                                           migration_info,
7706
                                           self.nodes_ip[target_node])
7707

    
7708
    msg = result.fail_msg
7709
    if msg:
7710
      logging.error("Instance pre-migration failed, trying to revert"
7711
                    " disk status: %s", msg)
7712
      self.feedback_fn("Pre-migration failed, aborting")
7713
      self._AbortMigration()
7714
      self._RevertDiskStatus()
7715
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7716
                               (instance.name, msg))
7717

    
7718
    self.feedback_fn("* migrating instance to %s" % target_node)
7719
    result = self.rpc.call_instance_migrate(source_node, instance,
7720
                                            self.nodes_ip[target_node],
7721
                                            self.live)
7722
    msg = result.fail_msg
7723
    if msg:
7724
      logging.error("Instance migration failed, trying to revert"
7725
                    " disk status: %s", msg)
7726
      self.feedback_fn("Migration failed, aborting")
7727
      self._AbortMigration()
7728
      self._RevertDiskStatus()
7729
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7730
                               (instance.name, msg))
7731

    
7732
    self.feedback_fn("* starting memory transfer")
7733
    last_feedback = time.time()
7734
    while True:
7735
      result = self.rpc.call_instance_get_migration_status(source_node,
7736
                                                           instance)
7737
      msg = result.fail_msg
7738
      ms = result.payload   # MigrationStatus instance
7739
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7740
        logging.error("Instance migration failed, trying to revert"
7741
                      " disk status: %s", msg)
7742
        self.feedback_fn("Migration failed, aborting")
7743
        self._AbortMigration()
7744
        self._RevertDiskStatus()
7745
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7746
                                 (instance.name, msg))
7747

    
7748
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7749
        self.feedback_fn("* memory transfer complete")
7750
        break
7751

    
7752
      if (utils.TimeoutExpired(last_feedback,
7753
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7754
          ms.transferred_ram is not None):
7755
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7756
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7757
        last_feedback = time.time()
7758

    
7759
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7760

    
7761
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7762
                                                           instance,
7763
                                                           True,
7764
                                                           self.live)
7765
    msg = result.fail_msg
7766
    if msg:
7767
      logging.error("Instance migration succeeded, but finalization failed"
7768
                    " on the source node: %s", msg)
7769
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7770
                               msg)
7771

    
7772
    instance.primary_node = target_node
7773

    
7774
    # distribute new instance config to the other nodes
7775
    self.cfg.Update(instance, self.feedback_fn)
7776

    
7777
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7778
                                                           instance,
7779
                                                           migration_info,
7780
                                                           True)
7781
    msg = result.fail_msg
7782
    if msg:
7783
      logging.error("Instance migration succeeded, but finalization failed"
7784
                    " on the target node: %s", msg)
7785
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7786
                               msg)
7787

    
7788
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7789
      self._EnsureSecondary(source_node)
7790
      self._WaitUntilSync()
7791
      self._GoStandalone()
7792
      self._GoReconnect(False)
7793
      self._WaitUntilSync()
7794

    
7795
    self.feedback_fn("* done")
7796

    
7797
  def _ExecFailover(self):
7798
    """Failover an instance.
7799

7800
    The failover is done by shutting it down on its present node and
7801
    starting it on the secondary.
7802

7803
    """
7804
    instance = self.instance
7805
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7806

    
7807
    source_node = instance.primary_node
7808
    target_node = self.target_node
7809

    
7810
    if instance.admin_state == constants.ADMINST_UP:
7811
      self.feedback_fn("* checking disk consistency between source and target")
7812
      for dev in instance.disks:
7813
        # for drbd, these are drbd over lvm
7814
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7815
          if primary_node.offline:
7816
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7817
                             " target node %s" %
7818
                             (primary_node.name, dev.iv_name, target_node))
7819
          elif not self.ignore_consistency:
7820
            raise errors.OpExecError("Disk %s is degraded on target node,"
7821
                                     " aborting failover" % dev.iv_name)
7822
    else:
7823
      self.feedback_fn("* not checking disk consistency as instance is not"
7824
                       " running")
7825

    
7826
    self.feedback_fn("* shutting down instance on source node")
7827
    logging.info("Shutting down instance %s on node %s",
7828
                 instance.name, source_node)
7829

    
7830
    result = self.rpc.call_instance_shutdown(source_node, instance,
7831
                                             self.shutdown_timeout)
7832
    msg = result.fail_msg
7833
    if msg:
7834
      if self.ignore_consistency or primary_node.offline:
7835
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7836
                           " proceeding anyway; please make sure node"
7837
                           " %s is down; error details: %s",
7838
                           instance.name, source_node, source_node, msg)
7839
      else:
7840
        raise errors.OpExecError("Could not shutdown instance %s on"
7841
                                 " node %s: %s" %
7842
                                 (instance.name, source_node, msg))
7843

    
7844
    self.feedback_fn("* deactivating the instance's disks on source node")
7845
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7846
      raise errors.OpExecError("Can't shut down the instance's disks")
7847

    
7848
    instance.primary_node = target_node
7849
    # distribute new instance config to the other nodes
7850
    self.cfg.Update(instance, self.feedback_fn)
7851

    
7852
    # Only start the instance if it's marked as up
7853
    if instance.admin_state == constants.ADMINST_UP:
7854
      self.feedback_fn("* activating the instance's disks on target node %s" %
7855
                       target_node)
7856
      logging.info("Starting instance %s on node %s",
7857
                   instance.name, target_node)
7858

    
7859
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7860
                                           ignore_secondaries=True)
7861
      if not disks_ok:
7862
        _ShutdownInstanceDisks(self.lu, instance)
7863
        raise errors.OpExecError("Can't activate the instance's disks")
7864

    
7865
      self.feedback_fn("* starting the instance on the target node %s" %
7866
                       target_node)
7867
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7868
                                            False)
7869
      msg = result.fail_msg
7870
      if msg:
7871
        _ShutdownInstanceDisks(self.lu, instance)
7872
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7873
                                 (instance.name, target_node, msg))
7874

    
7875
  def Exec(self, feedback_fn):
7876
    """Perform the migration.
7877

7878
    """
7879
    self.feedback_fn = feedback_fn
7880
    self.source_node = self.instance.primary_node
7881

    
7882
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7883
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7884
      self.target_node = self.instance.secondary_nodes[0]
7885
      # Otherwise self.target_node has been populated either
7886
      # directly, or through an iallocator.
7887

    
7888
    self.all_nodes = [self.source_node, self.target_node]
7889
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7890
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7891

    
7892
    if self.failover:
7893
      feedback_fn("Failover instance %s" % self.instance.name)
7894
      self._ExecFailover()
7895
    else:
7896
      feedback_fn("Migrating instance %s" % self.instance.name)
7897

    
7898
      if self.cleanup:
7899
        return self._ExecCleanup()
7900
      else:
7901
        return self._ExecMigration()
7902

    
7903

    
7904
def _CreateBlockDev(lu, node, instance, device, force_create,
7905
                    info, force_open):
7906
  """Create a tree of block devices on a given node.
7907

7908
  If this device type has to be created on secondaries, create it and
7909
  all its children.
7910

7911
  If not, just recurse to children keeping the same 'force' value.
7912

7913
  @param lu: the lu on whose behalf we execute
7914
  @param node: the node on which to create the device
7915
  @type instance: L{objects.Instance}
7916
  @param instance: the instance which owns the device
7917
  @type device: L{objects.Disk}
7918
  @param device: the device to create
7919
  @type force_create: boolean
7920
  @param force_create: whether to force creation of this device; this
7921
      will be change to True whenever we find a device which has
7922
      CreateOnSecondary() attribute
7923
  @param info: the extra 'metadata' we should attach to the device
7924
      (this will be represented as a LVM tag)
7925
  @type force_open: boolean
7926
  @param force_open: this parameter will be passed to the
7927
      L{backend.BlockdevCreate} function where it specifies
7928
      whether we run on primary or not, and it affects both
7929
      the child assembly and the device's own Open() execution
7930

7931
  """
7932
  if device.CreateOnSecondary():
7933
    force_create = True
7934

    
7935
  if device.children:
7936
    for child in device.children:
7937
      _CreateBlockDev(lu, node, instance, child, force_create,
7938
                      info, force_open)
7939

    
7940
  if not force_create:
7941
    return
7942

    
7943
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7944

    
7945

    
7946
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7947
  """Create a single block device on a given node.
7948

7949
  This will not recurse over children of the device, so they must be
7950
  created in advance.
7951

7952
  @param lu: the lu on whose behalf we execute
7953
  @param node: the node on which to create the device
7954
  @type instance: L{objects.Instance}
7955
  @param instance: the instance which owns the device
7956
  @type device: L{objects.Disk}
7957
  @param device: the device to create
7958
  @param info: the extra 'metadata' we should attach to the device
7959
      (this will be represented as a LVM tag)
7960
  @type force_open: boolean
7961
  @param force_open: this parameter will be passed to the
7962
      L{backend.BlockdevCreate} function where it specifies
7963
      whether we run on primary or not, and it affects both
7964
      the child assembly and the device's own Open() execution
7965

7966
  """
7967
  lu.cfg.SetDiskID(device, node)
7968
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7969
                                       instance.name, force_open, info)
7970
  result.Raise("Can't create block device %s on"
7971
               " node %s for instance %s" % (device, node, instance.name))
7972
  if device.physical_id is None:
7973
    device.physical_id = result.payload
7974

    
7975

    
7976
def _GenerateUniqueNames(lu, exts):
7977
  """Generate a suitable LV name.
7978

7979
  This will generate a logical volume name for the given instance.
7980

7981
  """
7982
  results = []
7983
  for val in exts:
7984
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7985
    results.append("%s%s" % (new_id, val))
7986
  return results
7987

    
7988

    
7989
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7990
                         iv_name, p_minor, s_minor):
7991
  """Generate a drbd8 device complete with its children.
7992

7993
  """
7994
  assert len(vgnames) == len(names) == 2
7995
  port = lu.cfg.AllocatePort()
7996
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7997
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7998
                          logical_id=(vgnames[0], names[0]))
7999
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8000
                          logical_id=(vgnames[1], names[1]))
8001
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8002
                          logical_id=(primary, secondary, port,
8003
                                      p_minor, s_minor,
8004
                                      shared_secret),
8005
                          children=[dev_data, dev_meta],
8006
                          iv_name=iv_name)
8007
  return drbd_dev
8008

    
8009

    
8010
def _GenerateDiskTemplate(lu, template_name,
8011
                          instance_name, primary_node,
8012
                          secondary_nodes, disk_info,
8013
                          file_storage_dir, file_driver,
8014
                          base_index, feedback_fn):
8015
  """Generate the entire disk layout for a given template type.
8016

8017
  """
8018
  #TODO: compute space requirements
8019

    
8020
  vgname = lu.cfg.GetVGName()
8021
  disk_count = len(disk_info)
8022
  disks = []
8023
  if template_name == constants.DT_DISKLESS:
8024
    pass
8025
  elif template_name == constants.DT_PLAIN:
8026
    if len(secondary_nodes) != 0:
8027
      raise errors.ProgrammerError("Wrong template configuration")
8028

    
8029
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8030
                                      for i in range(disk_count)])
8031
    for idx, disk in enumerate(disk_info):
8032
      disk_index = idx + base_index
8033
      vg = disk.get(constants.IDISK_VG, vgname)
8034
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8035
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8036
                              size=disk[constants.IDISK_SIZE],
8037
                              logical_id=(vg, names[idx]),
8038
                              iv_name="disk/%d" % disk_index,
8039
                              mode=disk[constants.IDISK_MODE])
8040
      disks.append(disk_dev)
8041
  elif template_name == constants.DT_DRBD8:
8042
    if len(secondary_nodes) != 1:
8043
      raise errors.ProgrammerError("Wrong template configuration")
8044
    remote_node = secondary_nodes[0]
8045
    minors = lu.cfg.AllocateDRBDMinor(
8046
      [primary_node, remote_node] * len(disk_info), instance_name)
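    # AllocateDRBDMinor returns one minor per requested node, i.e. a flat
    # list of (primary, secondary) pairs; they are consumed below as
    # minors[idx * 2] / minors[idx * 2 + 1] for disk number idx.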
8047

    
8048
    names = []
8049
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8050
                                               for i in range(disk_count)]):
8051
      names.append(lv_prefix + "_data")
8052
      names.append(lv_prefix + "_meta")
8053
    for idx, disk in enumerate(disk_info):
8054
      disk_index = idx + base_index
8055
      data_vg = disk.get(constants.IDISK_VG, vgname)
8056
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8057
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8058
                                      disk[constants.IDISK_SIZE],
8059
                                      [data_vg, meta_vg],
8060
                                      names[idx * 2:idx * 2 + 2],
8061
                                      "disk/%d" % disk_index,
8062
                                      minors[idx * 2], minors[idx * 2 + 1])
8063
      disk_dev.mode = disk[constants.IDISK_MODE]
8064
      disks.append(disk_dev)
8065
  elif template_name == constants.DT_FILE:
8066
    if len(secondary_nodes) != 0:
8067
      raise errors.ProgrammerError("Wrong template configuration")
8068

    
8069
    opcodes.RequireFileStorage()
8070

    
8071
    for idx, disk in enumerate(disk_info):
8072
      disk_index = idx + base_index
8073
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8074
                              size=disk[constants.IDISK_SIZE],
8075
                              iv_name="disk/%d" % disk_index,
8076
                              logical_id=(file_driver,
8077
                                          "%s/disk%d" % (file_storage_dir,
8078
                                                         disk_index)),
8079
                              mode=disk[constants.IDISK_MODE])
8080
      disks.append(disk_dev)
8081
  elif template_name == constants.DT_SHARED_FILE:
8082
    if len(secondary_nodes) != 0:
8083
      raise errors.ProgrammerError("Wrong template configuration")
8084

    
8085
    opcodes.RequireSharedFileStorage()
8086

    
8087
    for idx, disk in enumerate(disk_info):
8088
      disk_index = idx + base_index
8089
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8090
                              size=disk[constants.IDISK_SIZE],
8091
                              iv_name="disk/%d" % disk_index,
8092
                              logical_id=(file_driver,
8093
                                          "%s/disk%d" % (file_storage_dir,
8094
                                                         disk_index)),
8095
                              mode=disk[constants.IDISK_MODE])
8096
      disks.append(disk_dev)
8097
  elif template_name == constants.DT_BLOCK:
8098
    if len(secondary_nodes) != 0:
8099
      raise errors.ProgrammerError("Wrong template configuration")
8100

    
8101
    for idx, disk in enumerate(disk_info):
8102
      disk_index = idx + base_index
8103
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8104
                              size=disk[constants.IDISK_SIZE],
8105
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8106
                                          disk[constants.IDISK_ADOPT]),
8107
                              iv_name="disk/%d" % disk_index,
8108
                              mode=disk[constants.IDISK_MODE])
8109
      disks.append(disk_dev)
8110

    
8111
  else:
8112
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8113
  return disks
8114

    
8115

    
8116
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time

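# Worked example for _CalcEta with hypothetical numbers: writing 300 MiB of
# a 1200 MiB disk in 30 seconds gives an average of 0.1 s/MiB, so the
# remaining 900 MiB yield an ETA of 90 seconds.

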
def _WipeDisks(lu, instance):
8137
  """Wipes instance disks.
8138

8139
  @type lu: L{LogicalUnit}
8140
  @param lu: the logical unit on whose behalf we execute
8141
  @type instance: L{objects.Instance}
8142
  @param instance: the instance whose disks we should create
8143
  @return: the success of the wipe
8144

8145
  """
8146
  node = instance.primary_node
8147

    
8148
  for device in instance.disks:
8149
    lu.cfg.SetDiskID(device, node)
8150

    
8151
  logging.info("Pause sync of instance %s disks", instance.name)
8152
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8153

    
8154
  for idx, success in enumerate(result.payload):
8155
    if not success:
8156
      logging.warn("pause-sync of instance %s for disks %d failed",
8157
                   instance.name, idx)
8158

    
8159
  try:
8160
    for idx, device in enumerate(instance.disks):
8161
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8162
      # MAX_WIPE_CHUNK at max
8163
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8164
                            constants.MIN_WIPE_CHUNK_PERCENT)
8165
      # we _must_ make this an int, otherwise rounding errors will
8166
      # occur
8167
      wipe_chunk_size = int(wipe_chunk_size)
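      # Illustration only (assuming the common defaults of 10 for
      # MIN_WIPE_CHUNK_PERCENT and 1024 MiB for MAX_WIPE_CHUNK): a 20 GiB
      # disk is wiped in 1024 MiB chunks, a 5 GiB disk in 512 MiB chunks.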
8168

    
8169
      lu.LogInfo("* Wiping disk %d", idx)
8170
      logging.info("Wiping disk %d for instance %s, node %s using"
8171
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8172

    
8173
      offset = 0
8174
      size = device.size
8175
      last_output = 0
8176
      start_time = time.time()
8177

    
8178
      while offset < size:
8179
        wipe_size = min(wipe_chunk_size, size - offset)
8180
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8181
                      idx, offset, wipe_size)
8182
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8183
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8184
                     (idx, offset, wipe_size))
8185
        now = time.time()
8186
        offset += wipe_size
8187
        if now - last_output >= 60:
8188
          eta = _CalcEta(now - start_time, offset, size)
8189
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8190
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8191
          last_output = now
8192
  finally:
8193
    logging.info("Resume sync of instance %s disks", instance.name)
8194

    
8195
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8196

    
8197
    for idx, success in enumerate(result.payload):
8198
      if not success:
8199
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8200
                      " look at the status and troubleshoot the issue", idx)
8201
        logging.warn("resume-sync of instance %s for disks %d failed",
8202
                     instance.name, idx)
8203

    
8204

    
8205
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8206
  """Create all disks for an instance.
8207

8208
  This abstracts away some work from AddInstance.
8209

8210
  @type lu: L{LogicalUnit}
8211
  @param lu: the logical unit on whose behalf we execute
8212
  @type instance: L{objects.Instance}
8213
  @param instance: the instance whose disks we should create
8214
  @type to_skip: list
8215
  @param to_skip: list of indices to skip
8216
  @type target_node: string
8217
  @param target_node: if passed, overrides the target node for creation
8218
  @rtype: boolean
8219
  @return: the success of the creation
8220

8221
  """
8222
  info = _GetInstanceInfoText(instance)
8223
  if target_node is None:
8224
    pnode = instance.primary_node
8225
    all_nodes = instance.all_nodes
8226
  else:
8227
    pnode = target_node
8228
    all_nodes = [pnode]
8229

    
8230
  if instance.disk_template in constants.DTS_FILEBASED:
8231
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8232
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8233

    
8234
    result.Raise("Failed to create directory '%s' on"
8235
                 " node %s" % (file_storage_dir, pnode))
8236

    
8237
  # Note: this needs to be kept in sync with adding of disks in
8238
  # LUInstanceSetParams
8239
  for idx, device in enumerate(instance.disks):
8240
    if to_skip and idx in to_skip:
8241
      continue
8242
    logging.info("Creating volume %s for instance %s",
8243
                 device.iv_name, instance.name)
8244
    #HARDCODE
8245
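    # Creation is forced (and the device opened) only on the primary node;
    # on the other nodes only device types that must exist on secondaries
    # (per Disk.CreateOnSecondary, e.g. DRBD and its backing LVs) are created.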
    for node in all_nodes:
8246
      f_create = node == pnode
8247
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8248

    
8249

    
8250
def _RemoveDisks(lu, instance, target_node=None):
8251
  """Remove all disks for an instance.
8252

8253
  This abstracts away some work from `AddInstance()` and
8254
  `RemoveInstance()`. Note that in case some of the devices couldn't
8255
  be removed, the removal will continue with the other ones (compare
8256
  with `_CreateDisks()`).
8257

8258
  @type lu: L{LogicalUnit}
8259
  @param lu: the logical unit on whose behalf we execute
8260
  @type instance: L{objects.Instance}
8261
  @param instance: the instance whose disks we should remove
8262
  @type target_node: string
8263
  @param target_node: used to override the node on which to remove the disks
8264
  @rtype: boolean
8265
  @return: the success of the removal
8266

8267
  """
8268
  logging.info("Removing block devices for instance %s", instance.name)
8269

    
8270
  all_result = True
8271
  for device in instance.disks:
8272
    if target_node:
8273
      edata = [(target_node, device)]
8274
    else:
8275
      edata = device.ComputeNodeTree(instance.primary_node)
8276
    for node, disk in edata:
8277
      lu.cfg.SetDiskID(disk, node)
8278
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8279
      if msg:
8280
        lu.LogWarning("Could not remove block device %s on node %s,"
8281
                      " continuing anyway: %s", device.iv_name, node, msg)
8282
        all_result = False
8283

    
8284
  if instance.disk_template == constants.DT_FILE:
8285
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8286
    if target_node:
8287
      tgt = target_node
8288
    else:
8289
      tgt = instance.primary_node
8290
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8291
    if result.fail_msg:
8292
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8293
                    file_storage_dir, instance.primary_node, result.fail_msg)
8294
      all_result = False
8295

    
8296
  return all_result
8297

    
8298

    
8299
def _ComputeDiskSizePerVG(disk_template, disks):
8300
  """Compute disk size requirements in the volume group
8301

8302
  """
8303
  def _compute(disks, payload):
8304
    """Universal algorithm.
8305

8306
    """
8307
    vgs = {}
8308
    for disk in disks:
8309
      vgs[disk[constants.IDISK_VG]] = \
8310
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8311

    
8312
    return vgs
8313

    
8314
  # Required free disk space as a function of disk and swap space
8315
  req_size_dict = {
8316
    constants.DT_DISKLESS: {},
8317
    constants.DT_PLAIN: _compute(disks, 0),
8318
    # 128 MB are added for drbd metadata for each disk
8319
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8320
    constants.DT_FILE: {},
8321
    constants.DT_SHARED_FILE: {},
8322
  }
8323

    
8324
  if disk_template not in req_size_dict:
8325
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8326
                                 " is unknown" % disk_template)
8327

    
8328
  return req_size_dict[disk_template]
8329

    
8330

    
8331
def _ComputeDiskSize(disk_template, disks):
8332
  """Compute disk size requirements in the volume group
8333

8334
  """
8335
  # Required free disk space as a function of disk and swap space
8336
  req_size_dict = {
8337
    constants.DT_DISKLESS: None,
8338
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8339
    # 128 MB are added for drbd metadata for each disk
8340
    constants.DT_DRBD8:
8341
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8342
    constants.DT_FILE: None,
8343
    constants.DT_SHARED_FILE: 0,
8344
    constants.DT_BLOCK: 0,
8345
  }
8346

    
8347
  if disk_template not in req_size_dict:
8348
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8349
                                 " is unknown" % disk_template)
8350

    
8351
  return req_size_dict[disk_template]
8352

    
8353

    
8354
def _FilterVmNodes(lu, nodenames):
8355
  """Filters out non-vm_capable nodes from a list.
8356

8357
  @type lu: L{LogicalUnit}
8358
  @param lu: the logical unit for which we check
8359
  @type nodenames: list
8360
  @param nodenames: the list of nodes on which we should check
8361
  @rtype: list
8362
  @return: the list of vm-capable nodes
8363

8364
  """
8365
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8366
  return [name for name in nodenames if name not in vm_nodes]
8367

    
8368

    
8369
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8370
  """Hypervisor parameter validation.
8371

8372
  This function abstract the hypervisor parameter validation to be
8373
  used in both instance create and instance modify.
8374

8375
  @type lu: L{LogicalUnit}
8376
  @param lu: the logical unit for which we check
8377
  @type nodenames: list
8378
  @param nodenames: the list of nodes on which we should check
8379
  @type hvname: string
8380
  @param hvname: the name of the hypervisor we should use
8381
  @type hvparams: dict
8382
  @param hvparams: the parameters which we need to check
8383
  @raise errors.OpPrereqError: if the parameters are not valid
8384

8385
  """
8386
  nodenames = _FilterVmNodes(lu, nodenames)
8387

    
8388
  cluster = lu.cfg.GetClusterInfo()
8389
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8390

    
8391
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8392
  for node in nodenames:
8393
    info = hvinfo[node]
8394
    if info.offline:
8395
      continue
8396
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8397

    
8398

    
8399
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8400
  """OS parameters validation.
8401

8402
  @type lu: L{LogicalUnit}
8403
  @param lu: the logical unit for which we check
8404
  @type required: boolean
8405
  @param required: whether the validation should fail if the OS is not
8406
      found
8407
  @type nodenames: list
8408
  @param nodenames: the list of nodes on which we should check
8409
  @type osname: string
8410
  @param osname: the name of the hypervisor we should use
8411
  @type osparams: dict
8412
  @param osparams: the parameters which we need to check
8413
  @raise errors.OpPrereqError: if the parameters are not valid
8414

8415
  """
8416
  nodenames = _FilterVmNodes(lu, nodenames)
8417
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8418
                                   [constants.OS_VALIDATE_PARAMETERS],
8419
                                   osparams)
8420
  for node, nres in result.items():
8421
    # we don't check for offline cases since this should be run only
8422
    # against the master node and/or an instance's nodes
8423
    nres.Raise("OS Parameters validation failed on node %s" % node)
8424
    if not nres.payload:
8425
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8426
                 osname, node)
8427

    
8428

    
8429
class LUInstanceCreate(LogicalUnit):
8430
  """Create an instance.
8431

8432
  """
8433
  HPATH = "instance-add"
8434
  HTYPE = constants.HTYPE_INSTANCE
8435
  REQ_BGL = False
8436

    
8437
  def CheckArguments(self):
8438
    """Check arguments.
8439

8440
    """
8441
    # do not require name_check to ease forward/backward compatibility
8442
    # for tools
8443
    if self.op.no_install and self.op.start:
8444
      self.LogInfo("No-installation mode selected, disabling startup")
8445
      self.op.start = False
8446
    # validate/normalize the instance name
8447
    self.op.instance_name = \
8448
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8449

    
8450
    if self.op.ip_check and not self.op.name_check:
8451
      # TODO: make the ip check more flexible and not depend on the name check
8452
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8453
                                 " check", errors.ECODE_INVAL)
8454

    
8455
    # check nics' parameter names
8456
    for nic in self.op.nics:
8457
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8458

    
8459
    # check disks. parameter names and consistent adopt/no-adopt strategy
8460
    has_adopt = has_no_adopt = False
8461
    for disk in self.op.disks:
8462
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8463
      if constants.IDISK_ADOPT in disk:
8464
        has_adopt = True
8465
      else:
8466
        has_no_adopt = True
8467
    if has_adopt and has_no_adopt:
8468
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8469
                                 errors.ECODE_INVAL)
8470
    if has_adopt:
8471
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8472
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8473
                                   " '%s' disk template" %
8474
                                   self.op.disk_template,
8475
                                   errors.ECODE_INVAL)
8476
      if self.op.iallocator is not None:
8477
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8478
                                   " iallocator script", errors.ECODE_INVAL)
8479
      if self.op.mode == constants.INSTANCE_IMPORT:
8480
        raise errors.OpPrereqError("Disk adoption not allowed for"
8481
                                   " instance import", errors.ECODE_INVAL)
8482
    else:
8483
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8484
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8485
                                   " but no 'adopt' parameter given" %
8486
                                   self.op.disk_template,
8487
                                   errors.ECODE_INVAL)
8488

    
8489
    self.adopt_disks = has_adopt
8490

    
8491
    # instance name verification
8492
    if self.op.name_check:
8493
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8494
      self.op.instance_name = self.hostname1.name
8495
      # used in CheckPrereq for ip ping check
8496
      self.check_ip = self.hostname1.ip
8497
    else:
8498
      self.check_ip = None
8499

    
8500
    # file storage checks
8501
    if (self.op.file_driver and
8502
        not self.op.file_driver in constants.FILE_DRIVER):
8503
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8504
                                 self.op.file_driver, errors.ECODE_INVAL)
8505

    
8506
    if self.op.disk_template == constants.DT_FILE:
8507
      opcodes.RequireFileStorage()
8508
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8509
      opcodes.RequireSharedFileStorage()
8510

    
8511
    ### Node/iallocator related checks
8512
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8513

    
8514
    if self.op.pnode is not None:
8515
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8516
        if self.op.snode is None:
8517
          raise errors.OpPrereqError("The networked disk templates need"
8518
                                     " a mirror node", errors.ECODE_INVAL)
8519
      elif self.op.snode:
8520
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8521
                        " template")
8522
        self.op.snode = None
8523

    
8524
    self._cds = _GetClusterDomainSecret()
8525

    
8526
    if self.op.mode == constants.INSTANCE_IMPORT:
8527
      # On import force_variant must be True, because if we forced it at
8528
      # initial install, our only chance when importing it back is that it
8529
      # works again!
8530
      self.op.force_variant = True
8531

    
8532
      if self.op.no_install:
8533
        self.LogInfo("No-installation mode has no effect during import")
8534

    
8535
    elif self.op.mode == constants.INSTANCE_CREATE:
8536
      if self.op.os_type is None:
8537
        raise errors.OpPrereqError("No guest OS specified",
8538
                                   errors.ECODE_INVAL)
8539
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8540
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8541
                                   " installation" % self.op.os_type,
8542
                                   errors.ECODE_STATE)
8543
      if self.op.disk_template is None:
8544
        raise errors.OpPrereqError("No disk template specified",
8545
                                   errors.ECODE_INVAL)
8546

    
8547
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8548
      # Check handshake to ensure both clusters have the same domain secret
8549
      src_handshake = self.op.source_handshake
8550
      if not src_handshake:
8551
        raise errors.OpPrereqError("Missing source handshake",
8552
                                   errors.ECODE_INVAL)
8553

    
8554
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8555
                                                           src_handshake)
8556
      if errmsg:
8557
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8558
                                   errors.ECODE_INVAL)
8559

    
8560
      # Load and check source CA
8561
      self.source_x509_ca_pem = self.op.source_x509_ca
8562
      if not self.source_x509_ca_pem:
8563
        raise errors.OpPrereqError("Missing source X509 CA",
8564
                                   errors.ECODE_INVAL)
8565

    
8566
      try:
8567
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8568
                                                    self._cds)
8569
      except OpenSSL.crypto.Error, err:
8570
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8571
                                   (err, ), errors.ECODE_INVAL)
8572

    
8573
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8574
      if errcode is not None:
8575
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8576
                                   errors.ECODE_INVAL)
8577

    
8578
      self.source_x509_ca = cert
8579

    
8580
      src_instance_name = self.op.source_instance_name
8581
      if not src_instance_name:
8582
        raise errors.OpPrereqError("Missing source instance name",
8583
                                   errors.ECODE_INVAL)
8584

    
8585
      self.source_instance_name = \
8586
          netutils.GetHostname(name=src_instance_name).name
8587

    
8588
    else:
8589
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8590
                                 self.op.mode, errors.ECODE_INVAL)
8591

    
8592
  def ExpandNames(self):
8593
    """ExpandNames for CreateInstance.
8594

8595
    Figure out the right locks for instance creation.
8596

8597
    """
8598
    self.needed_locks = {}
8599

    
8600
    instance_name = self.op.instance_name
8601
    # this is just a preventive check, but someone might still add this
8602
    # instance in the meantime, and creation will fail at lock-add time
8603
    if instance_name in self.cfg.GetInstanceList():
8604
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8605
                                 instance_name, errors.ECODE_EXISTS)
8606

    
8607
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8608

    
8609
    if self.op.iallocator:
8610
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8611
      # specifying a group on instance creation and then selecting nodes from
8612
      # that group
8613
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8614
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8615
    else:
8616
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8617
      nodelist = [self.op.pnode]
8618
      if self.op.snode is not None:
8619
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8620
        nodelist.append(self.op.snode)
8621
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8622
      # Lock resources of instance's primary and secondary nodes (copy to
8623
      # prevent accidential modification)
8624
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8625

    
8626
    # in case of import lock the source node too
8627
    if self.op.mode == constants.INSTANCE_IMPORT:
8628
      src_node = self.op.src_node
8629
      src_path = self.op.src_path
8630

    
8631
      if src_path is None:
8632
        self.op.src_path = src_path = self.op.instance_name
8633

    
8634
      if src_node is None:
8635
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8636
        self.op.src_node = None
8637
        if os.path.isabs(src_path):
8638
          raise errors.OpPrereqError("Importing an instance from a path"
8639
                                     " requires a source node option",
8640
                                     errors.ECODE_INVAL)
8641
      else:
8642
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8643
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8644
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8645
        if not os.path.isabs(src_path):
8646
          self.op.src_path = src_path = \
8647
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8648

    
8649
  def _RunAllocator(self):
8650
    """Run the allocator based on input opcode.
8651

8652
    """
8653
    nics = [n.ToDict() for n in self.nics]
8654
    ial = IAllocator(self.cfg, self.rpc,
8655
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8656
                     name=self.op.instance_name,
8657
                     disk_template=self.op.disk_template,
8658
                     tags=self.op.tags,
8659
                     os=self.op.os_type,
8660
                     vcpus=self.be_full[constants.BE_VCPUS],
8661
                     memory=self.be_full[constants.BE_MEMORY],
8662
                     disks=self.disks,
8663
                     nics=nics,
8664
                     hypervisor=self.op.hypervisor,
8665
                     )
8666

    
8667
    ial.Run(self.op.iallocator)
8668

    
8669
    if not ial.success:
8670
      raise errors.OpPrereqError("Can't compute nodes using"
8671
                                 " iallocator '%s': %s" %
8672
                                 (self.op.iallocator, ial.info),
8673
                                 errors.ECODE_NORES)
8674
    if len(ial.result) != ial.required_nodes:
8675
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8676
                                 " of nodes (%s), required %s" %
8677
                                 (self.op.iallocator, len(ial.result),
8678
                                  ial.required_nodes), errors.ECODE_FAULT)
8679
    self.op.pnode = ial.result[0]
8680
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8681
                 self.op.instance_name, self.op.iallocator,
8682
                 utils.CommaJoin(ial.result))
8683
    if ial.required_nodes == 2:
8684
      self.op.snode = ial.result[1]
8685

    
8686
  def BuildHooksEnv(self):
8687
    """Build hooks env.
8688

8689
    This runs on master, primary and secondary nodes of the instance.
8690

8691
    """
8692
    env = {
8693
      "ADD_MODE": self.op.mode,
8694
      }
8695
    if self.op.mode == constants.INSTANCE_IMPORT:
8696
      env["SRC_NODE"] = self.op.src_node
8697
      env["SRC_PATH"] = self.op.src_path
8698
      env["SRC_IMAGES"] = self.src_images
8699

    
8700
    env.update(_BuildInstanceHookEnv(
8701
      name=self.op.instance_name,
8702
      primary_node=self.op.pnode,
8703
      secondary_nodes=self.secondaries,
8704
      status=self.op.start,
8705
      os_type=self.op.os_type,
8706
      memory=self.be_full[constants.BE_MEMORY],
8707
      vcpus=self.be_full[constants.BE_VCPUS],
8708
      nics=_NICListToTuple(self, self.nics),
8709
      disk_template=self.op.disk_template,
8710
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8711
             for d in self.disks],
8712
      bep=self.be_full,
8713
      hvp=self.hv_full,
8714
      hypervisor_name=self.op.hypervisor,
8715
      tags=self.op.tags,
8716
    ))
8717

    
8718
    return env
8719

    
8720
  def BuildHooksNodes(self):
8721
    """Build hooks nodes.
8722

8723
    """
8724
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8725
    return nl, nl
8726

    
8727
  def _ReadExportInfo(self):
8728
    """Reads the export information from disk.
8729

8730
    It will override the opcode source node and path with the actual
8731
    information, if these two were not specified before.
8732

8733
    @return: the export information
8734

8735
    """
8736
    assert self.op.mode == constants.INSTANCE_IMPORT
8737

    
8738
    src_node = self.op.src_node
8739
    src_path = self.op.src_path
8740

    
8741
    if src_node is None:
8742
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8743
      exp_list = self.rpc.call_export_list(locked_nodes)
8744
      found = False
8745
      for node in exp_list:
8746
        if exp_list[node].fail_msg:
8747
          continue
8748
        if src_path in exp_list[node].payload:
8749
          found = True
8750
          self.op.src_node = src_node = node
8751
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8752
                                                       src_path)
8753
          break
8754
      if not found:
8755
        raise errors.OpPrereqError("No export found for relative path %s" %
8756
                                    src_path, errors.ECODE_INVAL)
8757

    
8758
    _CheckNodeOnline(self, src_node)
8759
    result = self.rpc.call_export_info(src_node, src_path)
8760
    result.Raise("No export or invalid export found in dir %s" % src_path)
8761

    
8762
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8763
    if not export_info.has_section(constants.INISECT_EXP):
8764
      raise errors.ProgrammerError("Corrupted export config",
8765
                                   errors.ECODE_ENVIRON)
8766

    
8767
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8768
    if (int(ei_version) != constants.EXPORT_VERSION):
8769
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8770
                                 (ei_version, constants.EXPORT_VERSION),
8771
                                 errors.ECODE_ENVIRON)
8772
    return export_info
8773

    
8774
  def _ReadExportParams(self, einfo):
8775
    """Use export parameters as defaults.
8776

8777
    In case the opcode doesn't specify (as in override) some instance
8778
    parameters, then try to use them from the export information, if
8779
    that declares them.
8780

8781
    """
8782
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8783

    
8784
    if self.op.disk_template is None:
8785
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8786
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8787
                                          "disk_template")
8788
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8789
          raise errors.OpPrereqError("Disk template specified in configuration"
8790
                                     " file is not one of the allowed values:"
8791
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8792
      else:
8793
        raise errors.OpPrereqError("No disk template specified and the export"
8794
                                   " is missing the disk_template information",
8795
                                   errors.ECODE_INVAL)
8796

    
8797
    if not self.op.disks:
8798
      disks = []
8799
      # TODO: import the disk iv_name too
8800
      for idx in range(constants.MAX_DISKS):
8801
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8802
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8803
          disks.append({constants.IDISK_SIZE: disk_sz})
8804
      self.op.disks = disks
8805
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8806
        raise errors.OpPrereqError("No disk info specified and the export"
8807
                                   " is missing the disk information",
8808
                                   errors.ECODE_INVAL)
8809

    
8810
    if not self.op.nics:
8811
      nics = []
8812
      for idx in range(constants.MAX_NICS):
8813
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8814
          ndict = {}
8815
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8816
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8817
            ndict[name] = v
8818
          nics.append(ndict)
8819
        else:
8820
          break
8821
      self.op.nics = nics
8822

    
8823
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8824
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8825

    
8826
    if (self.op.hypervisor is None and
8827
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8828
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8829

    
8830
    if einfo.has_section(constants.INISECT_HYP):
8831
      # use the export parameters but do not override the ones
8832
      # specified by the user
8833
      for name, value in einfo.items(constants.INISECT_HYP):
8834
        if name not in self.op.hvparams:
8835
          self.op.hvparams[name] = value
8836

    
8837
    if einfo.has_section(constants.INISECT_BEP):
8838
      # use the parameters, without overriding
8839
      for name, value in einfo.items(constants.INISECT_BEP):
8840
        if name not in self.op.beparams:
8841
          self.op.beparams[name] = value
8842
    else:
8843
      # try to read the parameters old style, from the main section
8844
      for name in constants.BES_PARAMETERS:
8845
        if (name not in self.op.beparams and
8846
            einfo.has_option(constants.INISECT_INS, name)):
8847
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8848

    
8849
    if einfo.has_section(constants.INISECT_OSP):
8850
      # use the parameters, without overriding
8851
      for name, value in einfo.items(constants.INISECT_OSP):
8852
        if name not in self.op.osparams:
8853
          self.op.osparams[name] = value
8854

    
8855
  def _RevertToDefaults(self, cluster):
8856
    """Revert the instance parameters to the default values.
8857

8858
    """
8859
    # hvparams
8860
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8861
    for name in self.op.hvparams.keys():
8862
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8863
        del self.op.hvparams[name]
8864
    # beparams
8865
    be_defs = cluster.SimpleFillBE({})
8866
    for name in self.op.beparams.keys():
8867
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8868
        del self.op.beparams[name]
8869
    # nic params
8870
    nic_defs = cluster.SimpleFillNIC({})
8871
    for nic in self.op.nics:
8872
      for name in constants.NICS_PARAMETERS:
8873
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8874
          del nic[name]
8875
    # osparams
8876
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8877
    for name in self.op.osparams.keys():
8878
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8879
        del self.op.osparams[name]
8880

    
8881
  def _CalculateFileStorageDir(self):
8882
    """Calculate final instance file storage dir.
8883

8884
    """
8885
    # file storage dir calculation/check
8886
    self.instance_file_storage_dir = None
8887
    if self.op.disk_template in constants.DTS_FILEBASED:
8888
      # build the full file storage dir path
8889
      joinargs = []
8890

    
8891
      if self.op.disk_template == constants.DT_SHARED_FILE:
8892
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8893
      else:
8894
        get_fsd_fn = self.cfg.GetFileStorageDir
8895

    
8896
      cfg_storagedir = get_fsd_fn()
8897
      if not cfg_storagedir:
8898
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8899
      joinargs.append(cfg_storagedir)
8900

    
8901
      if self.op.file_storage_dir is not None:
8902
        joinargs.append(self.op.file_storage_dir)
8903

    
8904
      joinargs.append(self.op.instance_name)
8905

    
8906
      # pylint: disable=W0142
8907
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8908

    
8909
  def CheckPrereq(self):
8910
    """Check prerequisites.
8911

8912
    """
8913
    self._CalculateFileStorageDir()
8914

    
8915
    if self.op.mode == constants.INSTANCE_IMPORT:
8916
      export_info = self._ReadExportInfo()
8917
      self._ReadExportParams(export_info)
8918

    
8919
    if (not self.cfg.GetVGName() and
8920
        self.op.disk_template not in constants.DTS_NOT_LVM):
8921
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8922
                                 " instances", errors.ECODE_STATE)
8923

    
8924
    if (self.op.hypervisor is None or
8925
        self.op.hypervisor == constants.VALUE_AUTO):
8926
      self.op.hypervisor = self.cfg.GetHypervisorType()
8927

    
8928
    cluster = self.cfg.GetClusterInfo()
8929
    enabled_hvs = cluster.enabled_hypervisors
8930
    if self.op.hypervisor not in enabled_hvs:
8931
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8932
                                 " cluster (%s)" % (self.op.hypervisor,
8933
                                  ",".join(enabled_hvs)),
8934
                                 errors.ECODE_STATE)
8935

    
8936
    # Check tag validity
8937
    for tag in self.op.tags:
8938
      objects.TaggableObject.ValidateTag(tag)
8939

    
8940
    # check hypervisor parameter syntax (locally)
8941
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8942
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8943
                                      self.op.hvparams)
8944
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8945
    hv_type.CheckParameterSyntax(filled_hvp)
8946
    self.hv_full = filled_hvp
8947
    # check that we don't specify global parameters on an instance
8948
    _CheckGlobalHvParams(self.op.hvparams)
8949

    
8950
    # fill and remember the beparams dict
8951
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8952
    for param, value in self.op.beparams.iteritems():
8953
      if value == constants.VALUE_AUTO:
8954
        self.op.beparams[param] = default_beparams[param]
8955
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8956
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8957

    
8958
    # build os parameters
8959
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8960

    
8961
    # now that hvp/bep are in final format, let's reset to defaults,
8962
    # if told to do so
8963
    if self.op.identify_defaults:
8964
      self._RevertToDefaults(cluster)
8965

    
8966
    # NIC buildup
8967
    self.nics = []
8968
    for idx, nic in enumerate(self.op.nics):
8969
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8970
      nic_mode = nic_mode_req
8971
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8972
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8973

    
8974
      # in routed mode, for the first nic, the default ip is 'auto'
8975
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8976
        default_ip_mode = constants.VALUE_AUTO
8977
      else:
8978
        default_ip_mode = constants.VALUE_NONE
8979

    
8980
      # ip validity checks
8981
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8982
      if ip is None or ip.lower() == constants.VALUE_NONE:
8983
        nic_ip = None
8984
      elif ip.lower() == constants.VALUE_AUTO:
8985
        if not self.op.name_check:
8986
          raise errors.OpPrereqError("IP address set to auto but name checks"
8987
                                     " have been skipped",
8988
                                     errors.ECODE_INVAL)
8989
        nic_ip = self.hostname1.ip
8990
      else:
8991
        if not netutils.IPAddress.IsValid(ip):
8992
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8993
                                     errors.ECODE_INVAL)
8994
        nic_ip = ip
8995

    
8996
      # TODO: check the ip address for uniqueness
8997
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8998
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8999
                                   errors.ECODE_INVAL)
9000

    
9001
      # MAC address verification
9002
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9003
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9004
        mac = utils.NormalizeAndValidateMac(mac)
9005

    
9006
        try:
9007
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9008
        except errors.ReservationError:
9009
          raise errors.OpPrereqError("MAC address %s already in use"
9010
                                     " in cluster" % mac,
9011
                                     errors.ECODE_NOTUNIQUE)
9012

    
9013
      #  Build nic parameters
9014
      link = nic.get(constants.INIC_LINK, None)
9015
      if link == constants.VALUE_AUTO:
9016
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9017
      nicparams = {}
9018
      if nic_mode_req:
9019
        nicparams[constants.NIC_MODE] = nic_mode
9020
      if link:
9021
        nicparams[constants.NIC_LINK] = link
9022

    
9023
      check_params = cluster.SimpleFillNIC(nicparams)
9024
      objects.NIC.CheckParameterSyntax(check_params)
9025
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9026

    
9027
    # disk checks/pre-build
9028
    default_vg = self.cfg.GetVGName()
9029
    self.disks = []
9030
    for disk in self.op.disks:
9031
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9032
      if mode not in constants.DISK_ACCESS_SET:
9033
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9034
                                   mode, errors.ECODE_INVAL)
9035
      size = disk.get(constants.IDISK_SIZE, None)
9036
      if size is None:
9037
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9038
      try:
9039
        size = int(size)
9040
      except (TypeError, ValueError):
9041
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9042
                                   errors.ECODE_INVAL)
9043

    
9044
      data_vg = disk.get(constants.IDISK_VG, default_vg)
9045
      new_disk = {
9046
        constants.IDISK_SIZE: size,
9047
        constants.IDISK_MODE: mode,
9048
        constants.IDISK_VG: data_vg,
9049
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9050
        }
9051
      if constants.IDISK_ADOPT in disk:
9052
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9053
      self.disks.append(new_disk)
9054

    
9055
    if self.op.mode == constants.INSTANCE_IMPORT:
9056
      disk_images = []
9057
      for idx in range(len(self.disks)):
9058
        option = "disk%d_dump" % idx
9059
        if export_info.has_option(constants.INISECT_INS, option):
9060
          # FIXME: are the old os-es, disk sizes, etc. useful?
9061
          export_name = export_info.get(constants.INISECT_INS, option)
9062
          image = utils.PathJoin(self.op.src_path, export_name)
9063
          disk_images.append(image)
9064
        else:
9065
          disk_images.append(False)
9066

    
9067
      self.src_images = disk_images
9068

    
9069
      old_name = export_info.get(constants.INISECT_INS, "name")
9070
      if self.op.instance_name == old_name:
9071
        for idx, nic in enumerate(self.nics):
9072
          if nic.mac == constants.VALUE_AUTO:
9073
            nic_mac_ini = "nic%d_mac" % idx
9074
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9075

    
9076
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9077

    
9078
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9079
    if self.op.ip_check:
9080
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9081
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9082
                                   (self.check_ip, self.op.instance_name),
9083
                                   errors.ECODE_NOTUNIQUE)
9084

    
9085
    #### mac address generation
9086
    # By generating here the mac address both the allocator and the hooks get
9087
    # the real final mac address rather than the 'auto' or 'generate' value.
9088
    # There is a race condition between the generation and the instance object
9089
    # creation, which means that we know the mac is valid now, but we're not
9090
    # sure it will be when we actually add the instance. If things go bad
9091
    # adding the instance will abort because of a duplicate mac, and the
9092
    # creation job will fail.
9093
    for nic in self.nics:
9094
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9095
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9096

    
9097
    #### allocator run
9098

    
9099
    if self.op.iallocator is not None:
9100
      self._RunAllocator()
9101

    
9102
    #### node related checks
9103

    
9104
    # check primary node
9105
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9106
    assert self.pnode is not None, \
9107
      "Cannot retrieve locked node %s" % self.op.pnode
9108
    if pnode.offline:
9109
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9110
                                 pnode.name, errors.ECODE_STATE)
9111
    if pnode.drained:
9112
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9113
                                 pnode.name, errors.ECODE_STATE)
9114
    if not pnode.vm_capable:
9115
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9116
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9117

    
9118
    self.secondaries = []
9119

    
9120
    # mirror node verification
9121
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9122
      if self.op.snode == pnode.name:
9123
        raise errors.OpPrereqError("The secondary node cannot be the"
9124
                                   " primary node", errors.ECODE_INVAL)
9125
      _CheckNodeOnline(self, self.op.snode)
9126
      _CheckNodeNotDrained(self, self.op.snode)
9127
      _CheckNodeVmCapable(self, self.op.snode)
9128
      self.secondaries.append(self.op.snode)
9129

    
9130
    nodenames = [pnode.name] + self.secondaries
9131

    
9132
    if not self.adopt_disks:
9133
      # Check lv size requirements, if not adopting
9134
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9135
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9136

    
9137
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9138
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9139
                                disk[constants.IDISK_ADOPT])
9140
                     for disk in self.disks])
9141
      if len(all_lvs) != len(self.disks):
9142
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9143
                                   errors.ECODE_INVAL)
9144
      for lv_name in all_lvs:
9145
        try:
9146
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9147
          # to ReserveLV uses the same syntax
9148
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9149
        except errors.ReservationError:
9150
          raise errors.OpPrereqError("LV named %s used by another instance" %
9151
                                     lv_name, errors.ECODE_NOTUNIQUE)
9152

    
9153
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9154
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9155

    
9156
      node_lvs = self.rpc.call_lv_list([pnode.name],
9157
                                       vg_names.payload.keys())[pnode.name]
9158
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9159
      node_lvs = node_lvs.payload
9160

    
9161
      delta = all_lvs.difference(node_lvs.keys())
9162
      if delta:
9163
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9164
                                   utils.CommaJoin(delta),
9165
                                   errors.ECODE_INVAL)
9166
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9167
      if online_lvs:
9168
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9169
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9170
                                   errors.ECODE_STATE)
9171
      # update the size of disk based on what is found
9172
      for dsk in self.disks:
9173
        dsk[constants.IDISK_SIZE] = \
9174
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9175
                                        dsk[constants.IDISK_ADOPT])][0]))
9176

    
9177
    elif self.op.disk_template == constants.DT_BLOCK:
9178
      # Normalize and de-duplicate device paths
9179
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9180
                       for disk in self.disks])
9181
      if len(all_disks) != len(self.disks):
9182
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9183
                                   errors.ECODE_INVAL)
9184
      baddisks = [d for d in all_disks
9185
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9186
      if baddisks:
9187
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9188
                                   " cannot be adopted" %
9189
                                   (", ".join(baddisks),
9190
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9191
                                   errors.ECODE_INVAL)
9192

    
9193
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9194
                                            list(all_disks))[pnode.name]
9195
      node_disks.Raise("Cannot get block device information from node %s" %
9196
                       pnode.name)
9197
      node_disks = node_disks.payload
9198
      delta = all_disks.difference(node_disks.keys())
9199
      if delta:
9200
        raise errors.OpPrereqError("Missing block device(s): %s" %
9201
                                   utils.CommaJoin(delta),
9202
                                   errors.ECODE_INVAL)
9203
      for dsk in self.disks:
9204
        dsk[constants.IDISK_SIZE] = \
9205
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9206

    
9207
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9208

    
9209
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9210
    # check OS parameters (remotely)
9211
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9212

    
9213
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9214

    
9215
    # memory check on primary node
9216
    if self.op.start:
9217
      _CheckNodeFreeMemory(self, self.pnode.name,
9218
                           "creating instance %s" % self.op.instance_name,
9219
                           self.be_full[constants.BE_MEMORY],
9220
                           self.op.hypervisor)
9221

    
9222
    self.dry_run_result = list(nodenames)
9223

    
9224
  def Exec(self, feedback_fn):
9225
    """Create and add the instance to the cluster.
9226

9227
    """
9228
    instance = self.op.instance_name
9229
    pnode_name = self.pnode.name
9230

    
9231
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9232
                self.owned_locks(locking.LEVEL_NODE)), \
9233
      "Node locks differ from node resource locks"
9234

    
9235
    ht_kind = self.op.hypervisor
9236
    if ht_kind in constants.HTS_REQ_PORT:
9237
      network_port = self.cfg.AllocatePort()
9238
    else:
9239
      network_port = None
9240

    
9241
    disks = _GenerateDiskTemplate(self,
9242
                                  self.op.disk_template,
9243
                                  instance, pnode_name,
9244
                                  self.secondaries,
9245
                                  self.disks,
9246
                                  self.instance_file_storage_dir,
9247
                                  self.op.file_driver,
9248
                                  0,
9249
                                  feedback_fn)
9250

    
9251
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9252
                            primary_node=pnode_name,
9253
                            nics=self.nics, disks=disks,
9254
                            disk_template=self.op.disk_template,
9255
                            admin_state=constants.ADMINST_DOWN,
9256
                            network_port=network_port,
9257
                            beparams=self.op.beparams,
9258
                            hvparams=self.op.hvparams,
9259
                            hypervisor=self.op.hypervisor,
9260
                            osparams=self.op.osparams,
9261
                            )
9262

    
9263
    if self.op.tags:
9264
      for tag in self.op.tags:
9265
        iobj.AddTag(tag)
9266

    
9267
    if self.adopt_disks:
9268
      if self.op.disk_template == constants.DT_PLAIN:
9269
        # rename LVs to the newly-generated names; we need to construct
9270
        # 'fake' LV disks with the old data, plus the new unique_id
9271
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9272
        rename_to = []
9273
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9274
          rename_to.append(t_dsk.logical_id)
9275
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9276
          self.cfg.SetDiskID(t_dsk, pnode_name)
9277
        result = self.rpc.call_blockdev_rename(pnode_name,
9278
                                               zip(tmp_disks, rename_to))
9279
        result.Raise("Failed to rename adoped LVs")
9280
    else:
9281
      feedback_fn("* creating instance disks...")
9282
      try:
9283
        _CreateDisks(self, iobj)
9284
      except errors.OpExecError:
9285
        self.LogWarning("Device creation failed, reverting...")
9286
        try:
9287
          _RemoveDisks(self, iobj)
9288
        finally:
9289
          self.cfg.ReleaseDRBDMinors(instance)
9290
          raise
9291

    
9292
    feedback_fn("adding instance %s to cluster config" % instance)
9293

    
9294
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9295

    
9296
    # Declare that we don't want to remove the instance lock anymore, as we've
9297
    # added the instance to the config
9298
    del self.remove_locks[locking.LEVEL_INSTANCE]
9299

    
9300
    if self.op.mode == constants.INSTANCE_IMPORT:
9301
      # Release unused nodes
9302
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9303
    else:
9304
      # Release all nodes
9305
      _ReleaseLocks(self, locking.LEVEL_NODE)
9306

    
9307
    disk_abort = False
9308
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9309
      feedback_fn("* wiping instance disks...")
9310
      try:
9311
        _WipeDisks(self, iobj)
9312
      except errors.OpExecError, err:
9313
        logging.exception("Wiping disks failed")
9314
        self.LogWarning("Wiping instance disks failed (%s)", err)
9315
        disk_abort = True
9316

    
9317
    if disk_abort:
9318
      # Something is already wrong with the disks, don't do anything else
9319
      pass
9320
    elif self.op.wait_for_sync:
9321
      disk_abort = not _WaitForSync(self, iobj)
9322
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9323
      # make sure the disks are not degraded (still sync-ing is ok)
9324
      feedback_fn("* checking mirrors status")
9325
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9326
    else:
9327
      disk_abort = False
9328

    
9329
    if disk_abort:
9330
      _RemoveDisks(self, iobj)
9331
      self.cfg.RemoveInstance(iobj.name)
9332
      # Make sure the instance lock gets removed
9333
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9334
      raise errors.OpExecError("There are some degraded disks for"
9335
                               " this instance")
9336

    
9337
    # Release all node resource locks
9338
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9339

    
9340
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9341
      if self.op.mode == constants.INSTANCE_CREATE:
9342
        if not self.op.no_install:
9343
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9344
                        not self.op.wait_for_sync)
9345
          if pause_sync:
9346
            feedback_fn("* pausing disk sync to install instance OS")
9347
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9348
                                                              iobj.disks, True)
9349
            for idx, success in enumerate(result.payload):
9350
              if not success:
9351
                logging.warn("pause-sync of instance %s for disk %d failed",
9352
                             instance, idx)
9353

    
9354
          feedback_fn("* running the instance OS create scripts...")
9355
          # FIXME: pass debug option from opcode to backend
9356
          os_add_result = \
9357
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9358
                                          self.op.debug_level)
9359
          if pause_sync:
9360
            feedback_fn("* resuming disk sync")
9361
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9362
                                                              iobj.disks, False)
9363
            for idx, success in enumerate(result.payload):
9364
              if not success:
9365
                logging.warn("resume-sync of instance %s for disk %d failed",
9366
                             instance, idx)
9367

    
9368
          os_add_result.Raise("Could not add os for instance %s"
9369
                              " on node %s" % (instance, pnode_name))
9370

    
9371
      elif self.op.mode == constants.INSTANCE_IMPORT:
9372
        feedback_fn("* running the instance OS import scripts...")
9373

    
9374
        transfers = []
9375

    
9376
        for idx, image in enumerate(self.src_images):
9377
          if not image:
9378
            continue
9379

    
9380
          # FIXME: pass debug option from opcode to backend
9381
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9382
                                             constants.IEIO_FILE, (image, ),
9383
                                             constants.IEIO_SCRIPT,
9384
                                             (iobj.disks[idx], idx),
9385
                                             None)
9386
          transfers.append(dt)
9387

    
9388
        import_result = \
9389
          masterd.instance.TransferInstanceData(self, feedback_fn,
9390
                                                self.op.src_node, pnode_name,
9391
                                                self.pnode.secondary_ip,
9392
                                                iobj, transfers)
9393
        if not compat.all(import_result):
9394
          self.LogWarning("Some disks for instance %s on node %s were not"
9395
                          " imported successfully" % (instance, pnode_name))
9396

    
9397
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9398
        feedback_fn("* preparing remote import...")
9399
        # The source cluster will stop the instance before attempting to make a
9400
        # connection. In some cases stopping an instance can take a long time,
9401
        # hence the shutdown timeout is added to the connection timeout.
9402
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9403
                           self.op.source_shutdown_timeout)
9404
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9405

    
9406
        assert iobj.primary_node == self.pnode.name
9407
        disk_results = \
9408
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9409
                                        self.source_x509_ca,
9410
                                        self._cds, timeouts)
9411
        if not compat.all(disk_results):
9412
          # TODO: Should the instance still be started, even if some disks
9413
          # failed to import (valid for local imports, too)?
9414
          self.LogWarning("Some disks for instance %s on node %s were not"
9415
                          " imported successfully" % (instance, pnode_name))
9416

    
9417
        # Run rename script on newly imported instance
9418
        assert iobj.name == instance
9419
        feedback_fn("Running rename script for %s" % instance)
9420
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9421
                                                   self.source_instance_name,
9422
                                                   self.op.debug_level)
9423
        if result.fail_msg:
9424
          self.LogWarning("Failed to run rename script for %s on node"
9425
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9426

    
9427
      else:
9428
        # also checked in the prereq part
9429
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9430
                                     % self.op.mode)
9431

    
9432
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9433

    
9434
    if self.op.start:
9435
      iobj.admin_state = constants.ADMINST_UP
9436
      self.cfg.Update(iobj, feedback_fn)
9437
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9438
      feedback_fn("* starting instance...")
9439
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9440
                                            False)
9441
      result.Raise("Could not start instance")
9442

    
9443
    return list(iobj.all_nodes)
9444

    
9445

    
9446
class LUInstanceConsole(NoHooksLU):
9447
  """Connect to an instance's console.
9448

9449
  This is somewhat special in that it returns the command line that
9450
  you need to run on the master node in order to connect to the
9451
  console.
9452

9453
  """
9454
  REQ_BGL = False
9455

    
9456
  def ExpandNames(self):
9457
    self.share_locks = _ShareAll()
9458
    self._ExpandAndLockInstance()
9459

    
9460
  def CheckPrereq(self):
9461
    """Check prerequisites.
9462

9463
    This checks that the instance is in the cluster.
9464

9465
    """
9466
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9467
    assert self.instance is not None, \
9468
      "Cannot retrieve locked instance %s" % self.op.instance_name
9469
    _CheckNodeOnline(self, self.instance.primary_node)
9470

    
9471
  def Exec(self, feedback_fn):
9472
    """Connect to the console of an instance
9473

9474
    """
9475
    instance = self.instance
9476
    node = instance.primary_node
9477

    
9478
    node_insts = self.rpc.call_instance_list([node],
9479
                                             [instance.hypervisor])[node]
9480
    node_insts.Raise("Can't get node information from %s" % node)
9481

    
9482
    if instance.name not in node_insts.payload:
9483
      if instance.admin_state == constants.ADMINST_UP:
9484
        state = constants.INSTST_ERRORDOWN
9485
      elif instance.admin_state == constants.ADMINST_DOWN:
9486
        state = constants.INSTST_ADMINDOWN
9487
      else:
9488
        state = constants.INSTST_ADMINOFFLINE
9489
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9490
                               (instance.name, state))
9491

    
9492
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9493

    
9494
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9495

    
9496

    
9497
def _GetInstanceConsole(cluster, instance):
9498
  """Returns console information for an instance.
9499

9500
  @type cluster: L{objects.Cluster}
9501
  @type instance: L{objects.Instance}
9502
  @rtype: dict
9503

9504
  """
9505
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9506
  # beparams and hvparams are passed separately, to avoid editing the
9507
  # instance and then saving the defaults in the instance itself.
9508
  hvparams = cluster.FillHV(instance)
9509
  beparams = cluster.FillBE(instance)
9510
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9511

    
9512
  assert console.instance == instance.name
9513
  assert console.Validate()
9514

    
9515
  return console.ToDict()
9516

    
9517

    
9518
class LUInstanceReplaceDisks(LogicalUnit):
9519
  """Replace the disks of an instance.
9520

9521
  """
9522
  HPATH = "mirrors-replace"
9523
  HTYPE = constants.HTYPE_INSTANCE
9524
  REQ_BGL = False
9525

    
9526
  def CheckArguments(self):
9527
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9528
                                  self.op.iallocator)
9529

    
9530
  def ExpandNames(self):
9531
    self._ExpandAndLockInstance()
9532

    
9533
    assert locking.LEVEL_NODE not in self.needed_locks
9534
    assert locking.LEVEL_NODE_RES not in self.needed_locks
9535
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9536

    
9537
    assert self.op.iallocator is None or self.op.remote_node is None, \
9538
      "Conflicting options"
9539

    
9540
    if self.op.remote_node is not None:
9541
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9542

    
9543
      # Warning: do not remove the locking of the new secondary here
9544
      # unless DRBD8.AddChildren is changed to work in parallel;
9545
      # currently it doesn't since parallel invocations of
9546
      # FindUnusedMinor will conflict
9547
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9548
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9549
    else:
9550
      self.needed_locks[locking.LEVEL_NODE] = []
9551
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9552

    
9553
      if self.op.iallocator is not None:
9554
        # iallocator will select a new node in the same group
9555
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9556

    
9557
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9558

    
9559
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9560
                                   self.op.iallocator, self.op.remote_node,
9561
                                   self.op.disks, False, self.op.early_release)
9562

    
9563
    self.tasklets = [self.replacer]
9564

    
9565
  def DeclareLocks(self, level):
9566
    if level == locking.LEVEL_NODEGROUP:
9567
      assert self.op.remote_node is None
9568
      assert self.op.iallocator is not None
9569
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9570

    
9571
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9572
      # Lock all groups used by instance optimistically; this requires going
9573
      # via the node before it's locked, requiring verification later on
9574
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9575
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9576

    
9577
    elif level == locking.LEVEL_NODE:
9578
      if self.op.iallocator is not None:
9579
        assert self.op.remote_node is None
9580
        assert not self.needed_locks[locking.LEVEL_NODE]
9581

    
9582
        # Lock member nodes of all locked groups
9583
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9584
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9585
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9586
      else:
9587
        self._LockInstancesNodes()
9588
    elif level == locking.LEVEL_NODE_RES:
9589
      # Reuse node locks
9590
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9591
        self.needed_locks[locking.LEVEL_NODE]
9592

    
9593
  def BuildHooksEnv(self):
9594
    """Build hooks env.
9595

9596
    This runs on the master, the primary and all the secondaries.
9597

9598
    """
9599
    instance = self.replacer.instance
9600
    env = {
9601
      "MODE": self.op.mode,
9602
      "NEW_SECONDARY": self.op.remote_node,
9603
      "OLD_SECONDARY": instance.secondary_nodes[0],
9604
      }
9605
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9606
    return env
9607

    
9608
  def BuildHooksNodes(self):
9609
    """Build hooks nodes.
9610

9611
    """
9612
    instance = self.replacer.instance
9613
    nl = [
9614
      self.cfg.GetMasterNode(),
9615
      instance.primary_node,
9616
      ]
9617
    if self.op.remote_node is not None:
9618
      nl.append(self.op.remote_node)
9619
    return nl, nl
9620

    
9621
  def CheckPrereq(self):
9622
    """Check prerequisites.
9623

9624
    """
9625
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9626
            self.op.iallocator is None)
9627

    
9628
    # Verify if node group locks are still correct
9629
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9630
    if owned_groups:
9631
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9632

    
9633
    return LogicalUnit.CheckPrereq(self)
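    # LogicalUnit.CheckPrereq delegates to the registered tasklets, so this
    # ends up running TLReplaceDisks.CheckPrereq, which does the actual
    # instance and node validation.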
9634

    
9635

    
9636
class TLReplaceDisks(Tasklet):
9637
  """Replaces disks for an instance.
9638

9639
  Note: Locking is not within the scope of this class.
9640

9641
  """
9642
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9643
               disks, delay_iallocator, early_release):
9644
    """Initializes this class.
9645

9646
    """
9647
    Tasklet.__init__(self, lu)
9648

    
9649
    # Parameters
9650
    self.instance_name = instance_name
9651
    self.mode = mode
9652
    self.iallocator_name = iallocator_name
9653
    self.remote_node = remote_node
9654
    self.disks = disks
9655
    self.delay_iallocator = delay_iallocator
9656
    self.early_release = early_release
9657

    
9658
    # Runtime data
9659
    self.instance = None
9660
    self.new_node = None
9661
    self.target_node = None
9662
    self.other_node = None
9663
    self.remote_node_info = None
9664
    self.node_secondary_ip = None
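    # Roles of the node attributes above, filled in by _CheckPrereq2:
    # target_node is the node whose LVs are replaced (the old secondary when
    # changing the secondary), other_node is its healthy peer and new_node
    # is only set when a new secondary node has been chosen.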
9665

    
9666
  @staticmethod
9667
  def CheckArguments(mode, remote_node, iallocator):
9668
    """Helper function for users of this class.
9669

9670
    """
9671
    # check for valid parameter combination
9672
    if mode == constants.REPLACE_DISK_CHG:
9673
      if remote_node is None and iallocator is None:
9674
        raise errors.OpPrereqError("When changing the secondary either an"
9675
                                   " iallocator script must be used or the"
9676
                                   " new node given", errors.ECODE_INVAL)
9677

    
9678
      if remote_node is not None and iallocator is not None:
9679
        raise errors.OpPrereqError("Give either the iallocator or the new"
9680
                                   " secondary, not both", errors.ECODE_INVAL)
9681

    
9682
    elif remote_node is not None or iallocator is not None:
9683
      # Not replacing the secondary
9684
      raise errors.OpPrereqError("The iallocator and new node options can"
9685
                                 " only be used when changing the"
9686
                                 " secondary node", errors.ECODE_INVAL)
9687

    
9688
  @staticmethod
9689
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9690
    """Compute a new secondary node using an IAllocator.
9691

9692
    """
9693
    ial = IAllocator(lu.cfg, lu.rpc,
9694
                     mode=constants.IALLOCATOR_MODE_RELOC,
9695
                     name=instance_name,
9696
                     relocate_from=list(relocate_from))
9697

    
9698
    ial.Run(iallocator_name)
9699

    
9700
    if not ial.success:
9701
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9702
                                 " %s" % (iallocator_name, ial.info),
9703
                                 errors.ECODE_NORES)
9704

    
9705
    if len(ial.result) != ial.required_nodes:
9706
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9707
                                 " of nodes (%s), required %s" %
9708
                                 (iallocator_name,
9709
                                  len(ial.result), ial.required_nodes),
9710
                                 errors.ECODE_FAULT)
9711

    
9712
    remote_node_name = ial.result[0]
9713

    
9714
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9715
               instance_name, remote_node_name)
9716

    
9717
    return remote_node_name
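    # The relocation request asks the iallocator for a single replacement
    # node within the instance's node group; e.g. relocating away from an
    # old secondary "node3" could yield ["node4"] (illustrative names).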
9718

    
9719
  def _FindFaultyDisks(self, node_name):
9720
    """Wrapper for L{_FindFaultyInstanceDisks}.
9721

9722
    """
9723
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9724
                                    node_name, True)
9725

    
9726
  def _CheckDisksActivated(self, instance):
9727
    """Checks if the instance disks are activated.
9728

9729
    @param instance: The instance to check disks
9730
    @return: True if they are activated, False otherwise
9731

9732
    """
9733
    nodes = instance.all_nodes
9734

    
9735
    for idx, dev in enumerate(instance.disks):
9736
      for node in nodes:
9737
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9738
        self.cfg.SetDiskID(dev, node)
9739

    
9740
        result = self.rpc.call_blockdev_find(node, dev)
9741

    
9742
        if result.offline:
9743
          continue
9744
        elif result.fail_msg or not result.payload:
9745
          return False
9746

    
9747
    return True
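    # Offline nodes are skipped above (result.offline), so only reachable
    # nodes can cause this check to report the disks as not activated.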
9748

    
9749
  def CheckPrereq(self):
9750
    """Check prerequisites.
9751

9752
    This checks that the instance is in the cluster.
9753

9754
    """
9755
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9756
    assert instance is not None, \
9757
      "Cannot retrieve locked instance %s" % self.instance_name
9758

    
9759
    if instance.disk_template != constants.DT_DRBD8:
9760
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9761
                                 " instances", errors.ECODE_INVAL)
9762

    
9763
    if len(instance.secondary_nodes) != 1:
9764
      raise errors.OpPrereqError("The instance has a strange layout,"
9765
                                 " expected one secondary but found %d" %
9766
                                 len(instance.secondary_nodes),
9767
                                 errors.ECODE_FAULT)
9768

    
9769
    if not self.delay_iallocator:
9770
      self._CheckPrereq2()
9771

    
9772
  def _CheckPrereq2(self):
9773
    """Check prerequisites, second part.
9774

9775
    This function should always be part of CheckPrereq. It was separated and is
9776
    now called from Exec because during node evacuation iallocator was only
9777
    called with an unmodified cluster model, not taking planned changes into
9778
    account.
9779

9780
    """
9781
    instance = self.instance
9782
    secondary_node = instance.secondary_nodes[0]
9783

    
9784
    if self.iallocator_name is None:
9785
      remote_node = self.remote_node
9786
    else:
9787
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9788
                                       instance.name, instance.secondary_nodes)
9789

    
9790
    if remote_node is None:
9791
      self.remote_node_info = None
9792
    else:
9793
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9794
             "Remote node '%s' is not locked" % remote_node
9795

    
9796
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9797
      assert self.remote_node_info is not None, \
9798
        "Cannot retrieve locked node %s" % remote_node
9799

    
9800
    if remote_node == self.instance.primary_node:
9801
      raise errors.OpPrereqError("The specified node is the primary node of"
9802
                                 " the instance", errors.ECODE_INVAL)
9803

    
9804
    if remote_node == secondary_node:
9805
      raise errors.OpPrereqError("The specified node is already the"
9806
                                 " secondary node of the instance",
9807
                                 errors.ECODE_INVAL)
9808

    
9809
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9810
                                    constants.REPLACE_DISK_CHG):
9811
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9812
                                 errors.ECODE_INVAL)
9813

    
9814
    if self.mode == constants.REPLACE_DISK_AUTO:
9815
      if not self._CheckDisksActivated(instance):
9816
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9817
                                   " first" % self.instance_name,
9818
                                   errors.ECODE_STATE)
9819
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9820
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9821

    
9822
      if faulty_primary and faulty_secondary:
9823
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9824
                                   " one node and can not be repaired"
9825
                                   " automatically" % self.instance_name,
9826
                                   errors.ECODE_STATE)
9827

    
9828
      if faulty_primary:
9829
        self.disks = faulty_primary
9830
        self.target_node = instance.primary_node
9831
        self.other_node = secondary_node
9832
        check_nodes = [self.target_node, self.other_node]
9833
      elif faulty_secondary:
9834
        self.disks = faulty_secondary
9835
        self.target_node = secondary_node
9836
        self.other_node = instance.primary_node
9837
        check_nodes = [self.target_node, self.other_node]
9838
      else:
9839
        self.disks = []
9840
        check_nodes = []
9841

    
9842
    else:
9843
      # Non-automatic modes
9844
      if self.mode == constants.REPLACE_DISK_PRI:
9845
        self.target_node = instance.primary_node
9846
        self.other_node = secondary_node
9847
        check_nodes = [self.target_node, self.other_node]
9848

    
9849
      elif self.mode == constants.REPLACE_DISK_SEC:
9850
        self.target_node = secondary_node
9851
        self.other_node = instance.primary_node
9852
        check_nodes = [self.target_node, self.other_node]
9853

    
9854
      elif self.mode == constants.REPLACE_DISK_CHG:
9855
        self.new_node = remote_node
9856
        self.other_node = instance.primary_node
9857
        self.target_node = secondary_node
9858
        check_nodes = [self.new_node, self.other_node]
9859

    
9860
        _CheckNodeNotDrained(self.lu, remote_node)
9861
        _CheckNodeVmCapable(self.lu, remote_node)
9862

    
9863
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9864
        assert old_node_info is not None
9865
        if old_node_info.offline and not self.early_release:
9866
          # doesn't make sense to delay the release
9867
          self.early_release = True
9868
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9869
                          " early-release mode", secondary_node)
9870

    
9871
      else:
9872
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9873
                                     self.mode)
9874

    
9875
      # If not specified all disks should be replaced
9876
      if not self.disks:
9877
        self.disks = range(len(self.instance.disks))
9878

    
9879
    for node in check_nodes:
9880
      _CheckNodeOnline(self.lu, node)
9881

    
9882
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9883
                                                          self.other_node,
9884
                                                          self.target_node]
9885
                              if node_name is not None)
9886

    
9887
    # Release unneeded node and node resource locks
9888
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9889
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9890

    
9891
    # Release any owned node group
9892
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9893
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9894

    
9895
    # Check whether disks are valid
9896
    for disk_idx in self.disks:
9897
      instance.FindDisk(disk_idx)
9898

    
9899
    # Get secondary node IP addresses
9900
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9901
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9902

    
9903
  def Exec(self, feedback_fn):
9904
    """Execute disk replacement.
9905

9906
    This dispatches the disk replacement to the appropriate handler.
9907

9908
    """
9909
    if self.delay_iallocator:
9910
      self._CheckPrereq2()
9911

    
9912
    if __debug__:
9913
      # Verify owned locks before starting operation
9914
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9915
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9916
          ("Incorrect node locks, owning %s, expected %s" %
9917
           (owned_nodes, self.node_secondary_ip.keys()))
9918
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9919
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
9920

    
9921
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9922
      assert list(owned_instances) == [self.instance_name], \
9923
          "Instance '%s' not locked" % self.instance_name
9924

    
9925
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9926
          "Should not own any node group lock at this point"
9927

    
9928
    if not self.disks:
9929
      feedback_fn("No disks need replacement")
9930
      return
9931

    
9932
    feedback_fn("Replacing disk(s) %s for %s" %
9933
                (utils.CommaJoin(self.disks), self.instance.name))
9934

    
9935
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9936

    
9937
    # Activate the instance disks if we're replacing them on a down instance
9938
    if activate_disks:
9939
      _StartInstanceDisks(self.lu, self.instance, True)
9940

    
9941
    try:
9942
      # Should we replace the secondary node?
9943
      if self.new_node is not None:
9944
        fn = self._ExecDrbd8Secondary
9945
      else:
9946
        fn = self._ExecDrbd8DiskOnly
9947

    
9948
      result = fn(feedback_fn)
9949
    finally:
9950
      # Deactivate the instance disks if we're replacing them on a
9951
      # down instance
9952
      if activate_disks:
9953
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9954

    
9955
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
9956

    
9957
    if __debug__:
9958
      # Verify owned locks
9959
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
9960
      nodes = frozenset(self.node_secondary_ip)
9961
      assert ((self.early_release and not owned_nodes) or
9962
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9963
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9964
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9965

    
9966
    return result
9967

    
9968
  def _CheckVolumeGroup(self, nodes):
9969
    self.lu.LogInfo("Checking volume groups")
9970

    
9971
    vgname = self.cfg.GetVGName()
9972

    
9973
    # Make sure volume group exists on all involved nodes
9974
    results = self.rpc.call_vg_list(nodes)
9975
    if not results:
9976
      raise errors.OpExecError("Can't list volume groups on the nodes")
9977

    
9978
    for node in nodes:
9979
      res = results[node]
9980
      res.Raise("Error checking node %s" % node)
9981
      if vgname not in res.payload:
9982
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9983
                                 (vgname, node))
9984

    
9985
  def _CheckDisksExistence(self, nodes):
9986
    # Check disk existence
9987
    for idx, dev in enumerate(self.instance.disks):
9988
      if idx not in self.disks:
9989
        continue
9990

    
9991
      for node in nodes:
9992
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9993
        self.cfg.SetDiskID(dev, node)
9994

    
9995
        result = self.rpc.call_blockdev_find(node, dev)
9996

    
9997
        msg = result.fail_msg
9998
        if msg or not result.payload:
9999
          if not msg:
10000
            msg = "disk not found"
10001
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10002
                                   (idx, node, msg))
10003

    
10004
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10005
    for idx, dev in enumerate(self.instance.disks):
10006
      if idx not in self.disks:
10007
        continue
10008

    
10009
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10010
                      (idx, node_name))
10011

    
10012
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10013
                                   ldisk=ldisk):
10014
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10015
                                 " replace disks for instance %s" %
10016
                                 (node_name, self.instance.name))
10017

    
10018
  def _CreateNewStorage(self, node_name):
10019
    """Create new storage on the primary or secondary node.
10020

10021
    This is only used for same-node replaces, not for changing the
10022
    secondary node, hence we don't want to modify the existing disk.
10023

10024
    """
10025
    iv_names = {}
10026

    
10027
    for idx, dev in enumerate(self.instance.disks):
10028
      if idx not in self.disks:
10029
        continue
10030

    
10031
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10032

    
10033
      self.cfg.SetDiskID(dev, node_name)
10034

    
10035
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10036
      names = _GenerateUniqueNames(self.lu, lv_names)
10037

    
10038
      vg_data = dev.children[0].logical_id[0]
10039
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10040
                             logical_id=(vg_data, names[0]))
10041
      vg_meta = dev.children[1].logical_id[0]
10042
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10043
                             logical_id=(vg_meta, names[1]))
10044

    
10045
      new_lvs = [lv_data, lv_meta]
10046
      old_lvs = [child.Copy() for child in dev.children]
10047
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10048

    
10049
      # we pass force_create=True to force the LVM creation
10050
      for new_lv in new_lvs:
10051
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10052
                        _GetInstanceInfoText(self.instance), False)
10053

    
10054
    return iv_names
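    # iv_names maps each DRBD disk's iv_name (e.g. "disk/0") to a tuple of
    # (drbd_dev, old_lvs, new_lvs); the rename, attach and removal steps
    # below operate on these old/new LV pairs.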
10055

    
10056
  def _CheckDevices(self, node_name, iv_names):
10057
    for name, (dev, _, _) in iv_names.iteritems():
10058
      self.cfg.SetDiskID(dev, node_name)
10059

    
10060
      result = self.rpc.call_blockdev_find(node_name, dev)
10061

    
10062
      msg = result.fail_msg
10063
      if msg or not result.payload:
10064
        if not msg:
10065
          msg = "disk not found"
10066
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10067
                                 (name, msg))
10068

    
10069
      if result.payload.is_degraded:
10070
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10071

    
10072
  def _RemoveOldStorage(self, node_name, iv_names):
10073
    for name, (_, old_lvs, _) in iv_names.iteritems():
10074
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10075

    
10076
      for lv in old_lvs:
10077
        self.cfg.SetDiskID(lv, node_name)
10078

    
10079
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10080
        if msg:
10081
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10082
                             hint="remove unused LVs manually")
10083

    
10084
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10085
    """Replace a disk on the primary or secondary for DRBD 8.
10086

10087
    The algorithm for replace is quite complicated:
10088

10089
      1. for each disk to be replaced:
10090

10091
        1. create new LVs on the target node with unique names
10092
        1. detach old LVs from the drbd device
10093
        1. rename old LVs to name_replaced.<time_t>
10094
        1. rename new LVs to old LVs
10095
        1. attach the new LVs (with the old names now) to the drbd device
10096

10097
      1. wait for sync across all devices
10098

10099
      1. for each modified disk:
10100

10101
        1. remove old LVs (which have the name name_replaced.<time_t>)
10102

10103
    Failures are not very well handled.
10104

10105
    """
10106
    steps_total = 6
10107

    
10108
    # Step: check device activation
10109
    self.lu.LogStep(1, steps_total, "Check device existence")
10110
    self._CheckDisksExistence([self.other_node, self.target_node])
10111
    self._CheckVolumeGroup([self.target_node, self.other_node])
10112

    
10113
    # Step: check other node consistency
10114
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10115
    self._CheckDisksConsistency(self.other_node,
10116
                                self.other_node == self.instance.primary_node,
10117
                                False)
10118

    
10119
    # Step: create new storage
10120
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10121
    iv_names = self._CreateNewStorage(self.target_node)
10122

    
10123
    # Step: for each lv, detach+rename*2+attach
10124
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10125
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10126
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10127

    
10128
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10129
                                                     old_lvs)
10130
      result.Raise("Can't detach drbd from local storage on node"
10131
                   " %s for device %s" % (self.target_node, dev.iv_name))
10132
      #dev.children = []
10133
      #cfg.Update(instance)
10134

    
10135
      # ok, we created the new LVs, so now we know we have the needed
10136
      # storage; as such, we proceed on the target node to rename
10137
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10138
      # using the assumption that logical_id == physical_id (which in
10139
      # turn is the unique_id on that node)
10140

    
10141
      # FIXME(iustin): use a better name for the replaced LVs
10142
      temp_suffix = int(time.time())
10143
      ren_fn = lambda d, suff: (d.physical_id[0],
10144
                                d.physical_id[1] + "_replaced-%s" % suff)
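      # For example, an LV with physical_id ("xenvg", "disk0_data") would be
      # renamed to ("xenvg", "disk0_data_replaced-1234567890"); the names
      # here are purely illustrative.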
10145

    
10146
      # Build the rename list based on what LVs exist on the node
10147
      rename_old_to_new = []
10148
      for to_ren in old_lvs:
10149
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10150
        if not result.fail_msg and result.payload:
10151
          # device exists
10152
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10153

    
10154
      self.lu.LogInfo("Renaming the old LVs on the target node")
10155
      result = self.rpc.call_blockdev_rename(self.target_node,
10156
                                             rename_old_to_new)
10157
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10158

    
10159
      # Now we rename the new LVs to the old LVs
10160
      self.lu.LogInfo("Renaming the new LVs on the target node")
10161
      rename_new_to_old = [(new, old.physical_id)
10162
                           for old, new in zip(old_lvs, new_lvs)]
10163
      result = self.rpc.call_blockdev_rename(self.target_node,
10164
                                             rename_new_to_old)
10165
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10166

    
10167
      # Intermediate steps of in memory modifications
10168
      for old, new in zip(old_lvs, new_lvs):
10169
        new.logical_id = old.logical_id
10170
        self.cfg.SetDiskID(new, self.target_node)
10171

    
10172
      # We need to modify old_lvs so that removal later removes the
10173
      # right LVs, not the newly added ones; note that old_lvs is a
10174
      # copy here
10175
      for disk in old_lvs:
10176
        disk.logical_id = ren_fn(disk, temp_suffix)
10177
        self.cfg.SetDiskID(disk, self.target_node)
10178

    
10179
      # Now that the new lvs have the old name, we can add them to the device
10180
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10181
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10182
                                                  new_lvs)
10183
      msg = result.fail_msg
10184
      if msg:
10185
        for new_lv in new_lvs:
10186
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10187
                                               new_lv).fail_msg
10188
          if msg2:
10189
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10190
                               hint=("cleanup manually the unused logical"
10191
                                     "volumes"))
10192
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10193

    
10194
    cstep = 5
10195
    if self.early_release:
10196
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10197
      cstep += 1
10198
      self._RemoveOldStorage(self.target_node, iv_names)
10199
      # TODO: Check if releasing locks early still makes sense
10200
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10201
    else:
10202
      # Release all resource locks except those used by the instance
10203
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10204
                    keep=self.node_secondary_ip.keys())
10205

    
10206
    # Release all node locks while waiting for sync
10207
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10208

    
10209
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10210
    # shutdown in the caller into consideration.
10211

    
10212
    # Wait for sync
10213
    # This can fail as the old devices are degraded and _WaitForSync
10214
    # does a combined result over all disks, so we don't check its return value
10215
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10216
    cstep += 1
10217
    _WaitForSync(self.lu, self.instance)
10218

    
10219
    # Check all devices manually
10220
    self._CheckDevices(self.instance.primary_node, iv_names)
10221

    
10222
    # Step: remove old storage
10223
    if not self.early_release:
10224
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10225
      cstep += 1
10226
      self._RemoveOldStorage(self.target_node, iv_names)
10227

    
10228
  def _ExecDrbd8Secondary(self, feedback_fn):
10229
    """Replace the secondary node for DRBD 8.
10230

10231
    The algorithm for replace is quite complicated:
10232
      - for all disks of the instance:
10233
        - create new LVs on the new node with same names
10234
        - shutdown the drbd device on the old secondary
10235
        - disconnect the drbd network on the primary
10236
        - create the drbd device on the new secondary
10237
        - network attach the drbd on the primary, using an artifice:
10238
          the drbd code for Attach() will connect to the network if it
10239
          finds a device which is connected to the correct local disks but
10240
          not network enabled
10241
      - wait for sync across all devices
10242
      - remove all disks from the old secondary
10243

10244
    Failures are not very well handled.
10245

10246
    """
10247
    steps_total = 6
10248

    
10249
    pnode = self.instance.primary_node
10250

    
10251
    # Step: check device activation
10252
    self.lu.LogStep(1, steps_total, "Check device existence")
10253
    self._CheckDisksExistence([self.instance.primary_node])
10254
    self._CheckVolumeGroup([self.instance.primary_node])
10255

    
10256
    # Step: check other node consistency
10257
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10258
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10259

    
10260
    # Step: create new storage
10261
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10262
    for idx, dev in enumerate(self.instance.disks):
10263
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10264
                      (self.new_node, idx))
10265
      # we pass force_create=True to force LVM creation
10266
      for new_lv in dev.children:
10267
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10268
                        _GetInstanceInfoText(self.instance), False)
10269

    
10270
    # Step 4: drbd minors and drbd setup changes
10271
    # after this, we must manually remove the drbd minors on both the
10272
    # error and the success paths
10273
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10274
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10275
                                         for dev in self.instance.disks],
10276
                                        self.instance.name)
10277
    logging.debug("Allocated minors %r", minors)
10278

    
10279
    iv_names = {}
10280
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10281
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10282
                      (self.new_node, idx))
10283
      # create new devices on new_node; note that we create two IDs:
10284
      # one without port, so the drbd will be activated without
10285
      # networking information on the new node at this stage, and one
10286
      # with network, for the latter activation in step 4
10287
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10288
      if self.instance.primary_node == o_node1:
10289
        p_minor = o_minor1
10290
      else:
10291
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10292
        p_minor = o_minor2
10293

    
10294
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10295
                      p_minor, new_minor, o_secret)
10296
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10297
                    p_minor, new_minor, o_secret)
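      # The DRBD8 logical_id is (node_A, node_B, port, minor_A, minor_B,
      # secret); new_alone_id uses None for the port so the device comes up
      # standalone first, while new_net_id keeps the original port for the
      # later network attach.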
10298

    
10299
      iv_names[idx] = (dev, dev.children, new_net_id)
10300
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10301
                    new_net_id)
10302
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10303
                              logical_id=new_alone_id,
10304
                              children=dev.children,
10305
                              size=dev.size)
10306
      try:
10307
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10308
                              _GetInstanceInfoText(self.instance), False)
10309
      except errors.GenericError:
10310
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10311
        raise
10312

    
10313
    # We have new devices, shutdown the drbd on the old secondary
10314
    for idx, dev in enumerate(self.instance.disks):
10315
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10316
      self.cfg.SetDiskID(dev, self.target_node)
10317
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10318
      if msg:
10319
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10320
                           "node: %s" % (idx, msg),
10321
                           hint=("Please cleanup this device manually as"
10322
                                 " soon as possible"))
10323

    
10324
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10325
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10326
                                               self.instance.disks)[pnode]
10327

    
10328
    msg = result.fail_msg
10329
    if msg:
10330
      # detaches didn't succeed (unlikely)
10331
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10332
      raise errors.OpExecError("Can't detach the disks from the network on"
10333
                               " old node: %s" % (msg,))
10334

    
10335
    # if we managed to detach at least one, we update all the disks of
10336
    # the instance to point to the new secondary
10337
    self.lu.LogInfo("Updating instance configuration")
10338
    for dev, _, new_logical_id in iv_names.itervalues():
10339
      dev.logical_id = new_logical_id
10340
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10341

    
10342
    self.cfg.Update(self.instance, feedback_fn)
10343

    
10344
    # Release all node locks (the configuration has been updated)
10345
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10346

    
10347
    # and now perform the drbd attach
10348
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10349
                    " (standalone => connected)")
10350
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10351
                                            self.new_node],
10352
                                           self.node_secondary_ip,
10353
                                           self.instance.disks,
10354
                                           self.instance.name,
10355
                                           False)
10356
    for to_node, to_result in result.items():
10357
      msg = to_result.fail_msg
10358
      if msg:
10359
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10360
                           to_node, msg,
10361
                           hint=("please do a gnt-instance info to see the"
10362
                                 " status of disks"))
10363
    cstep = 5
10364
    if self.early_release:
10365
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10366
      cstep += 1
10367
      self._RemoveOldStorage(self.target_node, iv_names)
10368
      # TODO: Check if releasing locks early still makes sense
10369
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10370
    else:
10371
      # Release all resource locks except those used by the instance
10372
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10373
                    keep=self.node_secondary_ip.keys())
10374

    
10375
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10376
    # shutdown in the caller into consideration.
10377

    
10378
    # Wait for sync
10379
    # This can fail as the old devices are degraded and _WaitForSync
10380
    # does a combined result over all disks, so we don't check its return value
10381
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10382
    cstep += 1
10383
    _WaitForSync(self.lu, self.instance)
10384

    
10385
    # Check all devices manually
10386
    self._CheckDevices(self.instance.primary_node, iv_names)
10387

    
10388
    # Step: remove old storage
10389
    if not self.early_release:
10390
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10391
      self._RemoveOldStorage(self.target_node, iv_names)
10392

    
10393

    
10394
class LURepairNodeStorage(NoHooksLU):
10395
  """Repairs the volume group on a node.
10396

10397
  """
10398
  REQ_BGL = False
10399

    
10400
  def CheckArguments(self):
10401
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10402

    
10403
    storage_type = self.op.storage_type
10404

    
10405
    if (constants.SO_FIX_CONSISTENCY not in
10406
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10407
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10408
                                 " repaired" % storage_type,
10409
                                 errors.ECODE_INVAL)
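    # Only storage types that list SO_FIX_CONSISTENCY in
    # constants.VALID_STORAGE_OPERATIONS can be repaired here; in practice
    # this is typically only the LVM volume group type.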
10410

    
10411
  def ExpandNames(self):
10412
    self.needed_locks = {
10413
      locking.LEVEL_NODE: [self.op.node_name],
10414
      }
10415

    
10416
  def _CheckFaultyDisks(self, instance, node_name):
10417
    """Ensure faulty disks abort the opcode or at least warn."""
10418
    try:
10419
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10420
                                  node_name, True):
10421
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10422
                                   " node '%s'" % (instance.name, node_name),
10423
                                   errors.ECODE_STATE)
10424
    except errors.OpPrereqError, err:
10425
      if self.op.ignore_consistency:
10426
        self.proc.LogWarning(str(err.args[0]))
10427
      else:
10428
        raise
10429

    
10430
  def CheckPrereq(self):
10431
    """Check prerequisites.
10432

10433
    """
10434
    # Check whether any instance on this node has faulty disks
10435
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10436
      if inst.admin_state != constants.ADMINST_UP:
10437
        continue
10438
      check_nodes = set(inst.all_nodes)
10439
      check_nodes.discard(self.op.node_name)
10440
      for inst_node_name in check_nodes:
10441
        self._CheckFaultyDisks(inst, inst_node_name)
10442

    
10443
  def Exec(self, feedback_fn):
10444
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10445
                (self.op.name, self.op.node_name))
10446

    
10447
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10448
    result = self.rpc.call_storage_execute(self.op.node_name,
10449
                                           self.op.storage_type, st_args,
10450
                                           self.op.name,
10451
                                           constants.SO_FIX_CONSISTENCY)
10452
    result.Raise("Failed to repair storage unit '%s' on %s" %
10453
                 (self.op.name, self.op.node_name))
10454

    
10455

    
10456
class LUNodeEvacuate(NoHooksLU):
10457
  """Evacuates instances off a list of nodes.
10458

10459
  """
10460
  REQ_BGL = False
10461

    
10462
  def CheckArguments(self):
10463
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10464

    
10465
  def ExpandNames(self):
10466
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10467

    
10468
    if self.op.remote_node is not None:
10469
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10470
      assert self.op.remote_node
10471

    
10472
      if self.op.remote_node == self.op.node_name:
10473
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10474
                                   " secondary node", errors.ECODE_INVAL)
10475

    
10476
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10477
        raise errors.OpPrereqError("Without the use of an iallocator only"
10478
                                   " secondary instances can be evacuated",
10479
                                   errors.ECODE_INVAL)
10480

    
10481
    # Declare locks
10482
    self.share_locks = _ShareAll()
10483
    self.needed_locks = {
10484
      locking.LEVEL_INSTANCE: [],
10485
      locking.LEVEL_NODEGROUP: [],
10486
      locking.LEVEL_NODE: [],
10487
      }
10488

    
10489
    if self.op.remote_node is None:
10490
      # Iallocator will choose any node(s) in the same group
10491
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10492
    else:
10493
      group_nodes = frozenset([self.op.remote_node])
10494

    
10495
    # Determine nodes to be locked
10496
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10497

    
10498
  def _DetermineInstances(self):
10499
    """Builds list of instances to operate on.
10500

10501
    """
10502
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10503

    
10504
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10505
      # Primary instances only
10506
      inst_fn = _GetNodePrimaryInstances
10507
      assert self.op.remote_node is None, \
10508
        "Evacuating primary instances requires iallocator"
10509
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10510
      # Secondary instances only
10511
      inst_fn = _GetNodeSecondaryInstances
10512
    else:
10513
      # All instances
10514
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10515
      inst_fn = _GetNodeInstances
10516

    
10517
    return inst_fn(self.cfg, self.op.node_name)
10518

    
10519
  def DeclareLocks(self, level):
10520
    if level == locking.LEVEL_INSTANCE:
10521
      # Lock instances optimistically, needs verification once node and group
10522
      # locks have been acquired
10523
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10524
        set(i.name for i in self._DetermineInstances())
10525

    
10526
    elif level == locking.LEVEL_NODEGROUP:
10527
      # Lock node groups optimistically, needs verification once nodes have
10528
      # been acquired
10529
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10530
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10531

    
10532
    elif level == locking.LEVEL_NODE:
10533
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10534

    
10535
  def CheckPrereq(self):
10536
    # Verify locks
10537
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10538
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10539
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10540

    
10541
    assert owned_nodes == self.lock_nodes
10542

    
10543
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10544
    if owned_groups != wanted_groups:
10545
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10546
                               " current groups are '%s', used to be '%s'" %
10547
                               (utils.CommaJoin(wanted_groups),
10548
                                utils.CommaJoin(owned_groups)))
10549

    
10550
    # Determine affected instances
10551
    self.instances = self._DetermineInstances()
10552
    self.instance_names = [i.name for i in self.instances]
10553

    
10554
    if set(self.instance_names) != owned_instances:
10555
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10556
                               " were acquired, current instances are '%s',"
10557
                               " used to be '%s'" %
10558
                               (self.op.node_name,
10559
                                utils.CommaJoin(self.instance_names),
10560
                                utils.CommaJoin(owned_instances)))
10561

    
10562
    if self.instance_names:
10563
      self.LogInfo("Evacuating instances from node '%s': %s",
10564
                   self.op.node_name,
10565
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10566
    else:
10567
      self.LogInfo("No instances to evacuate from node '%s'",
10568
                   self.op.node_name)
10569

    
10570
    if self.op.remote_node is not None:
10571
      for i in self.instances:
10572
        if i.primary_node == self.op.remote_node:
10573
          raise errors.OpPrereqError("Node %s is the primary node of"
10574
                                     " instance %s, cannot use it as"
10575
                                     " secondary" %
10576
                                     (self.op.remote_node, i.name),
10577
                                     errors.ECODE_INVAL)
10578

    
10579
  def Exec(self, feedback_fn):
10580
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10581

    
10582
    if not self.instance_names:
10583
      # No instances to evacuate
10584
      jobs = []
10585

    
10586
    elif self.op.iallocator is not None:
10587
      # TODO: Implement relocation to other group
10588
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10589
                       evac_mode=self.op.mode,
10590
                       instances=list(self.instance_names))
10591

    
10592
      ial.Run(self.op.iallocator)
10593

    
10594
      if not ial.success:
10595
        raise errors.OpPrereqError("Can't compute node evacuation using"
10596
                                   " iallocator '%s': %s" %
10597
                                   (self.op.iallocator, ial.info),
10598
                                   errors.ECODE_NORES)
10599

    
10600
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10601

    
10602
    elif self.op.remote_node is not None:
10603
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10604
      jobs = [
10605
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10606
                                        remote_node=self.op.remote_node,
10607
                                        disks=[],
10608
                                        mode=constants.REPLACE_DISK_CHG,
10609
                                        early_release=self.op.early_release)]
10610
        for instance_name in self.instance_names
10611
        ]
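      # One single-opcode job per instance: each OpInstanceReplaceDisks runs
      # as its own job, so a failure for one instance does not abort the
      # evacuation of the others.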
10612

    
10613
    else:
10614
      raise errors.ProgrammerError("No iallocator or remote node")
10615

    
10616
    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10642
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10643

10644
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10645
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10646

10647
  @type lu: L{LogicalUnit}
10648
  @param lu: Logical unit instance
10649
  @type alloc_result: tuple/list
10650
  @param alloc_result: Result from iallocator
10651
  @type early_release: bool
10652
  @param early_release: Whether to release locks early if possible
10653
  @type use_nodes: bool
10654
  @param use_nodes: Whether to display node names instead of groups
10655

10656
  """
10657
  (moved, failed, jobs) = alloc_result
10658

    
10659
  if failed:
10660
    lu.LogWarning("Unable to evacuate instances %s",
10661
                  utils.CommaJoin("%s (%s)" % (name, reason)
10662
                                  for (name, reason) in failed))
10663

    
10664
  if moved:
10665
    lu.LogInfo("Instances to be moved: %s",
10666
               utils.CommaJoin("%s (to %s)" %
10667
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10668
                               for (name, group, nodes) in moved))
10669

    
10670
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10671
              map(opcodes.OpCode.LoadOpCode, ops))
10672
          for ops in jobs]
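  # alloc_result is expected to be (moved, failed, jobs): "moved" holds
  # (name, group, nodes) tuples, "failed" holds (name, reason) tuples and
  # "jobs" is a list of job definitions, each a list of serialized opcodes
  # deserialized above via opcodes.OpCode.LoadOpCode.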
10673

    
10674

    
10675
class LUInstanceGrowDisk(LogicalUnit):
10676
  """Grow a disk of an instance.
10677

10678
  """
10679
  HPATH = "disk-grow"
10680
  HTYPE = constants.HTYPE_INSTANCE
10681
  REQ_BGL = False
10682

    
10683
  def ExpandNames(self):
10684
    self._ExpandAndLockInstance()
10685
    self.needed_locks[locking.LEVEL_NODE] = []
10686
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10687
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10688

    
10689
  def DeclareLocks(self, level):
10690
    if level == locking.LEVEL_NODE:
10691
      self._LockInstancesNodes()
10692
    elif level == locking.LEVEL_NODE_RES:
10693
      # Copy node locks
10694
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10695
        self.needed_locks[locking.LEVEL_NODE][:]
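      # A shallow copy ([:]) is taken so that later changes to the NODE lock
      # list do not implicitly modify the NODE_RES list as well.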
10696

    
10697
  def BuildHooksEnv(self):
10698
    """Build hooks env.
10699

10700
    This runs on the master, the primary and all the secondaries.
10701

10702
    """
10703
    env = {
10704
      "DISK": self.op.disk,
10705
      "AMOUNT": self.op.amount,
10706
      }
10707
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10708
    return env
10709

    
10710
  def BuildHooksNodes(self):
10711
    """Build hooks nodes.
10712

10713
    """
10714
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10715
    return (nl, nl)
10716

    
10717
  def CheckPrereq(self):
10718
    """Check prerequisites.
10719

10720
    This checks that the instance is in the cluster.
10721

10722
    """
10723
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10724
    assert instance is not None, \
10725
      "Cannot retrieve locked instance %s" % self.op.instance_name
10726
    nodenames = list(instance.all_nodes)
10727
    for node in nodenames:
10728
      _CheckNodeOnline(self, node)
10729

    
10730
    self.instance = instance
10731

    
10732
    if instance.disk_template not in constants.DTS_GROWABLE:
10733
      raise errors.OpPrereqError("Instance's disk layout does not support"
10734
                                 " growing", errors.ECODE_INVAL)
10735

    
10736
    self.disk = instance.FindDisk(self.op.disk)
10737

    
10738
    if instance.disk_template not in (constants.DT_FILE,
10739
                                      constants.DT_SHARED_FILE):
10740
      # TODO: check the free disk space for file, when that feature will be
10741
      # supported
10742
      _CheckNodesFreeDiskPerVG(self, nodenames,
10743
                               self.disk.ComputeGrowth(self.op.amount))
10744

    
10745
  def Exec(self, feedback_fn):
10746
    """Execute disk grow.
10747

10748
    """
10749
    instance = self.instance
10750
    disk = self.disk
10751

    
10752
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10753
    assert (self.owned_locks(locking.LEVEL_NODE) ==
10754
            self.owned_locks(locking.LEVEL_NODE_RES))
10755

    
10756
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10757
    if not disks_ok:
10758
      raise errors.OpExecError("Cannot activate block device to grow")
10759

    
10760
    feedback_fn("Growing disk %s of instance '%s' by %s" %
10761
                (self.op.disk, instance.name,
10762
                 utils.FormatUnit(self.op.amount, "h")))
10763

    
10764
    # First run all grow ops in dry-run mode
10765
    for node in instance.all_nodes:
10766
      self.cfg.SetDiskID(disk, node)
10767
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10768
      result.Raise("Grow request failed to node %s" % node)
10769

    
10770
    # We know that (as far as we can test) operations across different
10771
    # nodes will succeed, time to run it for real
10772
    for node in instance.all_nodes:
10773
      self.cfg.SetDiskID(disk, node)
10774
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10775
      result.Raise("Grow request failed to node %s" % node)
10776

    
10777
      # TODO: Rewrite code to work properly
10778
      # DRBD goes into sync mode for a short amount of time after executing the
10779
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10780
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10781
      # time is a work-around.
10782
      time.sleep(5)
10783

    
10784
    disk.RecordGrow(self.op.amount)
10785
    self.cfg.Update(instance, feedback_fn)
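    # RecordGrow only updates the configuration's notion of the disk size;
    # the actual resize already happened via the call_blockdev_grow calls
    # above.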
10786

    
10787
    # Changes have been recorded, release node lock
10788
    _ReleaseLocks(self, locking.LEVEL_NODE)
10789

    
10790
    # Downgrade lock while waiting for sync
10791
    self.glm.downgrade(locking.LEVEL_INSTANCE)
10792

    
10793
    if self.op.wait_for_sync:
10794
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10795
      if disk_abort:
10796
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10797
                             " status; please check the instance")
10798
      if instance.admin_state != constants.ADMINST_UP:
10799
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10800
    elif instance.admin_state != constants.ADMINST_UP:
10801
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10802
                           " not supposed to be running because no wait for"
10803
                           " sync mode was requested")
10804

    
10805
    assert self.owned_locks(locking.LEVEL_NODE_RES)
10806
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10807

    
10808

    
10809
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

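  # The Exec method below returns a dict keyed by instance name; each value
  # mirrors the configuration (nics, disks, hvparams, beparams, osparams)
  # plus the "run_state" determined via RPC.  Sketch of the shape, with
  # illustrative names only:
  #   {"inst1.example.com": {"name": ..., "config_state": ...,
  #                          "run_state": "up", "disks": [...], ...}}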
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

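    # The parameter checks below merge the submitted hvparams/beparams with
    # the instance's current values and the cluster defaults before
    # validating them; e.g. a CPU mask with several entries must match the
    # proposed number of vCPUs, so a mask like "0-1:2-3" is only accepted
    # together with vcpus=2 (illustrative values).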
    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
                            msg="cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

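  # Plain -> DRBD conversion: generate the new disk objects, create the
  # missing data/meta volumes, rename the original LVs into place as the
  # DRBD data volumes, create the DRBD devices on both nodes and wait for
  # the initial sync before declaring success.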
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

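  # DRBD -> plain conversion: keep the first child (the data LV) of each
  # DRBD disk, update the configuration, then remove the now-unused volumes
  # on the secondary node and the metadata volumes on the primary; removal
  # failures are only logged as warnings.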
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # Node resource locks will be released by caller

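  # Exec applies the changes validated in CheckPrereq and returns a list of
  # (parameter, new value) pairs, e.g. [("disk/1", "add:size=1024,mode=rw"),
  # ("be/memory", 512)] (illustrative values); most changes only take effect
  # at the next restart of the instance.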
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


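# LUInstanceChangeGroup below does not move the instance itself: it asks the
# configured iallocator (IALLOCATOR_MODE_CHG_GROUP) for a relocation plan and
# wraps the resulting opcodes in ResultWithJobs, so the actual moves run as
# follow-up jobs.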
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


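# LUBackupQuery below returns a mapping of node name to the list of exports
# found on that node, with False as the value for nodes that failed to answer
# the export_list RPC.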
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


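# LUBackupExport handles two modes: "local" exports copy the instance to a
# target node inside the cluster, while "remote" exports stream the disks to
# an external destination authenticated via an X509 CA and an HMAC-signed key
# name derived from the cluster domain secret (see CheckPrereq below).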
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

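  # Illustrative note (not part of the original code): for remote exports the
  # opcode's target_node field is a list with one entry per instance disk;
  # after CheckPrereq each entry has been verified against the cluster domain
  # secret and unpacked, so self.dest_disk_info looks roughly like
  #   [("dest1.example.com", 11000, "magic0"), ("dest1.example.com", 11001,
  #     "magic1")]
  # where the host names, ports and magic values are made-up examples.
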
  def _CleanupExports(self, feedback_fn):
12121
    """Removes exports of current instance from all other nodes.
12122

12123
    If an instance in a cluster with nodes A..D was exported to node C, its
12124
    exports will be removed from the nodes A, B and D.
12125

12126
    """
12127
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12128

    
12129
    nodelist = self.cfg.GetNodeList()
12130
    nodelist.remove(self.dst_node.name)
12131

    
12132
    # on one-node clusters nodelist will be empty after the removal
12133
    # if we proceed the backup would be removed because OpBackupQuery
12134
    # substitutes an empty list with the full cluster node list.
12135
    iname = self.instance.name
12136
    if nodelist:
12137
      feedback_fn("Removing old exports for instance %s" % iname)
12138
      exportlist = self.rpc.call_export_list(nodelist)
12139
      for node in exportlist:
12140
        if exportlist[node].fail_msg:
12141
          continue
12142
        if iname in exportlist[node].payload:
12143
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12144
          if msg:
12145
            self.LogWarning("Could not remove older export for instance %s"
12146
                            " on node %s: %s", iname, node, msg)
12147

    
12148
  def Exec(self, feedback_fn):
12149
    """Export an instance to an image in the cluster.
12150

12151
    """
12152
    assert self.op.mode in constants.EXPORT_MODES
12153

    
12154
    instance = self.instance
12155
    src_node = instance.primary_node
12156

    
12157
    if self.op.shutdown:
12158
      # shutdown the instance, but not the disks
12159
      feedback_fn("Shutting down instance %s" % instance.name)
12160
      result = self.rpc.call_instance_shutdown(src_node, instance,
12161
                                               self.op.shutdown_timeout)
12162
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12163
      result.Raise("Could not shutdown instance %s on"
12164
                   " node %s" % (instance.name, src_node))
12165

    
12166
    # set the disks ID correctly since call_instance_start needs the
12167
    # correct drbd minor to create the symlinks
12168
    for disk in instance.disks:
12169
      self.cfg.SetDiskID(disk, src_node)
12170

    
12171
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
12172

    
12173
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    
12178
    try:
12179
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12180
                                                     instance)
12181

    
12182
      helper.CreateSnapshots()
12183
      try:
12184
        if (self.op.shutdown and
12185
            instance.admin_state == constants.ADMINST_UP and
12186
            not self.op.remove_instance):
12187
          assert not activate_disks
12188
          feedback_fn("Starting instance %s" % instance.name)
12189
          result = self.rpc.call_instance_start(src_node,
12190
                                                (instance, None, None), False)
12191
          msg = result.fail_msg
12192
          if msg:
12193
            feedback_fn("Failed to start instance: %s" % msg)
12194
            _ShutdownInstanceDisks(self, instance)
12195
            raise errors.OpExecError("Could not start instance: %s" % msg)
12196

    
12197
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12198
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12199
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12200
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12201
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12202

    
12203
          (key_name, _, _) = self.x509_key_name
12204

    
12205
          dest_ca_pem = \
12206
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12207
                                            self.dest_x509_ca)
12208

    
12209
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12210
                                                     key_name, dest_ca_pem,
12211
                                                     timeouts)
12212
      finally:
12213
        helper.Cleanup()
12214

    
12215
      # Check for backwards compatibility
12216
      assert len(dresults) == len(instance.disks)
12217
      assert compat.all(isinstance(i, bool) for i in dresults), \
12218
             "Not all results are boolean: %r" % dresults
12219

    
12220
    finally:
12221
      if activate_disks:
12222
        feedback_fn("Deactivating disks for %s" % instance.name)
12223
        _ShutdownInstanceDisks(self, instance)
12224

    
12225
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))
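    # Illustrative note (not part of the original code): fin_resu is a single
    # boolean for the finalization step and dresults holds one boolean per
    # instance disk, so a run where the second of three disks failed would
    # look roughly like fin_resu=True, dresults=[True, False, True] and be
    # reported as "disk export: disk(s) 1".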

    
12237
    # At this point, the export was successful, we can cleanup/finish
12238

    
12239
    # Remove instance if requested
12240
    if self.op.remove_instance:
12241
      feedback_fn("Removing instance %s" % instance.name)
12242
      _RemoveInstance(self, feedback_fn, instance,
12243
                      self.op.ignore_remove_failures)
12244

    
12245
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12246
      self._CleanupExports(feedback_fn)
12247

    
12248
    return fin_resu, dresults
12249

    
12250

    
12251
class LUBackupRemove(NoHooksLU):
12252
  """Remove exports related to the named instance.
12253

12254
  """
12255
  REQ_BGL = False
12256

    
12257
  def ExpandNames(self):
12258
    self.needed_locks = {}
12259
    # We need all nodes to be locked in order for RemoveExport to work, but we
12260
    # don't need to lock the instance itself, as nothing will happen to it (and
12261
    # we can remove exports also for a removed instance)
12262
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12263

    
12264
  def Exec(self, feedback_fn):
12265
    """Remove any export.
12266

12267
    """
12268
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12269
    # If the instance was not found we'll try with the name that was passed in.
12270
    # This will only work if it was an FQDN, though.
12271
    fqdn_warn = False
12272
    if not instance_name:
12273
      fqdn_warn = True
12274
      instance_name = self.op.instance_name
12275

    
12276
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12277
    exportlist = self.rpc.call_export_list(locked_nodes)
12278
    found = False
12279
    for node in exportlist:
12280
      msg = exportlist[node].fail_msg
12281
      if msg:
12282
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12283
        continue
12284
      if instance_name in exportlist[node].payload:
12285
        found = True
12286
        result = self.rpc.call_export_remove(node, instance_name)
12287
        msg = result.fail_msg
12288
        if msg:
12289
          logging.error("Could not remove export for instance %s"
12290
                        " on node %s: %s", instance_name, node, msg)
12291

    
12292
    if fqdn_warn and not found:
12293
      feedback_fn("Export not found. If trying to remove an export belonging"
12294
                  " to a deleted instance please use its Fully Qualified"
12295
                  " Domain Name.")
12296

    
12297

    
12298
class LUGroupAdd(LogicalUnit):
12299
  """Logical unit for creating node groups.
12300

12301
  """
12302
  HPATH = "group-add"
12303
  HTYPE = constants.HTYPE_GROUP
12304
  REQ_BGL = False
12305

    
12306
  def ExpandNames(self):
12307
    # We need the new group's UUID here so that we can create and acquire the
12308
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12309
    # that it should not check whether the UUID exists in the configuration.
12310
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12311
    self.needed_locks = {}
12312
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12313

    
12314
  def CheckPrereq(self):
12315
    """Check prerequisites.
12316

12317
    This checks that the given group name is not an existing node group
12318
    already.
12319

12320
    """
12321
    try:
12322
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12323
    except errors.OpPrereqError:
12324
      pass
12325
    else:
12326
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12327
                                 " node group (UUID: %s)" %
12328
                                 (self.op.group_name, existing_uuid),
12329
                                 errors.ECODE_EXISTS)
12330

    
12331
    if self.op.ndparams:
12332
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12333

    
12334
  def BuildHooksEnv(self):
12335
    """Build hooks env.
12336

12337
    """
12338
    return {
12339
      "GROUP_NAME": self.op.group_name,
12340
      }
12341

    
12342
  def BuildHooksNodes(self):
12343
    """Build hooks nodes.
12344

12345
    """
12346
    mn = self.cfg.GetMasterNode()
12347
    return ([mn], [mn])
12348

    
12349
  def Exec(self, feedback_fn):
12350
    """Add the node group to the cluster.
12351

12352
    """
12353
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12354
                                  uuid=self.group_uuid,
12355
                                  alloc_policy=self.op.alloc_policy,
12356
                                  ndparams=self.op.ndparams)
12357

    
12358
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12359
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12360

    
12361

    
12362
class LUGroupAssignNodes(NoHooksLU):
12363
  """Logical unit for assigning nodes to groups.
12364

12365
  """
12366
  REQ_BGL = False
12367

    
12368
  def ExpandNames(self):
12369
    # These raise errors.OpPrereqError on their own:
12370
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12371
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12372

    
12373
    # We want to lock all the affected nodes and groups. We have readily
12374
    # available the list of nodes, and the *destination* group. To gather the
12375
    # list of "source" groups, we need to fetch node information later on.
12376
    self.needed_locks = {
12377
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12378
      locking.LEVEL_NODE: self.op.nodes,
12379
      }
12380

    
12381
  def DeclareLocks(self, level):
12382
    if level == locking.LEVEL_NODEGROUP:
12383
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12384

    
12385
      # Try to get all affected nodes' groups without having the group or node
12386
      # lock yet. Needs verification later in the code flow.
12387
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12388

    
12389
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12390

    
12391
  def CheckPrereq(self):
12392
    """Check prerequisites.
12393

12394
    """
12395
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12396
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12397
            frozenset(self.op.nodes))
12398

    
12399
    expected_locks = (set([self.group_uuid]) |
12400
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12401
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12402
    if actual_locks != expected_locks:
12403
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12404
                               " current groups are '%s', used to be '%s'" %
12405
                               (utils.CommaJoin(expected_locks),
12406
                                utils.CommaJoin(actual_locks)))
12407

    
12408
    self.node_data = self.cfg.GetAllNodesInfo()
12409
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12410
    instance_data = self.cfg.GetAllInstancesInfo()
12411

    
12412
    if self.group is None:
12413
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12414
                               (self.op.group_name, self.group_uuid))
12415

    
12416
    (new_splits, previous_splits) = \
12417
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12418
                                             for node in self.op.nodes],
12419
                                            self.node_data, instance_data)
12420

    
12421
    if new_splits:
12422
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12423

    
12424
      if not self.op.force:
12425
        raise errors.OpExecError("The following instances get split by this"
12426
                                 " change and --force was not given: %s" %
12427
                                 fmt_new_splits)
12428
      else:
12429
        self.LogWarning("This operation will split the following instances: %s",
12430
                        fmt_new_splits)
12431

    
12432
        if previous_splits:
12433
          self.LogWarning("In addition, these already-split instances continue"
12434
                          " to be split across groups: %s",
12435
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12436

    
12437
  def Exec(self, feedback_fn):
12438
    """Assign nodes to a new group.
12439

12440
    """
12441
    for node in self.op.nodes:
12442
      self.node_data[node].group = self.group_uuid
12443

    
12444
    # FIXME: Depends on side-effects of modifying the result of
12445
    # C{cfg.GetAllNodesInfo}
12446

    
12447
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12448

    
12449
  @staticmethod
12450
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12451
    """Check for split instances after a node assignment.
12452

12453
    This method considers a series of node assignments as an atomic operation,
12454
    and returns information about split instances after applying the set of
12455
    changes.
12456

12457
    In particular, it returns information about newly split instances, and
12458
    instances that were already split, and remain so after the change.
12459

12460
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12461
    considered.
12462

12463
    @type changes: list of (node_name, new_group_uuid) pairs.
12464
    @param changes: list of node assignments to consider.
12465
    @param node_data: a dict with data for all nodes
12466
    @param instance_data: a dict with all instances to consider
12467
    @rtype: a two-tuple
12468
    @return: a pair of lists: instances that were previously okay but end up
      split as a consequence of this change, and instances that were already
      split and that this change does not fix

    """
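    # Worked example (hypothetical data, for illustration only): assume nodes
    # "n1" and "n2" both sit in group "g1" and a DRBD instance "inst1" uses
    # (n1, n2). For changes=[("n1", "g2")] the instance is not split today but
    # would be afterwards, so this returns (["inst1"], []). If "n2" had
    # already been in "g2", the same change would heal the split and the
    # result would be ([], []).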
    changed_nodes = dict((node, group) for node, group in changes
12474
                         if node_data[node].group != group)
12475

    
12476
    all_split_instances = set()
12477
    previously_split_instances = set()
12478

    
12479
    def InstanceNodes(instance):
12480
      return [instance.primary_node] + list(instance.secondary_nodes)
12481

    
12482
    for inst in instance_data.values():
12483
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12484
        continue
12485

    
12486
      instance_nodes = InstanceNodes(inst)
12487

    
12488
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12489
        previously_split_instances.add(inst.name)
12490

    
12491
      if len(set(changed_nodes.get(node, node_data[node].group)
12492
                 for node in instance_nodes)) > 1:
12493
        all_split_instances.add(inst.name)
12494

    
12495
    return (list(all_split_instances - previously_split_instances),
12496
            list(previously_split_instances & all_split_instances))
12497

    
12498

    
12499
class _GroupQuery(_QueryBase):
12500
  FIELDS = query.GROUP_FIELDS
12501

    
12502
  def ExpandNames(self, lu):
12503
    lu.needed_locks = {}
12504

    
12505
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12506
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12507

    
12508
    if not self.names:
12509
      self.wanted = [name_to_uuid[name]
12510
                     for name in utils.NiceSort(name_to_uuid.keys())]
12511
    else:
12512
      # Accept names to be either names or UUIDs.
12513
      missing = []
12514
      self.wanted = []
12515
      all_uuid = frozenset(self._all_groups.keys())
12516

    
12517
      for name in self.names:
12518
        if name in all_uuid:
12519
          self.wanted.append(name)
12520
        elif name in name_to_uuid:
12521
          self.wanted.append(name_to_uuid[name])
12522
        else:
12523
          missing.append(name)
12524

    
12525
      if missing:
12526
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12527
                                   utils.CommaJoin(missing),
12528
                                   errors.ECODE_NOENT)
12529

    
12530
  def DeclareLocks(self, lu, level):
12531
    pass
12532

    
12533
  def _GetQueryData(self, lu):
12534
    """Computes the list of node groups and their attributes.
12535

12536
    """
12537
    do_nodes = query.GQ_NODE in self.requested_data
12538
    do_instances = query.GQ_INST in self.requested_data
12539

    
12540
    group_to_nodes = None
12541
    group_to_instances = None
12542

    
12543
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
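    # For illustration (hypothetical names, not from the original code), the
    # two mappings end up shaped roughly like
    #   group_to_nodes     = {"uuid-1": ["node1", "node2"], "uuid-2": []}
    #   group_to_instances = {"uuid-1": ["inst1"], "uuid-2": []}
    # keyed by the UUIDs of the requested node groups.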
    if do_nodes or do_instances:
12549
      all_nodes = lu.cfg.GetAllNodesInfo()
12550
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12551
      node_to_group = {}
12552

    
12553
      for node in all_nodes.values():
12554
        if node.group in group_to_nodes:
12555
          group_to_nodes[node.group].append(node.name)
12556
          node_to_group[node.name] = node.group
12557

    
12558
      if do_instances:
12559
        all_instances = lu.cfg.GetAllInstancesInfo()
12560
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12561

    
12562
        for instance in all_instances.values():
12563
          node = instance.primary_node
12564
          if node in node_to_group:
12565
            group_to_instances[node_to_group[node]].append(instance.name)
12566

    
12567
        if not do_nodes:
12568
          # Do not pass on node information if it was not requested.
12569
          group_to_nodes = None
12570

    
12571
    return query.GroupQueryData([self._all_groups[uuid]
12572
                                 for uuid in self.wanted],
12573
                                group_to_nodes, group_to_instances)
12574

    
12575

    
12576
class LUGroupQuery(NoHooksLU):
12577
  """Logical unit for querying node groups.
12578

12579
  """
12580
  REQ_BGL = False
12581

    
12582
  def CheckArguments(self):
12583
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12584
                          self.op.output_fields, False)
12585

    
12586
  def ExpandNames(self):
12587
    self.gq.ExpandNames(self)
12588

    
12589
  def DeclareLocks(self, level):
12590
    self.gq.DeclareLocks(self, level)
12591

    
12592
  def Exec(self, feedback_fn):
12593
    return self.gq.OldStyleQuery(self)
12594

    
12595

    
12596
class LUGroupSetParams(LogicalUnit):
12597
  """Modifies the parameters of a node group.
12598

12599
  """
12600
  HPATH = "group-modify"
12601
  HTYPE = constants.HTYPE_GROUP
12602
  REQ_BGL = False
12603

    
12604
  def CheckArguments(self):
12605
    all_changes = [
12606
      self.op.ndparams,
12607
      self.op.alloc_policy,
12608
      ]
12609

    
12610
    if all_changes.count(None) == len(all_changes):
12611
      raise errors.OpPrereqError("Please pass at least one modification",
12612
                                 errors.ECODE_INVAL)
12613

    
12614
  def ExpandNames(self):
12615
    # This raises errors.OpPrereqError on its own:
12616
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12617

    
12618
    self.needed_locks = {
12619
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12620
      }
12621

    
12622
  def CheckPrereq(self):
12623
    """Check prerequisites.
12624

12625
    """
12626
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12627

    
12628
    if self.group is None:
12629
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12630
                               (self.op.group_name, self.group_uuid))
12631

    
12632
    if self.op.ndparams:
12633
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12634
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12635
      self.new_ndparams = new_ndparams
12636

    
12637
  def BuildHooksEnv(self):
12638
    """Build hooks env.
12639

12640
    """
12641
    return {
12642
      "GROUP_NAME": self.op.group_name,
12643
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12644
      }
12645

    
12646
  def BuildHooksNodes(self):
12647
    """Build hooks nodes.
12648

12649
    """
12650
    mn = self.cfg.GetMasterNode()
12651
    return ([mn], [mn])
12652

    
12653
  def Exec(self, feedback_fn):
12654
    """Modifies the node group.
12655

12656
    """
12657
    result = []
12658

    
12659
    if self.op.ndparams:
12660
      self.group.ndparams = self.new_ndparams
12661
      result.append(("ndparams", str(self.group.ndparams)))
12662

    
12663
    if self.op.alloc_policy:
12664
      self.group.alloc_policy = self.op.alloc_policy
12665

    
12666
    self.cfg.Update(self.group, feedback_fn)
12667
    return result
12668

    
12669

    
12670
class LUGroupRemove(LogicalUnit):
12671
  HPATH = "group-remove"
12672
  HTYPE = constants.HTYPE_GROUP
12673
  REQ_BGL = False
12674

    
12675
  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    
12682
  def CheckPrereq(self):
12683
    """Check prerequisites.
12684

12685
    This checks that the given group name exists as a node group, that is
12686
    empty (i.e., contains no nodes), and that is not the last group of the
12687
    cluster.
12688

12689
    """
12690
    # Verify that the group is empty.
12691
    group_nodes = [node.name
12692
                   for node in self.cfg.GetAllNodesInfo().values()
12693
                   if node.group == self.group_uuid]
12694

    
12695
    if group_nodes:
12696
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12697
                                 " nodes: %s" %
12698
                                 (self.op.group_name,
12699
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12700
                                 errors.ECODE_STATE)
12701

    
12702
    # Verify the cluster would not be left group-less.
12703
    if len(self.cfg.GetNodeGroupList()) == 1:
12704
      raise errors.OpPrereqError("Group '%s' is the only group,"
12705
                                 " cannot be removed" %
12706
                                 self.op.group_name,
12707
                                 errors.ECODE_STATE)
12708

    
12709
  def BuildHooksEnv(self):
12710
    """Build hooks env.
12711

12712
    """
12713
    return {
12714
      "GROUP_NAME": self.op.group_name,
12715
      }
12716

    
12717
  def BuildHooksNodes(self):
12718
    """Build hooks nodes.
12719

12720
    """
12721
    mn = self.cfg.GetMasterNode()
12722
    return ([mn], [mn])
12723

    
12724
  def Exec(self, feedback_fn):
12725
    """Remove the node group.
12726

12727
    """
12728
    try:
12729
      self.cfg.RemoveNodeGroup(self.group_uuid)
12730
    except errors.ConfigurationError:
12731
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12732
                               (self.op.group_name, self.group_uuid))
12733

    
12734
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12735

    
12736

    
12737
class LUGroupRename(LogicalUnit):
12738
  HPATH = "group-rename"
12739
  HTYPE = constants.HTYPE_GROUP
12740
  REQ_BGL = False
12741

    
12742
  def ExpandNames(self):
12743
    # This raises errors.OpPrereqError on its own:
12744
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12745

    
12746
    self.needed_locks = {
12747
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12748
      }
12749

    
12750
  def CheckPrereq(self):
12751
    """Check prerequisites.
12752

12753
    Ensures requested new name is not yet used.
12754

12755
    """
12756
    try:
12757
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12758
    except errors.OpPrereqError:
12759
      pass
12760
    else:
12761
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12762
                                 " node group (UUID: %s)" %
12763
                                 (self.op.new_name, new_name_uuid),
12764
                                 errors.ECODE_EXISTS)
12765

    
12766
  def BuildHooksEnv(self):
12767
    """Build hooks env.
12768

12769
    """
12770
    return {
12771
      "OLD_NAME": self.op.group_name,
12772
      "NEW_NAME": self.op.new_name,
12773
      }
12774

    
12775
  def BuildHooksNodes(self):
12776
    """Build hooks nodes.
12777

12778
    """
12779
    mn = self.cfg.GetMasterNode()
12780

    
12781
    all_nodes = self.cfg.GetAllNodesInfo()
12782
    all_nodes.pop(mn, None)
12783

    
12784
    run_nodes = [mn]
12785
    run_nodes.extend(node.name for node in all_nodes.values()
12786
                     if node.group == self.group_uuid)
12787

    
12788
    return (run_nodes, run_nodes)
12789

    
12790
  def Exec(self, feedback_fn):
12791
    """Rename the node group.
12792

12793
    """
12794
    group = self.cfg.GetNodeGroup(self.group_uuid)
12795

    
12796
    if group is None:
12797
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12798
                               (self.op.group_name, self.group_uuid))
12799

    
12800
    group.name = self.op.new_name
12801
    self.cfg.Update(group, feedback_fn)
12802

    
12803
    return self.op.new_name
12804

    
12805

    
12806
class LUGroupEvacuate(LogicalUnit):
12807
  HPATH = "group-evacuate"
12808
  HTYPE = constants.HTYPE_GROUP
12809
  REQ_BGL = False
12810

    
12811
  def ExpandNames(self):
12812
    # This raises errors.OpPrereqError on its own:
12813
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12814

    
12815
    if self.op.target_groups:
12816
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12817
                                  self.op.target_groups)
12818
    else:
12819
      self.req_target_uuids = []
12820

    
12821
    if self.group_uuid in self.req_target_uuids:
12822
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12823
                                 " as a target group (targets are %s)" %
12824
                                 (self.group_uuid,
12825
                                  utils.CommaJoin(self.req_target_uuids)),
12826
                                 errors.ECODE_INVAL)
12827

    
12828
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12829

    
12830
    self.share_locks = _ShareAll()
12831
    self.needed_locks = {
12832
      locking.LEVEL_INSTANCE: [],
12833
      locking.LEVEL_NODEGROUP: [],
12834
      locking.LEVEL_NODE: [],
12835
      }
12836

    
12837
  def DeclareLocks(self, level):
12838
    if level == locking.LEVEL_INSTANCE:
12839
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12840

    
12841
      # Lock instances optimistically, needs verification once node and group
12842
      # locks have been acquired
12843
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12844
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12845

    
12846
    elif level == locking.LEVEL_NODEGROUP:
12847
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12848

    
12849
      if self.req_target_uuids:
12850
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12851

    
12852
        # Lock all groups used by instances optimistically; this requires going
12853
        # via the node before it's locked, requiring verification later on
12854
        lock_groups.update(group_uuid
12855
                           for instance_name in
12856
                             self.owned_locks(locking.LEVEL_INSTANCE)
12857
                           for group_uuid in
12858
                             self.cfg.GetInstanceNodeGroups(instance_name))
12859
      else:
12860
        # No target groups, need to lock all of them
12861
        lock_groups = locking.ALL_SET
12862

    
12863
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12864

    
12865
    elif level == locking.LEVEL_NODE:
12866
      # This will only lock the nodes in the group to be evacuated which
12867
      # contain actual instances
12868
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12869
      self._LockInstancesNodes()
12870

    
12871
      # Lock all nodes in group to be evacuated and target groups
12872
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12873
      assert self.group_uuid in owned_groups
12874
      member_nodes = [node_name
12875
                      for group in owned_groups
12876
                      for node_name in self.cfg.GetNodeGroup(group).members]
12877
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12878

    
12879
  def CheckPrereq(self):
12880
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12881
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12882
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12883

    
12884
    assert owned_groups.issuperset(self.req_target_uuids)
12885
    assert self.group_uuid in owned_groups
12886

    
12887
    # Check if locked instances are still correct
12888
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12889

    
12890
    # Get instance information
12891
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12892

    
12893
    # Check if node groups for locked instances are still correct
12894
    for instance_name in owned_instances:
12895
      inst = self.instances[instance_name]
12896
      assert owned_nodes.issuperset(inst.all_nodes), \
12897
        "Instance %s's nodes changed while we kept the lock" % instance_name
12898

    
12899
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12900
                                             owned_groups)
12901

    
12902
      assert self.group_uuid in inst_groups, \
12903
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12904

    
12905
    if self.req_target_uuids:
12906
      # User requested specific target groups
12907
      self.target_uuids = self.req_target_uuids
12908
    else:
12909
      # All groups except the one to be evacuated are potential targets
12910
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12911
                           if group_uuid != self.group_uuid]
12912

    
12913
      if not self.target_uuids:
12914
        raise errors.OpPrereqError("There are no possible target groups",
12915
                                   errors.ECODE_INVAL)
12916

    
12917
  def BuildHooksEnv(self):
12918
    """Build hooks env.
12919

12920
    """
12921
    return {
12922
      "GROUP_NAME": self.op.group_name,
12923
      "TARGET_GROUPS": " ".join(self.target_uuids),
12924
      }
12925

    
12926
  def BuildHooksNodes(self):
12927
    """Build hooks nodes.
12928

12929
    """
12930
    mn = self.cfg.GetMasterNode()
12931

    
12932
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12933

    
12934
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12935

    
12936
    return (run_nodes, run_nodes)
12937

    
12938
  def Exec(self, feedback_fn):
12939
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12940

    
12941
    assert self.group_uuid not in self.target_uuids
12942

    
12943
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12944
                     instances=instances, target_groups=self.target_uuids)
12945

    
12946
    ial.Run(self.op.iallocator)
12947

    
12948
    if not ial.success:
12949
      raise errors.OpPrereqError("Can't compute group evacuation using"
12950
                                 " iallocator '%s': %s" %
12951
                                 (self.op.iallocator, ial.info),
12952
                                 errors.ECODE_NORES)
12953

    
12954
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12955

    
12956
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12957
                 len(jobs), self.op.group_name)
12958

    
12959
    return ResultWithJobs(jobs)
12960

    
12961

    
12962
class TagsLU(NoHooksLU): # pylint: disable=W0223
12963
  """Generic tags LU.
12964

12965
  This is an abstract class which is the parent of all the other tags LUs.
12966

12967
  """
12968
  def ExpandNames(self):
12969
    self.group_uuid = None
12970
    self.needed_locks = {}
12971
    if self.op.kind == constants.TAG_NODE:
12972
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12973
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12974
    elif self.op.kind == constants.TAG_INSTANCE:
12975
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12976
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12977
    elif self.op.kind == constants.TAG_NODEGROUP:
12978
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12979

    
12980
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12981
    # not possible to acquire the BGL based on opcode parameters)
12982

    
12983
  def CheckPrereq(self):
12984
    """Check prerequisites.
12985

12986
    """
12987
    if self.op.kind == constants.TAG_CLUSTER:
12988
      self.target = self.cfg.GetClusterInfo()
12989
    elif self.op.kind == constants.TAG_NODE:
12990
      self.target = self.cfg.GetNodeInfo(self.op.name)
12991
    elif self.op.kind == constants.TAG_INSTANCE:
12992
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12993
    elif self.op.kind == constants.TAG_NODEGROUP:
12994
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12995
    else:
12996
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12997
                                 str(self.op.kind), errors.ECODE_INVAL)
12998

    
12999

    
13000
class LUTagsGet(TagsLU):
13001
  """Returns the tags of a given object.
13002

13003
  """
13004
  REQ_BGL = False
13005

    
13006
  def ExpandNames(self):
13007
    TagsLU.ExpandNames(self)
13008

    
13009
    # Share locks as this is only a read operation
13010
    self.share_locks = _ShareAll()
13011

    
13012
  def Exec(self, feedback_fn):
13013
    """Returns the tag list.
13014

13015
    """
13016
    return list(self.target.GetTags())
13017

    
13018

    
13019
class LUTagsSearch(NoHooksLU):
13020
  """Searches the tags for a given pattern.
13021

13022
  """
13023
  REQ_BGL = False
13024

    
13025
  def ExpandNames(self):
13026
    self.needed_locks = {}
13027

    
13028
  def CheckPrereq(self):
13029
    """Check prerequisites.
13030

13031
    This checks the pattern passed for validity by compiling it.
13032

13033
    """
13034
    try:
13035
      self.re = re.compile(self.op.pattern)
13036
    except re.error, err:
13037
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13038
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13039

    
13040
  def Exec(self, feedback_fn):
13041
    """Returns the tag list.
13042

13043
    """
13044
    cfg = self.cfg
13045
    tgts = [("/cluster", cfg.GetClusterInfo())]
13046
    ilist = cfg.GetAllInstancesInfo().values()
13047
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13048
    nlist = cfg.GetAllNodesInfo().values()
13049
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13050
    tgts.extend(("/nodegroup/%s" % n.name, n)
13051
                for n in cfg.GetAllNodeGroupsInfo().values())
13052
    results = []
13053
    for path, target in tgts:
13054
      for tag in target.GetTags():
13055
        if self.re.search(tag):
13056
          results.append((path, tag))
13057
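    # Example result shape (made-up values): a pattern like "^db" could yield
    # [("/instances/inst1.example.com", "dbserver"), ("/cluster", "dbfarm")].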
    return results
13058

    
13059

    
13060
class LUTagsSet(TagsLU):
13061
  """Sets a tag on a given object.
13062

13063
  """
13064
  REQ_BGL = False
13065

    
13066
  def CheckPrereq(self):
13067
    """Check prerequisites.
13068

13069
    This checks the type and length of the tag name and value.
13070

13071
    """
13072
    TagsLU.CheckPrereq(self)
13073
    for tag in self.op.tags:
13074
      objects.TaggableObject.ValidateTag(tag)
13075

    
13076
  def Exec(self, feedback_fn):
13077
    """Sets the tag.
13078

13079
    """
13080
    try:
13081
      for tag in self.op.tags:
13082
        self.target.AddTag(tag)
13083
    except errors.TagError, err:
13084
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13085
    self.cfg.Update(self.target, feedback_fn)
13086

    
13087

    
13088
class LUTagsDel(TagsLU):
13089
  """Delete a list of tags from a given object.
13090

13091
  """
13092
  REQ_BGL = False
13093

    
13094
  def CheckPrereq(self):
13095
    """Check prerequisites.
13096

13097
    This checks that we have the given tag.
13098

13099
    """
13100
    TagsLU.CheckPrereq(self)
13101
    for tag in self.op.tags:
13102
      objects.TaggableObject.ValidateTag(tag)
13103
    del_tags = frozenset(self.op.tags)
13104
    cur_tags = self.target.GetTags()
13105

    
13106
    diff_tags = del_tags - cur_tags
13107
    if diff_tags:
13108
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13109
      raise errors.OpPrereqError("Tag(s) %s not found" %
13110
                                 (utils.CommaJoin(diff_names), ),
13111
                                 errors.ECODE_NOENT)
13112

    
13113
  def Exec(self, feedback_fn):
13114
    """Remove the tag from the object.
13115

13116
    """
13117
    for tag in self.op.tags:
13118
      self.target.RemoveTag(tag)
13119
    self.cfg.Update(self.target, feedback_fn)
13120

    
13121

    
13122
class LUTestDelay(NoHooksLU):
13123
  """Sleep for a specified amount of time.
13124

13125
  This LU sleeps on the master and/or nodes for a specified amount of
13126
  time.
13127

13128
  """
13129
  REQ_BGL = False
13130

    
13131
  def ExpandNames(self):
13132
    """Expand names and set required locks.
13133

13134
    This expands the node list, if any.
13135

13136
    """
13137
    self.needed_locks = {}
13138
    if self.op.on_nodes:
13139
      # _GetWantedNodes can be used here, but is not always appropriate to use
13140
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13141
      # more information.
13142
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13143
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13144

    
13145
  def _TestDelay(self):
13146
    """Do the actual sleep.
13147

13148
    """
13149
    if self.op.on_master:
13150
      if not utils.TestDelay(self.op.duration):
13151
        raise errors.OpExecError("Error during master delay test")
13152
    if self.op.on_nodes:
13153
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13154
      for node, node_result in result.items():
13155
        node_result.Raise("Failure during rpc call to node %s" % node)
13156

    
13157
  def Exec(self, feedback_fn):
13158
    """Execute the test delay opcode, with the wanted repetitions.
13159

13160
    """
13161
    if self.op.repeat == 0:
13162
      self._TestDelay()
13163
    else:
13164
      top_value = self.op.repeat - 1
13165
      for i in range(self.op.repeat):
13166
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13167
        self._TestDelay()
13168

    
13169

    
13170
class LUTestJqueue(NoHooksLU):
13171
  """Utility LU to test some aspects of the job queue.
13172

13173
  """
13174
  REQ_BGL = False
13175

    
13176
  # Must be lower than default timeout for WaitForJobChange to see whether it
13177
  # notices changed jobs
13178
  _CLIENT_CONNECT_TIMEOUT = 20.0
13179
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13180

    
13181
  @classmethod
13182
  def _NotifyUsingSocket(cls, cb, errcls):
13183
    """Opens a Unix socket and waits for another program to connect.
13184

13185
    @type cb: callable
13186
    @param cb: Callback to send socket name to client
13187
    @type errcls: class
13188
    @param errcls: Exception class to use for errors
13189

13190
    """
13191
    # Using a temporary directory as there's no easy way to create temporary
13192
    # sockets without writing a custom loop around tempfile.mktemp and
13193
    # socket.bind
13194
    tmpdir = tempfile.mkdtemp()
13195
    try:
13196
      tmpsock = utils.PathJoin(tmpdir, "sock")
13197

    
13198
      logging.debug("Creating temporary socket at %s", tmpsock)
13199
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13200
      try:
13201
        sock.bind(tmpsock)
13202
        sock.listen(1)
13203

    
13204
        # Send details to client
13205
        cb(tmpsock)
13206

    
13207
        # Wait for client to connect before continuing
13208
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13209
        try:
13210
          (conn, _) = sock.accept()
13211
        except socket.error, err:
13212
          raise errcls("Client didn't connect in time (%s)" % err)
13213
      finally:
13214
        sock.close()
13215
    finally:
13216
      # Remove as soon as client is connected
13217
      shutil.rmtree(tmpdir)
13218

    
13219
    # Wait for client to close
13220
    try:
13221
      try:
13222
        # pylint: disable=E1101
13223
        # Instance of '_socketobject' has no ... member
13224
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13225
        conn.recv(1)
13226
      except socket.error, err:
13227
        raise errcls("Client failed to confirm notification (%s)" % err)
13228
    finally:
13229
      conn.close()
13230

    
13231
  def _SendNotification(self, test, arg, sockname):
13232
    """Sends a notification to the client.
13233

13234
    @type test: string
13235
    @param test: Test name
13236
    @param arg: Test argument (depends on test)
13237
    @type sockname: string
13238
    @param sockname: Socket path
13239

13240
    """
13241
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13242

    
13243
  def _Notify(self, prereq, test, arg):
13244
    """Notifies the client of a test.
13245

13246
    @type prereq: bool
13247
    @param prereq: Whether this is a prereq-phase test
13248
    @type test: string
13249
    @param test: Test name
13250
    @param arg: Test argument (depends on test)
13251

13252
    """
13253
    if prereq:
13254
      errcls = errors.OpPrereqError
13255
    else:
13256
      errcls = errors.OpExecError
13257

    
13258
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13259
                                                  test, arg),
13260
                                   errcls)
13261

    
13262
  def CheckArguments(self):
13263
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13264
    self.expandnames_calls = 0
13265

    
13266
  def ExpandNames(self):
13267
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13268
    if checkargs_calls < 1:
13269
      raise errors.ProgrammerError("CheckArguments was not called")
13270

    
13271
    self.expandnames_calls += 1
13272

    
13273
    if self.op.notify_waitlock:
13274
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13275

    
13276
    self.LogInfo("Expanding names")
13277

    
13278
    # Get lock on master node (just to get a lock, not for a particular reason)
13279
    self.needed_locks = {
13280
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13281
      }
13282

    
13283
  def Exec(self, feedback_fn):
13284
    if self.expandnames_calls < 1:
13285
      raise errors.ProgrammerError("ExpandNames was not called")
13286

    
13287
    if self.op.notify_exec:
13288
      self._Notify(False, constants.JQT_EXEC, None)
13289

    
13290
    self.LogInfo("Executing")
13291

    
13292
    if self.op.log_messages:
13293
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13294
      for idx, msg in enumerate(self.op.log_messages):
13295
        self.LogInfo("Sending log message %s", idx + 1)
13296
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13297
        # Report how many test messages have been sent
13298
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13299

    
13300
    if self.op.fail:
13301
      raise errors.OpExecError("Opcode failure was requested")
13302

    
13303
    return True
13304

    
13305

    
13306
class IAllocator(object):
13307
  """IAllocator framework.
13308

13309
  An IAllocator instance has four sets of attributes:
13310
    - cfg that is needed to query the cluster
13311
    - input data (all members of the _KEYS class attribute are required)
13312
    - four buffer attributes (in|out_data|text), that represent the
13313
      input (to the external script) in text and data structure format,
13314
      and the output from it, again in two formats
13315
    - the result variables from the script (success, info, nodes) for
13316
      easy usage
13317

13318
  """
13319
  # pylint: disable=R0902
13320
  # lots of instance attributes
13321

    
13322
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13323
    self.cfg = cfg
13324
    self.rpc = rpc_runner
13325
    # init buffer variables
13326
    self.in_text = self.out_text = self.in_data = self.out_data = None
13327
    # init all input fields so that pylint is happy
13328
    self.mode = mode
13329
    self.memory = self.disks = self.disk_template = None
13330
    self.os = self.tags = self.nics = self.vcpus = None
13331
    self.hypervisor = None
13332
    self.relocate_from = None
13333
    self.name = None
13334
    self.instances = None
13335
    self.evac_mode = None
13336
    self.target_groups = []
13337
    # computed fields
13338
    self.required_nodes = None
13339
    # init result fields
13340
    self.success = self.info = self.result = None
13341

    
13342
    try:
13343
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13344
    except KeyError:
13345
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13346
                                   " IAllocator" % self.mode)
13347

    
13348
    keyset = [n for (n, _) in keydata]
13349

    
13350
    for key in kwargs:
13351
      if key not in keyset:
13352
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13353
                                     " IAllocator" % key)
13354
      setattr(self, key, kwargs[key])
13355

    
13356
    for key in keyset:
13357
      if key not in kwargs:
13358
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13359
                                     " IAllocator" % key)
13360
    self._BuildInputData(compat.partial(fn, self), keydata)
13361
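    # For example, LUGroupEvacuate earlier in this module builds one of these
    # roughly as
    #   IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
    #              instances=instances, target_groups=self.target_uuids)
    # and any keyword argument not listed in the mode's keydata is rejected
    # above as a programmer error.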

    
13362
  def _ComputeClusterData(self):
13363
    """Compute the generic allocator input data.
13364

13365
    This is the data that is independent of the actual operation.
13366

13367
    """
13368
    cfg = self.cfg
13369
    cluster_info = cfg.GetClusterInfo()
13370
    # cluster data
13371
    data = {
13372
      "version": constants.IALLOCATOR_VERSION,
13373
      "cluster_name": cfg.GetClusterName(),
13374
      "cluster_tags": list(cluster_info.GetTags()),
13375
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13376
      # we don't have job IDs
13377
      }
13378
    ninfo = cfg.GetAllNodesInfo()
13379
    iinfo = cfg.GetAllInstancesInfo().values()
13380
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13381

    
13382
    # node data
13383
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13384

    
13385
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13386
      hypervisor_name = self.hypervisor
13387
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13388
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13389
    else:
13390
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13391

    
13392
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13393
                                        hypervisor_name)
13394
    node_iinfo = \
13395
      self.rpc.call_all_instances_info(node_list,
13396
                                       cluster_info.enabled_hypervisors)
13397

    
13398
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13399

    
13400
    config_ndata = self._ComputeBasicNodeData(ninfo)
13401
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13402
                                                 i_list, config_ndata)
13403
    assert len(data["nodes"]) == len(ninfo), \
13404
        "Incomplete node data computed"
13405

    
13406
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13407

    
13408
    self.in_data = data
13409

    
13410
  @staticmethod
13411
  def _ComputeNodeGroupData(cfg):
13412
    """Compute node groups data.
13413

13414
    """
13415
    ng = dict((guuid, {
13416
      "name": gdata.name,
13417
      "alloc_policy": gdata.alloc_policy,
13418
      })
13419
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13420
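    # Roughly (group UUID and values invented for illustration):
    #   {"6d1b9a49-...": {"name": "default", "alloc_policy": "preferred"}}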

    
13421
    return ng
13422

    
13423
  @staticmethod
13424
  def _ComputeBasicNodeData(node_cfg):
13425
    """Compute global node data.
13426

13427
    @rtype: dict
13428
    @returns: a dict of name: (node dict, node config)
13429

13430
    """
13431
    # fill in static (config-based) values
13432
    node_results = dict((ninfo.name, {
13433
      "tags": list(ninfo.GetTags()),
13434
      "primary_ip": ninfo.primary_ip,
13435
      "secondary_ip": ninfo.secondary_ip,
13436
      "offline": ninfo.offline,
13437
      "drained": ninfo.drained,
13438
      "master_candidate": ninfo.master_candidate,
13439
      "group": ninfo.group,
13440
      "master_capable": ninfo.master_capable,
13441
      "vm_capable": ninfo.vm_capable,
13442
      })
13443
      for ninfo in node_cfg.values())
13444

    
13445
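    # Each entry is keyed by node name and, with invented values, looks
    # roughly like:
    #   {"node1.example.com": {"tags": [], "primary_ip": "192.0.2.11",
    #                          "offline": False, "drained": False,
    #                          "group": "<group uuid>", ...}}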
    return node_results
13446

    
13447
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic node data, merging it with the static (config) data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # build the dynamic node result, merged with the static data below
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

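  # Note on the memory accounting in _ComputeDynamicNodeData above, by way
  # of example: if an instance is configured with BE_MEMORY = 1024 MiB but
  # the hypervisor currently reports only 512 MiB in use, the 512 MiB
  # difference is subtracted from the node's "memory_free", so the
  # allocator still treats the memory the instance can grow back into as
  # unavailable.
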
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

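  # A minimal sketch of the allocation request built by _AddNewInstance
  # (all values below are made up for illustration; "type" is added later
  # by _BuildInputData and "disk_space_total" by _ComputeDiskSize):
  #   {
  #     "type": "allocate",
  #     "name": "inst1.example.com",
  #     "disk_template": "drbd",
  #     "tags": [],
  #     "os": "debian-image",
  #     "vcpus": 1,
  #     "memory": 1024,
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_space_total": ...,
  #     "nics": [...],
  #     "required_nodes": 2,
  #     "hypervisor": "xen-pvm",
  #   }
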
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

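  # _MODE_DATA maps each allocator mode to a (request-builder method,
  # required request keys with their type checks, result validator) triple.
  # A rough sketch of how it is consumed (the actual lookup happens in the
  # class constructor, earlier in this file):
  #   (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
  #   self._BuildInputData(compat.partial(fn, self), keydata)
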
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result))

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

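  # A well-formed iallocator reply, as checked by _ValidateResult above,
  # is a JSON object roughly like:
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node2.example.com", "node3.example.com"]
  #   }
  # The shape of "result" depends on the mode: a node list for allocation
  # and relocation requests, and the (moved, failed, jobs) triple described
  # by _NEVAC_RESULT for node evacuation and group changes.
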
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
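

# A minimal usage sketch for _GetQueryImplementation (the real callers are
# the query-related LUs elsewhere in this module), assuming QR_NODE is the
# requested resource:
#   impl = _GetQueryImplementation(constants.QR_NODE)   # returns _NodeQuery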